In [236]:
import pandas as pd
from pandas.io.parsers import ParserError
import numpy as np
from helper import get_mapper
import json
import os
import re

In [237]:
from os import listdir, stat
from os.path import isfile, join
# Root directory whose sub-directories are scanned for data files.
BASE_DIR = "."
# Size threshold in bytes: get_files_from_folder keeps only files larger than this.
MIN_SIZE = 512

In [238]:
def extract_blockidf(fullname):
    """Pull the block label out of a generation column header.

    The label is the text that follows the "Generation_DE " prefix and
    precedes the first "[MW]" marker, e.g. 'Generation_DE Q[MW]' -> 'Q'.

    Args:
        fullname: Column header string.

    Returns:
        The block label as a string.

    Raises:
        IndexError: If `fullname` does not contain the "Generation_DE " prefix.
    """
    after_prefix = fullname.split("Generation_DE ")[1]
    block_label, _, _ = after_prefix.partition('[MW]')
    return block_label

In [239]:
def get_files_from_folder(folder):
    """List the regular files of `folder` that are larger than MIN_SIZE bytes.

    Args:
        folder: Directory path to scan (not recursive).

    Returns:
        Sorted list of paths of the form ``folder + "/" + filename``;
        sub-directories and files of at most MIN_SIZE bytes are left out.
    """
    candidates = sorted(
        folder + "/" + entry
        for entry in listdir(folder)
        if isfile(join(folder, entry))
    )
    return [path for path in candidates if stat(path).st_size > MIN_SIZE]

In [240]:
# Block-to-plant lookup table; get_plant_from_prod_name reads its `blockid` and `plantid` columns.
bpm = pd.read_csv("../basic/block_plant_mapper.csv")

In [241]:
# Collect every sub-directory of BASE_DIR in sorted order, then drop the first
# entry and the last two.
# NOTE(review): the [1:-2] slice is positional, so it silently selects different
# folders whenever a directory is added to or removed from BASE_DIR - confirm
# which entries it is meant to exclude.
FOLDERS = sorted(
    os.path.join(BASE_DIR, entry)
    for entry in os.listdir(BASE_DIR)
    if os.path.isdir(os.path.join(BASE_DIR, entry))
)[1:-2]


In [242]:
# FILES_L holds one list of usable files per folder; FILES is the same content
# flattened into a single list, keeping the folder order.
FILES_L = list(map(get_files_from_folder, FOLDERS))
FILES = [path for folder_files in FILES_L for path in folder_files]

In [243]:
# Display the folders selected for processing.
FOLDERS

['./2015', './2016_17', './2018_19', './2020']

In [244]:
# Load the plant mapping. Judging by the displayed value, each plant name maps to a dict
# holding 'list' (the plant's block labels) plus one entry per label giving the block id.
mapper = get_mapper('plantmapper.json')

In [245]:
# Inspect the loaded plant mapping.
mapper

{'Bergkamen': {'list': ['A'], 'A': 'BNA0067'},
 'Boxberg': {'list': ['R', 'Q', 'P', 'N'],
  'R': 'BNA1404',
  'Q': 'BNA0124',
  'P': 'BNA0123',
  'N': 'BNA0122'},
 'Braunkohlekraftwerk_Lippendorf': {'list': ['R', 'LIP S'],
  'R': 'BNA0115',
  'LIP S': 'BNA0116'},
 'Brokdorf': {'list': ['KBR'], 'KBR': 'BNA0157'},
 'Burghausen_GT': {'list': ['Burghausen_GT 1', 'BGH - O1'],
  'Burghausen_GT 1': 'BNA0172a',
  'BGH - O1': 'BNA0172b'},
 'Buschhaus': {'list': ['D'], 'D': 'BNA0439'},
 'Cuno_Heizkraftwerk_Herdecke': {'list': ['H6'], 'H6': 'BNA0442'},
 'Datteln': {'list': ['3.0', '1.0', '2.0', '4.0'],
  '3.0': 'BNA0189',
  '1.0': 'BNA0187',
  '2.0': 'BNA0188',
  '4.0': ''},
 'Dormagen': {'list': ['Gas-und-Dampfturbinen'],
  'Gas-und-Dampfturbinen': 'BNA0199'},
 'Duisburg_Hamborn': {'list': ['Block 3', 'Block 4', 'Block 5'],
  'Block 3': 'BNA0395',
  'Block 4': 'BNA0396',
  'Block 5': 'BNA0397'},
 'Duisburg_Heizkraftwerk_III': {'list': ['III/A', 'III/B'],
  'III/A': 'BNA0213',
  'III/B': 'BNA0214

In [300]:
#bpm

In [247]:
#FILES

In [248]:
def get_smard_name(f):
    """Extract the plant name from the path of a data file.

    The file name is expected to look like ``<plant>_<from>_<to>_<n>.csv``;
    the last three underscore-separated fields are stripped, so plant names
    that themselves contain underscores are preserved.

    The file name is taken with ``os.path.basename`` rather than a fixed
    path-component index, so the result does not depend on how deep the
    file sits below the working directory or on the platform's separator.

    Args:
        f: Path to the data file.

    Returns:
        The plant-name part of the file name.
    """
    filename = os.path.basename(f)
    return filename.rsplit("_", 3)[0]

In [306]:
# Resolve every data file to its (blockid, plantid) pair.
# NOTE: get_plant_from_prod_name is defined in a later cell of this notebook, so on a
# fresh kernel that definition has to be executed before this cell.
plant_ids = [get_plant_from_prod_name(get_smard_name(path)) for path in FILES]
prod_mapper = [
    [path, blockid, plantid]
    for path, (blockid, plantid) in zip(FILES, plant_ids)
]

plant Abwinden-Asten not found!
plant Altenw_rth not found!
plant Donaustadt not found!
plant Fernheizkraftwerk_Mellach not found!
plant Gerlos not found!
plant Greifenstein not found!
plant H_usling not found!
plant K_stenkraftwerk_K.I.E.L. not found!
plant Kaprun_Hauptstufe not found!
plant Kaprun_Oberstufe not found!
plant Malta_Hauptstufe not found!
plant Malta_Oberstufe not found!
plant Mayrhofen not found!
plant Melk not found!
plant Ottensheim-Wilhering not found!
plant Riedersbach not found!
plant Ro_hag not found!
plant Schwarzach not found!
plant Timelkam_ not found!
plant Wallsee-Mitterkirchen not found!
plant Ybbs-Persenbeug not found!


In [305]:
def get_plant_from_prod_name(prodname):
    """Look up the block id and plant id belonging to a plant name.

    Only the first block label in the plant's 'list' entry of `mapper` is
    used; its block id is then searched in the `blockid` column of `bpm`.

    Args:
        prodname: Plant name used as key into `mapper`.

    Returns:
        A ``(blockid, plantid)`` tuple.
        ``(None, None)`` when `prodname` is not a key of `mapper` (a message
        is printed in that case).
        ``(blockid, np.nan)`` when the `bpm` lookup raises ValueError, which
        `.item()` does if the selection does not hold exactly one value.
    """
    try:
        plant_entry = mapper[prodname]
    except KeyError:
        print("plant " + prodname + " not found!")
        return None, None

    first_block = plant_entry['list'][0]
    blockid = plant_entry[first_block]

    try:
        plantidx = bpm.loc[bpm.blockid == blockid, 'plantid'].item()
    except ValueError:
        plantidx = np.nan
    return blockid, plantidx

In [252]:
# Spot-check the lookup for a single plant.
get_plant_from_prod_name("Buschhaus")

('BNA0439', '03-01-01012110180')

In [253]:
#prod_mapper 

In [254]:
# Tabulate the [file, blockid, plantid] rows collected in prod_mapper.
prd_df = pd.DataFrame(prod_mapper, columns = ['file', 'blockid', 'plantid'])

In [255]:
#prd_df

In [307]:
# Smoke test on the first five mapped files: load each CSV, rename its columns to
# block ids and print the resulting dtypes.
# NOTE: convert2plantid is defined in a later cell of this notebook, so on a fresh
# kernel that definition has to be executed before this cell.
for _, row in prd_df.head(5).iterrows():
    # Label-based access; integer keys on a string-labelled Series are deprecated.
    dfname = row['file']
    smardname = get_smard_name(dfname)
    df = pd.read_csv(dfname, delimiter=";", parse_dates=[["Datum", "Uhrzeit"]])
    try:
        newdf = convert2plantid(df, smardname)
    except IndexError:
        # E.g. a column header without the "Generation_DE " prefix. Report the
        # file and skip it: falling through would print the frame left over from
        # the previous iteration (or fail with NameError on the first one).
        print(smardname)
        continue
    print(newdf.dtypes)

produced_at    datetime64[ns]
BNA0067                object
dtype: object
produced_at    datetime64[ns]
BNA1404                object
BNA0124                object
BNA0123                object
BNA0122                object
dtype: object
produced_at    datetime64[ns]
BNA0115                object
BNA0116                object
dtype: object
produced_at    datetime64[ns]
BNA0157                object
dtype: object
produced_at    datetime64[ns]
BNA0172a               object
BNA0172b               object
dtype: object


In [257]:
# Load a single Boxberg file as a sample frame for trying out convert2plantid.
testdf = pd.read_csv("./2015/Boxberg_201501010000_201512312345_71.csv", delimiter=";", parse_dates=[["Datum", "Uhrzeit"]])

In [292]:
def convert2plantid(df, plantname):
    """Return a copy of `df` whose columns are renamed to block ids.

    The first column becomes 'produced_at'. Every further column header is
    reduced to its block label with extract_blockidf and then replaced by
    ``mapper[plantname][label]``.

    Args:
        df: DataFrame whose first column is the timestamp and whose other
            columns are generation columns.
        plantname: Key of the plant in `mapper`.

    Returns:
        A new DataFrame with renamed columns; `df` itself is not modified.

    Raises:
        IndexError: If a generation column header cannot be parsed by
            extract_blockidf.
        KeyError: If `plantname` or a block label is missing from `mapper`.
    """
    source_names = list(df.columns)
    block_labels = [extract_blockidf(header) for header in source_names[1:]]

    target_names = ['produced_at']
    for label in block_labels:
        target_names.append(mapper[plantname][label])

    return df.rename(columns=dict(zip(source_names, target_names)))

In [293]:
# Inspect the sample frame before renaming.
testdf

Unnamed: 0,Datum_Uhrzeit,Generation_DE R[MW],Generation_DE Q[MW],Generation_DE P[MW],Generation_DE N[MW]
0,2015-01-01 00:00:00,-,-,-,-
1,2015-01-01 01:00:00,-,-,-,-
2,2015-01-01 02:00:00,-,-,-,-
3,2015-01-01 03:00:00,-,-,-,-
4,2015-01-01 04:00:00,-,-,-,-
...,...,...,...,...,...
8755,2015-12-31 19:00:00,598,837,219,0
8756,2015-12-31 20:00:00,598,835,220,0
8757,2015-12-31 21:00:00,598,835,221,0
8758,2015-12-31 22:00:00,598,837,221,0


In [294]:
# Generation column headers of the sample frame (everything after the timestamp column).
# The original referenced `testdf2`, which is not defined anywhere in this notebook and
# raises NameError on a fresh kernel; `testdf` is the sample frame that is loaded here.
cols = list(testdf.columns)[1:]

In [295]:
# Try the column renaming on the Boxberg sample frame.
convert2plantid(testdf, "Boxberg")

Unnamed: 0,produced_at,BNA1404,BNA0124,BNA0123,BNA0122
0,2015-01-01 00:00:00,-,-,-,-
1,2015-01-01 01:00:00,-,-,-,-
2,2015-01-01 02:00:00,-,-,-,-
3,2015-01-01 03:00:00,-,-,-,-
4,2015-01-01 04:00:00,-,-,-,-
...,...,...,...,...,...
8755,2015-12-31 19:00:00,598,837,219,0
8756,2015-12-31 20:00:00,598,835,220,0
8757,2015-12-31 21:00:00,598,835,221,0
8758,2015-12-31 22:00:00,598,837,221,0


In [296]:
# Re-display the sample frame: convert2plantid returns a renamed copy, so the original
# column headers are still in place here.
testdf

Unnamed: 0,Datum_Uhrzeit,Generation_DE R[MW],Generation_DE Q[MW],Generation_DE P[MW],Generation_DE N[MW]
0,2015-01-01 00:00:00,-,-,-,-
1,2015-01-01 01:00:00,-,-,-,-
2,2015-01-01 02:00:00,-,-,-,-
3,2015-01-01 03:00:00,-,-,-,-
4,2015-01-01 04:00:00,-,-,-,-
...,...,...,...,...,...
8755,2015-12-31 19:00:00,598,837,219,0
8756,2015-12-31 20:00:00,598,835,220,0
8757,2015-12-31 21:00:00,598,835,221,0
8758,2015-12-31 22:00:00,598,837,221,0


In [None]:
# Show the renamed column list for the sample frame. `newcols2` exists only as a local
# variable inside convert2plantid, so referencing it at cell level raises NameError.
list(convert2plantid(testdf, "Boxberg").columns)

In [None]:
# Sanity check of the header parser on a single column name.
extract_blockidf('Generation_DE Q[MW]')