In [5]:
import pandas as pd
from pandas.io.parsers import ParserError
import numpy as np
from helper import get_mapper
import json
from sqlalchemy import create_engine
import os
import re

In [6]:
from os import listdir, stat
from os.path import isfile, join

In [7]:
import datetime
from calendar import isleap

In [8]:
# Size threshold (st_size, i.e. bytes): get_files_from_folder only keeps files
# strictly larger than this.
MIN_SIZE = 512
# Directory whose sub-directories are collected into FOLDERS.
BASE_DIR = "."
# NOTE(review): not referenced by the code visible in this notebook; fix_path()
# and gen_power2/gen_power3 hard-code "fix/" instead — confirm they should agree.
FIX_DIR = "fix"

In [9]:
def get_fix_files(BASE_DIR):
    """Return the sorted names of the regular files directly inside ``BASE_DIR``.

    Sub-directories are skipped. The returned entries are bare file names
    (no directory prefix).
    """
    return sorted(
        entry
        for entry in os.listdir(BASE_DIR)
        if os.path.isfile(join(BASE_DIR, entry))
    )

In [10]:
def get_files_from_folder(folder):
    """Return the sorted paths of the sufficiently large files in ``folder``.

    Each path is built as ``folder + "/" + name``. Only regular files whose
    size is strictly greater than the module-level ``MIN_SIZE`` are kept.
    """
    candidates = sorted(
        folder + "/" + name
        for name in listdir(folder)
        if isfile(join(folder, name))
    )
    return [path for path in candidates if stat(path).st_size > MIN_SIZE]

In [11]:
# Every sub-directory of BASE_DIR, as a path, in sorted order.
FOLDERS = [os.path.join(BASE_DIR, o) for o in os.listdir(BASE_DIR) if os.path.isdir(os.path.join(BASE_DIR,o))]
FOLDERS.sort()
# Keep everything except the first and the last two entries of the sorted list.
# NOTE(review): presumably this strips non-data directories (hidden/cache/output
# folders); being positional, it silently selects the wrong folders as soon as
# a directory is added or removed — confirm and consider filtering by name.
FOLDERS = FOLDERS[1:-2]


In [12]:
# Number of hours in five years containing exactly one leap day.
hours = (365 * 5 + 1) * 24
base = datetime.datetime(2015, 1, 1)
# One timestamp per hour starting at 2015-01-01 00:00; with the hour count
# above the last entry is 2019-12-31 23:00.
date_list = [base + datetime.timedelta(hours=x) for x in range(0, hours)]
# Reference frame with a single 'produced_at' column; gen_df left-merges the
# per-file data onto it.
# NOTE(review): the folder list shown below includes './2020', but this index
# ends in 2019, so a left merge onto COMPLETE cannot keep 2020 rows — confirm
# whether the range should be extended.
COMPLETE = pd.DataFrame(data={'produced_at': date_list})
COMPLETE.produced_at = pd.to_datetime(COMPLETE.produced_at)

In [13]:
FOLDERS

['./2015', './2016_17', './2018_19', './2020']

In [14]:
# Lookup table used by get_columns as mapper[plant][block].
# NOTE(review): get_mapper comes from the local helper module; presumably it
# loads 'plantmapper.json' into a nested dict — confirm.
mapper = get_mapper('plantmapper.json')

In [32]:
# One list of file paths per entry of FOLDERS (same order as FOLDERS).
FILES_L = [get_files_from_folder(f) for f in FOLDERS]
# Flatten the per-folder lists into a single list, preserving order.
FILES = [item for sublist in FILES_L for item in sublist]

In [33]:
FILES

['./2015/Bergkamen_201501010000_201512312345_28.csv',
 './2015/Boxberg_201501010000_201512312345_71.csv',
 './2015/Braunkohlekraftwerk_Lippendorf_201501010000_201512312345_136.csv',
 './2015/Brokdorf_201501010000_201512312345_150.csv',
 './2015/Burghausen_GT_201501010000_201512312345_36.csv',
 './2015/Buschhaus_201501010000_201512312345_146.csv',
 './2015/Cuno_Heizkraftwerk_Herdecke_201501010000_201512312345_17.csv',
 './2015/Dormagen_201501010000_201512312345_70.csv',
 './2015/Duisburg_Hamborn_201501010000_201512312345_43.csv',
 './2015/Duisburg_Heizkraftwerk_III_201501010000_201512312345_18.csv',
 './2015/Duisburg_Ruhrort_201501010000_201512312345_5.csv',
 './2015/E-Werk_Wilhelmshaven_201501010000_201512312345_79.csv',
 './2015/Emsland_201501010000_201512312345_87.csv',
 './2015/Farge_201501010000_201512312345_52.csv',
 './2015/Franken_1_201501010000_201512312345_104.csv',
 './2015/Frimmersdorf_201501010000_201512312345_34.csv',
 './2015/GKH_St_cken_201501010000_201512312345_54.csv',

In [17]:
def get_block(col):
    """Extract the block label from a ``"Generation_DE <block>[MW]"`` column name.

    Returns the text between the ``"Generation_DE "`` prefix and the first
    ``"[MW]"`` marker, with whitespace left untouched (``'K1 [MW]'`` yields
    ``'K1 '``). Raises ``IndexError`` if the prefix is missing.
    """
    after_prefix = col.split("Generation_DE ")[1]
    pieces = after_prefix.rsplit('[MW]')
    return pieces[0]

In [18]:
def gen_powers(dirs):
    """Run ``gen_power`` on every directory in ``dirs`` and stack the results.

    For each directory the path is printed, its files are collected with
    ``get_files_from_folder`` and passed to ``gen_power``; the list of frames
    gathered so far is printed after each directory. The per-directory frames
    are concatenated with a fresh index. ``pd.concat`` raises ``ValueError``
    when ``dirs`` is empty.
    """
    frames = []
    for folder in dirs:
        print(folder)
        folder_files = get_files_from_folder(folder)
        frames.append(gen_power(folder_files))
        print(frames)

    return pd.concat(frames, ignore_index=True)

In [19]:
def get_msg(tup):
    """Turn the validity flags from ``is_valid_df`` into a readable message.

    Parameters
    ----------
    tup : tuple(bool, bool)
        ``(corr_dates, corr_power)`` where ``True`` means the check passed.

    Returns
    -------
    str
        ``"Dates wrong"`` and/or ``"Power wrong"`` (comma separated when both
        checks failed), or an empty string when both checks passed.
    """
    corr_dates, corr_power = tup
    problems = []
    # The flags mean "is correct", so a problem is reported when a flag is False.
    if not corr_dates:
        problems.append("Dates wrong")
    if not corr_power:
        problems.append("Power wrong")
    return ", ".join(problems)

In [20]:
def validate_dirs(dirs):
    """Validate every data file found in ``dirs``.

    Each directory path is printed, its files are collected with
    ``get_files_from_folder`` and read as ``;``-separated CSV. Files that
    cannot be read, or that fail ``is_valid_df``, are reported.

    Returns
    -------
    list of (str, str)
        ``(file, message)`` pairs; the message is ``"ParserError"``,
        ``"UnicodeDecodeError"`` or the text produced by ``get_msg``.
    """
    errored_files = []
    for d in dirs:
        print(d)
        for file in get_files_from_folder(d):
            try:
                df = pd.read_csv(file, sep=";")
            except ParserError:
                errored_files.append((file, "ParserError"))
                continue
            except UnicodeDecodeError:
                # gen_df treats undecodable files as bad input as well; report
                # them here instead of aborting the whole validation run.
                errored_files.append((file, "UnicodeDecodeError"))
                continue
            result, errtup = is_valid_df(df)
            if not result:
                errored_files.append((file, get_msg(errtup)))
    return errored_files

In [21]:
def df_correct_dates(df, name):
    """Check that every value in column ``name`` has three dot-separated parts.

    A value passes when it is a string that splits into exactly three pieces
    on ``"."``. Missing or non-string cells count as invalid instead of
    raising ``AttributeError``.

    Returns
    -------
    bool
        ``True`` when every value passes (also for an empty column).
    """
    return all(
        isinstance(value, str) and len(value.split(".")) == 3
        for value in df[name]
    )

In [22]:
def df_correct_power(df):
    """Check that no power column contains a value with a ``":"`` in it.

    Every column after the first two is cast to ``str`` and scanned for
    values that contain a colon.

    Returns
    -------
    bool
        ``True`` when no such value exists in any of those columns.
    """
    power_cols = list(df)[2:]
    as_text = df.astype({col: str for col in power_cols})

    def column_is_clean(col):
        # A value without ":" splits into exactly one piece.
        has_colon = as_text[col].apply(lambda cell: len(cell.split(":"))) != 1
        return not has_colon.any()

    # Build the full list first so every column is inspected, like a plain loop.
    return all([column_is_clean(col) for col in power_cols])

In [23]:
def is_valid_df(df):
    """Run the date and power sanity checks on a raw frame.

    The date column is ``"Date"`` when present, otherwise ``"Datum"``.

    Returns
    -------
    tuple
        ``(overall, (corr_dates, corr_power))`` where ``overall`` is true only
        when both individual checks passed.
    """
    date_col = "Date" if "Date" in list(df) else "Datum"

    dates_ok = df_correct_dates(df, date_col)
    power_ok = df_correct_power(df)

    return (dates_ok and power_ok, (dates_ok, power_ok))

In [24]:
def get_name_from_file(s, year="2018"):
    """Return the part of ``s`` that precedes the last ``"_<year>"`` marker.

    Parameters
    ----------
    s : str
        File name such as ``"Emsland_201801010000_201912312345_87.csv"``.
    year : str or int, optional
        Year whose ``"_<year>"`` marker starts the timestamp suffix. Defaults
        to ``"2018"``, the value that used to be hard-coded.

    Returns
    -------
    str
        ``s`` cut at the last occurrence of the marker, or ``s`` unchanged
        when the marker does not occur.
    """
    return s.rsplit('_' + str(year), 1)[0]

In [25]:
def get_plant(f):
    """Extract the plant name from a path like ``./<period>/<plant>_<a>_<b>_<c>.csv``.

    The third ``/``-separated component is taken as the file name, and its
    last three ``_``-separated fields are cut off.
    """
    filename = f.split("/")[2]
    pieces = filename.rsplit("_", 3)
    return pieces[0]

In [26]:
def get_columns(plant, cols):
    """Map raw generation columns of ``plant`` to block ids via ``mapper``.

    For every column the block label is extracted with ``get_block`` and
    looked up as ``mapper[plant][label]``. If the exact label is unknown, the
    label with surrounding whitespace stripped is tried as well, so a header
    such as ``'Generation_DE K1 [MW]'`` (label ``'K1 '``) can still be matched.

    Parameters
    ----------
    plant : str
        Key into the module-level ``mapper``.
    cols : list of str
        Column names to translate.

    Returns
    -------
    (dict, list)
        ``map_dict`` maps column name to block id; ``to_delete`` lists the
        columns with no (or an empty) mapping.
    """
    map_dict = {}
    to_delete = []
    for c in cols:
        block = ""
        try:
            plant_blocks = mapper[plant]
            label = get_block(c)
            try:
                block = plant_blocks[label]
            except KeyError:
                # Exact label unknown: retry without padding before giving up.
                block = plant_blocks[label.strip()]
        except KeyError:
            # Best effort: report the unmapped column and drop it below.
            print("KeyError")
            print(plant)
            print(cols)
            print(c)
        if block:
            map_dict[c] = block
        else:
            to_delete.append(c)

    return map_dict, to_delete

In [26]:
def conv_to_dt(df):
    """Replace the date and time-of-day columns by one ``produced_at`` column.

    The column pair is ``("Date", "Time of day")`` when ``"Date"`` exists,
    otherwise ``("Datum", "Uhrzeit")``. The two strings are joined with a
    space and parsed with ``pd.to_datetime(errors='coerce')``. Rows with a
    duplicate timestamp and rows whose timestamp could not be parsed are
    removed. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The remaining columns followed by ``produced_at`` as the last column.

    Raises
    ------
    KeyError
        If neither column pair is present.
    """
    date, tod = ("Date", "Time of day")
    if "Date" not in list(df):
        date, tod = ("Datum", "Uhrzeit")
    # NOTE(review): no dayfirst/format is given; if the source dates are
    # day-first (e.g. dd.mm.yyyy), values with day <= 12 may be parsed
    # month-first — confirm against the raw files.
    timestamps = pd.to_datetime(df[date] + " " + df[tod], errors='coerce')
    stamped = df.assign(produced_at=timestamps).drop(columns=[date, tod])
    deduplicated = stamped.drop_duplicates(["produced_at"])
    # Return the frame with the coerced (NaT) rows removed; previously this
    # result was computed but the unfiltered frame was returned.
    return deduplicated.dropna(subset=['produced_at'])

In [27]:
def rename_to_blockid(df, name):
    """Rename the generation columns of ``df`` to block ids for plant ``name``.

    Every column except the last one is passed to ``get_columns``; mapped
    columns are renamed and unmapped ones are dropped.
    """
    value_cols = list(df)[:-1]
    renames, unmapped = get_columns(name, value_cols)
    return df.rename(columns=renames).drop(columns=unmapped)

In [28]:
def fix_path(file):
    """Return the output path ``"fix/<name>"`` for a path like ``./<period>/<name>``.

    ``<name>`` is the third ``/``-separated component of ``file``.
    """
    filename = file.split("/")[2]
    return "fix/" + filename

In [29]:
def get_str_dict(array):
    """Return ``{name: str}`` for every name in ``array`` (an ``astype`` mapping)."""
    return dict.fromkeys(array, str)

def get_int_dict(array):
    """Return ``{name: int}`` for every name in ``array`` (an ``astype`` mapping)."""
    return dict.fromkeys(array, int)

In [30]:
def fix_num(x):
    """Normalise a single raw power value.

    * floats are truncated to ``int``;
    * values whose string form is purely numeric are returned unchanged;
    * anything else is reduced to the string of its digit characters.
    """
    if isinstance(x, float):
        return int(x)
    if str(x).isnumeric():
        return x
    digits = re.findall(r"\d", x)
    return "".join(digits)

In [43]:
def gen_date_df(years):
    """Build an hourly ``produced_at`` frame spanning the given years.

    The index starts at 1 January of ``years[0]`` and contains one row per
    hour for the combined length of all listed years (leap years counted
    with 366 days).

    Raises ``IndexError`` when ``years`` is empty.
    """
    first_year = years[0]
    total_hours = sum((366 if isleap(y) else 365) * 24 for y in years)
    start = datetime.datetime(first_year, 1, 1)
    stamps = [start + datetime.timedelta(hours=offset) for offset in range(total_hours)]
    frame = pd.DataFrame(data={'produced_at': stamps})
    frame.produced_at = pd.to_datetime(frame.produced_at)
    return frame

In [44]:
def melt_to_power(df):
    """Reshape a wide per-block frame into long ``(produced_at, blockid, power)`` rows.

    Rows without a ``produced_at`` value are dropped first. Missing power
    values become ``0`` and the ``power`` column is cast to ``int``.
    """
    with_timestamp = df.dropna(subset=["produced_at"])
    long_form = with_timestamp.melt(
        id_vars=["produced_at"], var_name='blockid', value_name='power'
    )
    long_form = long_form.assign(power=long_form['power'].fillna(0))
    return long_form.astype({"power": int})

In [45]:
def gen_power(filelist):
    """Build the merged frame for ``filelist`` with ``gen_df`` and pass it to ``conv_to_power``."""
    return conv_to_power(gen_df(filelist))

In [57]:
def gen_power3(files):
    """Merge the cleaned per-plant files from ``fix/`` onto the ``COMPLETE`` index.

    For every ``plant -> file names`` entry of the module-level ``FILES_DICT``
    the plant's files are read, right-merged onto ``DATEDF_LIST[idx]`` (so
    missing hours appear as NaN rows), stacked, and the stacked frame is
    left-merged onto the running result.

    Parameters
    ----------
    files : any
        Unused: the data always comes from ``FILES_DICT``. Kept so existing
        calls keep working.
        NOTE(review): confirm whether this argument was meant to replace
        ``FILES_DICT``.

    Returns
    -------
    pandas.DataFrame
        The merged frame sorted by ``produced_at``.
    """
    result = COMPLETE
    # The loop variable is deliberately not called ``files`` so that it no
    # longer shadows the parameter.
    for plant, plant_files in FILES_DICT.items():
        final = pd.DataFrame()
        for idx, f in enumerate(plant_files):
            df = pd.read_csv("fix/" + f, parse_dates=["produced_at"])
            # NOTE(review): idx is the file's position within this plant's
            # list, which presumably lines up with the periods in DATEDF_LIST.
            df2 = df.merge(DATEDF_LIST[idx], on='produced_at', how='right')
            # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
            final = df2 if final.empty else pd.concat([final, df2], sort=False)
        result = pd.merge(result, final, how='left', on=['produced_at'])
    return result.sort_values(by=['produced_at'])

In [47]:
def gen_power2(files):
    """Stack the cleaned files of one plant from ``fix/`` into a single frame.

    Each file is read with ``produced_at`` parsed as a date and right-merged
    onto ``DATEDF_LIST[idx]``, so hours missing from the file appear as NaN
    rows (a left merge would omit them instead).

    Parameters
    ----------
    files : sequence of str
        File names relative to ``fix/``. The position of a file selects the
        matching entry of the module-level ``DATEDF_LIST``.

    Returns
    -------
    pandas.DataFrame
        All rows sorted by ``produced_at``; an empty frame when ``files`` is
        empty.
    """
    final = pd.DataFrame()
    for idx, f in enumerate(files):
        df = pd.read_csv("fix/" + f, parse_dates=["produced_at"])
        df2 = df.merge(DATEDF_LIST[idx], on='produced_at', how='right')
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        final = df2 if final.empty else pd.concat([final, df2])
    if final.empty:
        # Nothing to sort (and possibly no 'produced_at' column at all).
        return final
    # Sort once at the end instead of after every file.
    return final.sort_values(by=['produced_at'])

In [48]:
def get_year(f):
    """Return the second ``/``-separated component of ``f`` (the period folder in ``./<period>/<file>``)."""
    components = f.split("/")
    return components[1]

In [49]:
def get_date_df_from_file(f):
    """Build the hourly reference frame for the period folder that ``f`` lives in."""
    period = get_year(f)
    years = get_years(period)
    return gen_date_df(years)

In [50]:
def get_years(yearstr):
    """Translate a period folder name into the list of years it covers.

    Known names are ``"2015"``, ``"2016_17"``, ``"2018_19"`` and ``"2020"``;
    any other string raises ``ValueError``.
    """
    known_periods = {
        "2015": (2015,),
        "2016_17": (2016, 2017),
        "2018_19": (2018, 2019),
        "2020": (2020,),
    }
    if yearstr in known_periods:
        # Hand out a fresh list on every call.
        return list(known_periods[yearstr])
    raise ValueError("wrong data " + yearstr)

In [51]:
def get_fix_filename(f):
    """Return the second ``/``-separated component of ``f`` (the file name in ``fix/<file>``)."""
    components = f.split("/")
    return components[1]

In [52]:
def fill_to_int(df):
    """Fill missing values with 0 and cast every data column to ``int``.

    All columns except ``produced_at`` are treated as data columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``produced_at`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``produced_at`` is left untouched.

    Raises
    ------
    ValueError
        If ``produced_at`` is missing or a value cannot be converted to int.
    """
    headers = list(df)
    headers.remove('produced_at')
    # Errors from fillna now propagate as they are. The former bare
    # ``except: pass`` only replaced them with an UnboundLocalError on the
    # next line.
    filled = df.fillna({header: 0 for header in headers})
    return filled.astype({header: int for header in headers})

In [53]:
def fix_df(df):
    """Clean every data column of ``df`` and cast it to ``int``.

    All columns except ``produced_at`` are treated as data columns. They are
    cast to ``str``, pushed through a fixed sequence of cell-wise clean-up
    steps (the order matters) and finally cast to ``int``. The input frame is
    copied, not modified.

    Raises ``ValueError`` when ``produced_at`` is missing (``list.remove``).
    """
    headers = list(df)
    headers.remove('produced_at')
    #print(headers)
    powers2 = df.copy()
    # NOTE(review): after the str cast, missing values are presumably the text
    # 'nan' rather than NaN, which would make the fillna("0") below a no-op;
    # such cells are still turned into 0 by the digit-extraction step.
    powers2 = powers2.astype(get_str_dict(headers))
    powers2[headers] = powers2[headers].fillna("0")
    #powers2[headers] = powers2[headers].applymap(lambda x: int(x) if str(x).isnumeric() else x) # cast floats to int
    powers2[headers] = powers2[headers].applymap(lambda x: "0" if not str(x).isnumeric() and ":" in x else x) # non-numeric cells containing ":" (time/date-like text) become "0"
    powers2[headers] = powers2[headers].applymap(lambda x: x if str(x).isnumeric() else "".join(re.findall(r"\d", x)) or 0) # keep only the digits of non-numeric cells (e.g. drops "."); cells without any digit become 0
    powers2[headers] = powers2[headers].applymap(lambda x: x if not (str(x).isnumeric() and len(str(x)) < 4) else int(str(x)[0:4])) # numeric cells SHORTER than 4 chars are cast to int; longer ones pass through unchanged. NOTE(review): the former comment said "trunc to first 4 digits", which this condition does not do — confirm the intent
    powers2[headers] = powers2[headers].fillna(0)
    #powers2[headers] = powers2[headers].replace(r'^\s*$', 0, regex=True) # replace emptystrings with zero
    powers3 = powers2.copy()
    # Final cast: every data column becomes int.
    powers3 = powers3.astype(get_int_dict(headers))
    #powers3['produced_at'] = pd.to_datetime(powers3['produced_at'])
    return powers3

In [42]:
def gen_df(filelist):
    """Clean every file in ``filelist`` and merge the results onto ``COMPLETE``.

    NOTE: a later cell defines ``gen_df(filelist, err_bl=True)``. When the
    notebook runs top to bottom that definition replaces this one.

    For each file: read it as ``;``-separated CSV with the data columns as
    strings, build ``produced_at`` (``conv_to_dt``), rename columns to block
    ids (``rename_to_blockid``), left-merge onto the file's hourly reference
    frame, clean the values (``fix_df``), write the result to ``fix_path(file)``
    and left-merge it onto the running frame. Unreadable files are printed and
    skipped; files failing ``conv_to_dt`` are skipped silently.

    Returns
    -------
    pandas.DataFrame
        ``COMPLETE`` with the columns of every processed file merged in.
    """
    tmp = COMPLETE
    for file in filelist:

        refdf = get_date_df_from_file(file)

        try:
            # First read only discovers the column names, so that every column
            # after the first two can be forced to str in the second read.
            header_probe = pd.read_csv(file, sep=";", na_values=["-", ''])
            dtdict = {col: str for col in list(header_probe)[2:]}
            df = pd.read_csv(file, sep=";", na_values=["-", ''], dtype=dtdict)
        except (ParserError, UnicodeDecodeError):
            print(file)
            continue
        name = get_plant(file)
        try:
            df2 = conv_to_dt(df)
        except (ParserError, TypeError):
            continue
        df3 = rename_to_blockid(df2, name)
        df4 = pd.merge(refdf, df3, how='left', on=['produced_at']) # fill nan values
        df5 = fix_df(df4)
        fixp = fix_path(file)
        df6 = df5.sort_values(by=['produced_at'])
        df6.to_csv(fixp, index=False)
        if tmp.empty:
            tmp = df6 # merge with itself if no other exists
        tmp = pd.merge(tmp, df6, how='left', on=['produced_at'])
    print("finished!")
    # The former ``return tmp2`` referenced a name that was never assigned
    # (NameError); the accumulated frame is ``tmp``.
    return tmp

In [43]:
def gen_df(filelist, err_bl=True):
    """Clean every file in ``filelist`` and merge the results onto ``COMPLETE``.

    For each file: read it as ``;``-separated CSV with the data columns as
    strings, build ``produced_at`` (``conv_to_dt``), rename columns to block
    ids (``rename_to_blockid``), left-merge onto the file's hourly reference
    frame, clean the values (``fix_df``), write the result to ``fix_path(file)``
    and left-merge it onto the running frame. Files that cannot be read or
    converted are printed together with the error and skipped.

    Parameters
    ----------
    filelist : sequence of str
        Paths of the form ``./<period>/<plant>_..._.csv``.
    err_bl : bool, optional
        Forwarded to ``read_csv(error_bad_lines=...)``; ``False`` skips
        malformed rows instead of raising.
        NOTE(review): ``error_bad_lines`` was deprecated in pandas 1.3 and
        removed in 2.0 (successor: ``on_bad_lines``); it is kept because the
        captured output indicates an older pandas — confirm before upgrading.

    Returns
    -------
    pandas.DataFrame
        The merged frame with missing values filled with 0 and data columns
        cast to int (``fill_to_int``). If that final cast fails, the error is
        printed and the un-cast merged frame is returned.
    """
    tmp = COMPLETE
    for file in filelist:

        refdf = get_date_df_from_file(file)

        try:
            # A 5-row probe discovers the column names, so that every column
            # after the first two can be forced to str in the full read.
            header_probe = pd.read_csv(file, sep=";", na_values=["-", ''], error_bad_lines=err_bl, nrows=5)
            dtdict = {col: str for col in list(header_probe)[2:]}
            df = pd.read_csv(file, sep=";", na_values=["-", ''], dtype=dtdict, error_bad_lines=err_bl)
        except (ParserError, UnicodeDecodeError) as e:
            print(e)
            print(file)
            continue
        name = get_plant(file)
        try:
            df2 = conv_to_dt(df)
        except (ParserError, TypeError, KeyError) as e:
            print(file)
            print(e)
            continue
        df3 = rename_to_blockid(df2, name)
        df4 = pd.merge(refdf, df3, how='left', on=['produced_at']) # fill nan values
        df5 = fix_df(df4)
        fixp = fix_path(file)
        df6 = df5.sort_values(by=['produced_at'])
        df6.to_csv(fixp, index=False)
        if tmp.empty:
            tmp = df6 # merge with itself if no other exists
        tmp = pd.merge(tmp, df6, how='left', on=['produced_at'])

    # Fill and cast once, after the last merge. Doing it inside the loop only
    # ever mattered for the final iteration, and left the result unbound when
    # no file was processed.
    try:
        return fill_to_int(tmp)
    except Exception as e:
        # Best effort, as before, but no longer silent.
        print("fill_to_int failed, returning the merged frame without int cast:")
        print(e)
        return tmp

In [44]:
def validate_files(filelist, err_bl=True):
    """Dry-run the read and timestamp-conversion steps of ``gen_df``.

    Nothing is written. Every file that cannot be read or converted is
    printed together with the error, as before, and additionally collected in
    the return value.

    Parameters
    ----------
    filelist : sequence of str
        Paths of the form ``./<period>/<file>``.
    err_bl : bool, optional
        Forwarded to ``read_csv(error_bad_lines=...)``.
        NOTE(review): ``error_bad_lines`` was removed in pandas 2.0 — confirm
        the pandas version before upgrading.

    Returns
    -------
    list of (str, str)
        ``(file, error text)`` for every failing file; empty when all pass.

    Raises
    ------
    ValueError
        From ``get_date_df_from_file`` when a file sits in an unknown period
        folder.
    """
    failures = []
    for file in filelist:

        # Only called for its check of the period folder; the frame itself is
        # not needed here.
        get_date_df_from_file(file)

        try:
            # A 5-row probe discovers the column names, so that every column
            # after the first two can be forced to str in the full read.
            header_probe = pd.read_csv(file, sep=";", na_values=["-", ''], error_bad_lines=err_bl, nrows=5)
            dtdict = {col: str for col in list(header_probe)[2:]}
            df = pd.read_csv(file, sep=";", na_values=["-", ''], dtype=dtdict, error_bad_lines=err_bl)
        except (ParserError, UnicodeDecodeError) as e:
            print(e)
            print(file)
            failures.append((file, str(e)))
            continue
        try:
            conv_to_dt(df)
        except (ParserError, TypeError, KeyError) as e:
            print(file)
            print(e)
            failures.append((file, str(e)))
            continue
    return failures

In [45]:
#validate_files(FILES, err_bl=True)

In [46]:
# Same call as the earlier mapper cell; re-running it rebinds the module-level
# `mapper` that get_columns reads.
# NOTE(review): presumably repeated to pick up edits to plantmapper.json — confirm.
mapper = get_mapper('plantmapper.json')

In [47]:
#test = gen_df(FILES, True)

In [48]:
#FILES

In [49]:
# Run the cleaning pipeline over every collected file. err_bl=False is passed
# on to read_csv(error_bad_lines=...), so malformed rows are skipped (the
# "Skipping line ..." messages in the output) instead of raising.
parta = gen_df(FILES, err_bl=False)

b'Skipping line 4: expected 3 fields, saw 5\nSkipping line 5: expected 3 fields, saw 5\nSkipping line 6: expected 3 fields, saw 5\nSkipping line 7: expected 3 fields, saw 5\nSkipping line 8: expected 3 fields, saw 5\nSkipping line 9: expected 3 fields, saw 5\nSkipping line 10: expected 3 fields, saw 5\nSkipping line 11: expected 3 fields, saw 5\nSkipping line 12: expected 3 fields, saw 5\nSkipping line 13: expected 3 fields, saw 5\nSkipping line 14: expected 3 fields, saw 5\nSkipping line 15: expected 3 fields, saw 5\nSkipping line 16: expected 3 fields, saw 5\nSkipping line 17: expected 3 fields, saw 5\nSkipping line 18: expected 3 fields, saw 5\nSkipping line 19: expected 3 fields, saw 5\nSkipping line 20: expected 3 fields, saw 5\nSkipping line 21: expected 3 fields, saw 5\nSkipping line 22: expected 3 fields, saw 5\nSkipping line 23: expected 3 fields, saw 5\nSkipping line 24: expected 3 fields, saw 5\nSkipping line 25: expected 3 fields, saw 5\nSkipping line 26: expected 3 fields,

b'Skipping line 4: expected 3 fields, saw 5\nSkipping line 5: expected 3 fields, saw 5\nSkipping line 6: expected 3 fields, saw 5\nSkipping line 7: expected 3 fields, saw 5\nSkipping line 8: expected 3 fields, saw 5\nSkipping line 9: expected 3 fields, saw 5\nSkipping line 10: expected 3 fields, saw 5\nSkipping line 11: expected 3 fields, saw 5\nSkipping line 12: expected 3 fields, saw 5\nSkipping line 13: expected 3 fields, saw 5\nSkipping line 14: expected 3 fields, saw 5\nSkipping line 15: expected 3 fields, saw 5\nSkipping line 16: expected 3 fields, saw 5\nSkipping line 17: expected 3 fields, saw 5\nSkipping line 18: expected 3 fields, saw 5\nSkipping line 19: expected 3 fields, saw 5\nSkipping line 20: expected 3 fields, saw 5\nSkipping line 21: expected 3 fields, saw 5\nSkipping line 22: expected 3 fields, saw 5\nSkipping line 23: expected 3 fields, saw 5\nSkipping line 24: expected 3 fields, saw 5\nSkipping line 25: expected 3 fields, saw 5\nSkipping line 26: expected 3 fields,

KeyError
Kraftwerk_BASF_Ludwigshafen_Mitte
['Generation_DE GUD A 800 ']
Generation_DE GUD A 800 


b'Skipping line 7403: expected 5 fields, saw 9\nSkipping line 7404: expected 5 fields, saw 9\nSkipping line 7405: expected 5 fields, saw 9\nSkipping line 7406: expected 5 fields, saw 9\nSkipping line 7407: expected 5 fields, saw 9\nSkipping line 7408: expected 5 fields, saw 9\nSkipping line 7409: expected 5 fields, saw 9\nSkipping line 7410: expected 5 fields, saw 9\nSkipping line 7411: expected 5 fields, saw 9\nSkipping line 7412: expected 5 fields, saw 9\nSkipping line 7413: expected 5 fields, saw 9\nSkipping line 7414: expected 5 fields, saw 9\nSkipping line 7415: expected 5 fields, saw 9\nSkipping line 7416: expected 5 fields, saw 9\nSkipping line 7417: expected 5 fields, saw 9\nSkipping line 7418: expected 5 fields, saw 9\nSkipping line 7419: expected 5 fields, saw 9\nSkipping line 7420: expected 5 fields, saw 9\nSkipping line 7421: expected 5 fields, saw 9\nSkipping line 7422: expected 5 fields, saw 9\nSkipping line 7423: expected 5 fields, saw 9\nSkipping line 7424: expected 5 f

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Abwinden-Asten
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
Altenw_rth
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Donaustadt
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Fernheizkraftwerk_Mellach
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Gerlos
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
Gersteinwerk
['Generation_DE G1[MW]', 'Generation_DE I1[MW]', 'Generation_DE F1[MW]', 'Generation_DE H1[MW]', 'Generation_DE I2[MW]', 'Generation_DE K1 [MW]', 'Generation_DE F2[MW]', 'Generation_DE G2[MW]', 'Generation_DE K2[MW]']
Generation_DE K1 [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
Greifenstein
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
H_usling
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
K_stenkraftwerk_K.I.E.L.
['Generation_DE BHKW Modul 1-20']
Generation_DE BHKW Modul 1-20


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
Kaprun_Hauptstufe
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
Kaprun_Oberstufe
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Kraftwerk_BASF_Ludwigshafen_Mitte
['Generation_DE [MW]', 'Generation_DE GUD A 800 ']
Generation_DE [MW]
KeyError
Kraftwerk_BASF_Ludwigshafen_Mitte
['Generation_DE [MW]', 'Generation_DE GUD A 800 ']
Generation_DE GUD A 800 


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
Malta_Hauptstufe
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
Malta_Oberstufe
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
Mayrhofen
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
Melk
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Ottensheim-Wilhering
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Riedersbach
['Generation_DE Riedersbach 1[MW]', 'Generation_DE Riedersbach 2[MW]']
Generation_DE Riedersbach 1[MW]
KeyError
Riedersbach
['Generation_DE Riedersbach 1[MW]', 'Generation_DE Riedersbach 2[MW]']
Generation_DE Riedersbach 2[MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


KeyError
Ro_hag
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Schwarzach
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Timelkam_
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Wallsee-Mitterkirchen
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the c

KeyError
Ybbs-Persenbeug
['Generation_DE [MW]']
Generation_DE [MW]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


In [51]:
list(parta)

['produced_at',
 'BNA0067_x',
 'BNA1404_x',
 'BNA0124_x',
 'BNA0123_x',
 'BNA0122_x',
 'BNA0115_x',
 'BNA0116_x',
 'BNA0157_x',
 'BNA0172a_x',
 'BNA0172b_x',
 'BNA0439_x',
 'BNA0442_x',
 'BNA0199_x',
 'BNA0395_x',
 'BNA0396_x',
 'BNA0397_x',
 'BNA0213_x',
 'BNA0214_x',
 'BNA0218_x',
 'BNA0217_x',
 'BNA0219_x',
 'BNA1061_x',
 'BNA1060_x',
 'BNA0605_x',
 'BNA0604_x',
 'BNA0606_x',
 'BNA0602_x',
 'BNA0603_x',
 'BNA0147_x',
 'BNA0744_x',
 'BNA0745_x',
 'BNA0309_x',
 'BNA0305_x',
 'BNA0304_x',
 'BNA0307_x',
 'BNA0308_x',
 'BNA0312_x',
 'BNA0310_x',
 'BNA0303_x',
 'BNA0311_x',
 'BNA0306_x',
 'BNA0302_x',
 'BNA0314_x',
 'BNA0313_x',
 'BNA0421_x',
 'BNA0420_x',
 'BNA0995_x',
 'BNA0993_x',
 'BNA0994_x',
 'BNA0686_x',
 'BNA0526_x',
 'BNA1040_x',
 'BNA1042_x',
 'BNA1039_x',
 'BNA1041_x',
 'BNA1043_x',
 'BNA1046a_x',
 'BNA1044_x',
 'BNA1045_x',
 'BNA1046b_x',
 'BNA0350_x',
 'BNA0352_x',
 'BNA0353_x',
 'BNA0351_x',
 'BNA0251_x',
 'BNA0015_x',
 'BNA0016_x',
 'BNA0017_x',
 'BNA0018_x',
 'BNA0020_x',


In [58]:
CDF = gen_power3(FILES)

FileNotFoundError: [Errno 2] No such file or directory: 'fix/./2015/Bergkamen_201501010000_201512312345_28.csv'

In [1]:
partb = melt_to_power(parta)

NameError: name 'melt_to_power' is not defined

In [54]:
testfiles = get_fix_files(FIX_DIR)

In [55]:
# Group the data files by the part of their path that precedes the first "_20"
# (e.g. './2015/Bergkamen_201501010000_...csv' -> './2015/Bergkamen'),
# keeping the files of each group in the order they appear in FILES.
FILES_DICT = {}
for f in FILES:
    name = f.split("_20")[0]
    FILES_DICT.setdefault(name, []).append(f)

In [56]:
DATEDF_LIST = [gen_date_df(year) for year in [[2015], [2016, 2017], [2018, 2019], [2020]]]

In [None]:
CDF2 = melt_to_power(CDF)

In [None]:
CDF

In [None]:
#CDF.sort_values(by='produced_at')

In [None]:
#CDF.loc[CDF['BNA1404']]

In [None]:
CDF2

In [None]:
partb

In [None]:
F2 = ["./2016_17/Buschhaus_201601010000_201712312345_146.csv", "./2016_17/Brokdorf_201601010000_201712312345_150.csv"]

In [None]:
test1 = gen_df(F2)

In [None]:
test1.sort_values(by="BNA0439", ascending=False)

In [None]:
test1.sort_values(by="BNA0439", ascending=False)

In [None]:
#pd.concat([DATEDF_LIST[0], test1, test1], sort=False).drop_duplicates(subset=['produced_at'], keep=False)

In [None]:
#FILES[0:3]

In [None]:
#FILES_DICT

In [None]:
#testfiles

In [None]:
#DATEDF_LIST[2]

In [None]:
CDF2.to_csv("CDF2t1.csv", index=False)

In [None]:
#CDF = pd.read_csv("CDF.csv", parse_dates=['produced_at'])

In [None]:
#CDF2.to_csv("CDF2.csv", index=False)

In [None]:
#CDF = pd.read_csv("produced_power.csv", parse_dates=['produced_at'])

In [None]:
CDF.dtypes

In [None]:
CDF

In [None]:
df = CDF2

In [None]:
#df.to_csv("produced_power_pg.csv", index=True, header=False)
#df.to_csv("produced_power.csv", index=False)
#df.to_csv("produced_power_nh.csv", index=False, header=False)

In [None]:
CDF2

In [None]:
#CDF

In [None]:
#CDF.to_csv("CDF.csv", index=False)

In [None]:
list(CDF2)

In [None]:
#CDF.dtypes

In [106]:
# Sum every column of CDF per year (gy) and per month (gm), bucketing the rows
# by their 'produced_at' timestamp.
gy, gm = (CDF.resample(rule, on='produced_at').sum() for rule in ('1Y', '1M'))

In [107]:
# Move the 'produced_at' index of the monthly sums back into a column and copy
# it into 'year' and 'month' columns (both still hold the full timestamp here).
gm2 = gm.reset_index().assign(
    year=lambda frame: frame['produced_at'],
    month=lambda frame: frame['produced_at'],
)

In [108]:
# Reduce the timestamp copies in 'year' and 'month' to their calendar parts.
# The vectorised .dt accessors replace the former per-row parsing of
# str(timestamp): 'year' stays a 4-digit string, 'month' an integer (1-12).
gm2['year'] = gm2['year'].dt.strftime('%Y')
gm2['month'] = gm2['month'].dt.month.astype(int)

In [109]:
gm3 = gm2.sort_values(by=["year", 'month'])
gm4 = gm3.drop(columns='produced_at')

In [110]:
gm5 = gm4.melt(id_vars=["year", "month"], var_name='blockid', value_name='power')
gm5['power'] = gm5['power'].astype(int)

In [111]:
gm5

Unnamed: 0,year,month,blockid,power
0,2015,1,BNA0067,70621
1,2015,2,BNA0067,68088
2,2015,3,BNA0067,66709
3,2015,4,BNA0067,53328
4,2015,5,BNA0067,76938
...,...,...,...,...
15295,2019,8,BNA1093,58135
15296,2019,9,BNA1093,58514
15297,2019,10,BNA1093,110960
15298,2019,11,BNA1093,151811


In [None]:
gm5.to_csv("monthly.csv", header=False)

In [113]:
#gm4.to_csv("monthly.csv", header=False)

In [114]:
# Two cleaning passes over the `headers` columns of `powers2`.
# NOTE(review): neither `powers2` nor `headers` is defined before this cell in
# the recorded execution order (hence the NameError below) - run it after the
# cell that builds `powers2` and confirm what `headers` should contain.
#
# Pass 1 - remove dates from int column: a value that is not purely numeric and
# contains ":" becomes "0". The result is written back to `powers2` (it used to
# be assigned to `gm2`, so pass 2 never saw it), and the ":" check runs on
# str(x) so non-string cells no longer raise TypeError.
powers2[headers] = powers2[headers].applymap(lambda x: "0" if not str(x).isnumeric() and ":" in str(x) else x)
# Pass 2 - numeric values are kept as they are; anything else is reduced to its
# digits, falling back to 0 when no digit is left.
powers2[headers] = powers2[headers].applymap(lambda x: x if str(x).isnumeric() else "".join(re.findall(r"\d", x)) or 0)

NameError: name 'powers2' is not defined

In [None]:
gm2

In [None]:
gm2 = gm.copy()

In [None]:
gm

In [None]:
# Group CDF2 by its 'produced_at' values. The expression had been pasted twice
# on one line (plus a trailing comma), which is a syntax error.
CDF2.groupby(CDF2['produced_at'])

In [None]:
CDF2

In [None]:
COMPLETE.shape

In [None]:
#FILES

In [None]:
#mask = CDF.duplicated(subset=['produced_at'])
#CDF[mask].sort_values(by=['produced_at'])

In [None]:
FT = testfiles[15:18]
testdf = gen_power2(FT)
#testdf

In [None]:
#set subtraction
#pd.concat([COMPLETE, df, df]).drop_duplicates(subset=['produced_at'], keep=False)

In [None]:
#a = gen_df([FILES[0]])

In [None]:
failed = gen_power(["./2016_17/Heizkraftwerk_Altbach_Deizisau_201601010000_201712312345_10.csv"])

In [None]:
mq = failed.duplicated(subset=['produced_at', 'blockid'])

In [None]:
#failed[mq]

In [None]:
# Number of hours in five 365-day years plus one extra day
# (presumably the leap day of the 2015-2019 span - confirm).
a = 24 * (5 * 365 + 1)

In [None]:
a

In [None]:
COMPLETE

In [None]:
base = datetime.datetime(2015, 1, 1)
date_list = [base + datetime.timedelta(hours=x) for x in range(0, a)]

In [None]:
date_list[a-1]

In [None]:
# One row per entry of date_list. The column is named 'produced_at' (it was
# misspelled 'producet_at') so it matches the timestamp column used by the
# other frames in this notebook, e.g. COMPLETE.
datedf = pd.DataFrame(data={'produced_at': date_list})

In [None]:
datedf

In [None]:
tdf = pd.read_csv("./fix/./2015/Boxberg_201501010000_201512312345_71.csv")

In [None]:
tdf.dtypes

In [None]:
TEST = "1.234"

In [None]:
# Scratch comparison of raw vs. regular string escaping for a regex pattern.
# The trailing comments show the characters each literal actually contains.
r1 = r"\d"  # \d   - backslash + d (the regex digit class)
r2 = r"\\d"  # \\d  - as a regex: one literal backslash followed by the letter d
r3 = r"\\\d"  # \\\d - as a regex: one literal backslash followed by a digit
r4 = "\\d"  # \d   - same characters as r1
r5 = "\\\\d"  # \\d  - same characters as r2

In [None]:
test = re.findall(r"\d", TEST)

In [None]:
"".join(test)

In [None]:
alist = ['BNA0104', 'BNA0124']

In [None]:
tdf

In [None]:
tdf[alist] = tdf[alist].replace(['-'], [0])

In [None]:
errs = validate_dirs(FOLDERS)

In [None]:
errs

In [None]:
#df = gen_powers(FOLDERS)

In [None]:
mask = (df['produced_at'] > "2015-01-01 00:00:00") & (df['produced_at'] <= "2017-01-01 00:00:00")

In [None]:
df.loc[mask]

In [None]:
df = df.sort_values(by=["produced_at", "blockid"]).reset_index(drop=True)

In [None]:
mask = df.duplicated(subset=["produced_at", "blockid"])

In [None]:
df[mask]

In [None]:
fs = get_files_from_folder(FOLDERS[0])

In [None]:
bk = fs[1]

In [None]:
tf = pd.read_csv(bk, sep=";")#, na_values=0)

In [None]:
date, tod = ("Datum", "Uhrzeit")
tf["produced_at"] = tf[date] + " " + tf[tod]

In [None]:
tf.loc[tf.duplicated(['produced_at'], keep=False)]

In [None]:
tf.sort_values(by=['Datum'])

In [None]:
# Parse the combined "Datum Uhrzeit" strings into timestamps.
# dayfirst=True: the Datum values are dot-separated and presumably DD.MM.YYYY
# (German export - confirm against a raw file). Without it, ambiguous dates
# such as 02.01.2015 are read month-first.
tf["produced_at"] = pd.to_datetime(tf['produced_at'], dayfirst=True)

In [None]:
tf

In [None]:
tf2 = conv_to_dt(tf)

In [None]:
# Shortcuts to the per-period data folders, named after the first year each one
# covers. FOLDERS is sorted (['./2015', './2016_17', './2018_19', './2020']), so
# the indices follow that order; the previous 2/0/1 mapping made F15 point at
# './2018_19'.
F15, F16, F18 = FOLDERS[0], FOLDERS[1], FOLDERS[2]

In [None]:
F15

In [None]:
df.sort_values(by=['produced_at', 'blockid'])

In [None]:
df.loc[mask]

In [None]:
df

In [None]:
ft = pd.read_csv("faulty.csv", sep=';')

In [None]:
errs = validate_dirs(FOLDERS)

In [None]:
#errs

In [None]:
is_valid_df(ft)

In [None]:
ft.loc[ft['Datum'].apply(lambda x: len(x.split("."))) != 3].shape[0]

In [None]:
df = gen_powers(FOLDERS)

In [None]:
df = pd.read_csv("./2015/Bergkamen_201501010000_201512312345_28.csv", sep=";")#, na_values=0)

In [None]:
#df

In [None]:
files = get_files_from_folder(F15)
final = gen_df(files)

In [None]:
'''
KeyError
Heizkraftwerk_Dresden-Nossener_Br_cke
['Generation_DE Heizkraftwerk Dresden-Nossener Brücke ']
Generation_DE Heizkraftwerk Dresden-Nossener Brücke 
ParserError
./2015/Kraftwerk_BASF_Ludwigshafen_Mitte_201501010000_201512312345_20.csv
KeyError
Kraftwerk_BASF_Ludwigshafen_Mitte
['Generation_DE Koepchenwerk[MW]']
Generation_DE Koepchenwerk[MW]
KeyError
Kraftwerk_BASF_Ludwigshafen_S_d
['Generation_DE GUD C 200']
Generation_DE GUD C 200
KeyError
Kraftwerk_West
['Generation_DE West 2[MW]', 'Generation_DE West 1[MW]']
Generation_DE West 2[MW]
KeyError
Kraftwerk_West
['Generation_DE West 2[MW]', 'Generation_DE West 1[MW]']
Generation_DE West 1[MW]

'''

In [None]:
#files

In [None]:
name = get_name_from_file(FN)

In [None]:
onlyfiles.sort()

In [None]:
#onlyfiles

In [None]:
dq = pd.read_csv(files[2], sep=";")

In [None]:
#final = gen_df(F15)

In [None]:
p2 = conv_to_power(final)

In [None]:
powers3.shape

In [None]:
p2

In [None]:
final = final.dropna(subset=["produced_at"])

In [None]:
powers = final.melt(id_vars=["produced_at"], var_name='blockid', value_name='power')

In [None]:
# Build an integer-typed copy of the melted frame:
#   powers2 - 'power' with missing values and the '-' placeholder set to 0
#   powers3 - powers2 with 'power' cast to int
# The '-' replacement is assigned back explicitly. The former
# `powers2.power.replace(..., inplace=True)` operated on a column accessor,
# which triggers SettingWithCopy warnings and is not guaranteed to modify
# powers2 itself.
powers2 = powers.copy()
powers2['power'] = powers2['power'].fillna(0).replace(['-'], [0])
powers3 = powers2.astype({"power": int})

In [None]:
powers3.dtypes

In [None]:
#powers3.to_csv("produced_power_pg.csv", index=True, header=False)
#powers3.to_csv("produced_power.csv", index=False)
#powers3.to_csv("produced_power_nh.csv", index=False, header=False)

In [None]:
list(powers3)

In [None]:
powers.blockid.str.len().drop_duplicates()

In [None]:
mask = powers['blockid'].str.len() == 8

In [None]:
# Write the melted `powers` frame to the "power" table, replacing the table if
# it already exists.
# The connection URL (user and password included) is read from the POWER_DB_URL
# environment variable instead of being hardcoded in the notebook, e.g.
#   postgresql://<user>:<password>@localhost:5432/power
# A KeyError is raised when the variable is not set.
engine = create_engine(os.environ["POWER_DB_URL"])

powers.to_sql("power", engine, if_exists="replace", method="multi")

In [None]:
powers.loc[mask]

In [None]:
powers

In [None]:
#powers.groupby("Blockid")['Blockid'].apply(lambda x: x.str.len(x).count())

In [None]:
powers.dtypes

In [None]:
df

In [None]:
dq = pd.read_csv(files[1], sep=";")
df = pd.read_csv(files[2], sep=";")

In [None]:
files[2]

In [None]:
files[1]

In [None]:
df2 = conv_to_dt(df)
df3 = rename_to_blockid(df2, "Braunkohlekraftwerk_Lippendorf")
dq2 = conv_to_dt(dq)
dq3 = rename_to_blockid(dq2, "Boxberg")

In [None]:
ids = dq3["Datetime"]

In [None]:
dq3[ids.isin(ids[ids.duplicated()])]

In [None]:
#dq3.duplicated(["Datetime"])

In [None]:
dq3.drop_duplicates(["Datetime"])

In [None]:
tot = pd.merge(dq3, df3, how='outer', on=['Datetime'])

In [None]:
tot.drop_duplicates(["Datetime"])

In [None]:
tot = pd.concat([dq3, df3], axis=1, sort=False)

In [None]:
tot

In [None]:
'''
Error tokenizing data. C error: Expected 3 fields in line 2355, saw 4

./2016_17/Cuno_Heizkraftwerk_Herdecke_201601010000_201712312345_17.csv
Error tokenizing data. C error: Expected 4 fields in line 7786, saw 9

./2016_17/Duisburg_Heizkraftwerk_III_201601010000_201712312345_18.csv
Error tokenizing data. C error: Expected 3 fields in line 5981, saw 4

./2016_17/Gemeinschaftskraftwerk_Kiel_201601010000_201712312345_7.csv
Error tokenizing data. C error: Expected 4 fields in line 5861, saw 5

./2016_17/Huckingen_201601010000_201712312345_6.csv
Error tokenizing data. C error: Expected 5 fields in line 13947, saw 7

./2016_17/Kraftwerk_BASF_Ludwigshafen_Mitte_201601010000_201712312345_20.csv
Error tokenizing data. C error: Expected 5 fields in line 7403, saw 9

./2016_17/Kraftwerk_Mittelsb_ren_201601010000_201712312345_4.csv
Error tokenizing data. C error: Expected 5 fields in line 8430, saw 6

./2016_17/Kraftwerk_Werdohl-Elverlingsen_201601010000_201712312345_15.csv
Error tokenizing data. C error: Expected 3 fields in line 15454, saw 5

./2016_17/Kraftwerk_Wilhelmshaven_201601010000_201712312345_13.csv
Error tokenizing data. C error: Expected 4 fields in line 1721, saw 8

./2016_17/Reuter_West_201601010000_201712312345_16.csv
Error tokenizing data. C error: Expected 4 fields in line 7454, saw 11

./2016_17/Tiefstack_201601010000_201712312345_31.csv
Error tokenizing data. C error: Expected 3 fields in line 14633, saw 4

./2016_17/Trianel_Kohlekraftwerk_L_nen_201601010000_201712312345_26.csv
Error tokenizing data. C error: Expected 4 fields in line 5877, saw 6

./2016_17/Waldeck_2_201601010000_201712312345_32.csv
Error tokenizing data. C error: Expected 3 fields in line 7138, saw 10

./2016_17/Wehr_201601010000_201712312345_3.csv
Error tokenizing data. C error: Expected 3 fields in line 3689, saw 4

./2016_17/Weiher_201601010000_201712312345_21.csv

'''