In [None]:
# Input data: *_attr.csv, *_hh/*.csv
# Output data: *_monthly/*.csv, *_monthly_interval/*.csv, *_profiles/*.csv, *_profiles_interval/*.csv

# Directory layout (e.g., for the Irish dataset):
# |--data
#    |--Irish_2010_attr.csv
#    |--Irish_2010_hh
#    |  |--*.csv
#    |
#    |--Irish_2010_monthly
#    |  |--*.csv
#    |
#    |--Irish_2010_monthly_interval
#    |  |--*.csv
#    |
#    |--Irish_2010_profiles
#    |  |--*.csv
#    |
#    |--Irish_2010_profiles_interval
#       |--*.csv


In [1]:
import pandas as pd
import numpy as np
import dateutil
import csv

# Path
def path(loc, id, year):
    """Return the relative path of one household's input CSV.

    The file is ``<id>.csv`` inside ``../data/<loc>_<year>_hh/``. ``loc`` must
    be a string; ``id`` and ``year`` are converted with ``str``.
    """
    folder = ''.join(['../data/', loc, '_', str(year), '_hh/'])
    return folder + str(id) + '.csv'

def path_m(loc, id, year):
    """Return the relative path of one household's monthly series CSV.

    The file is ``<id>.csv`` inside ``../data/<loc>_<year>_monthly/``. ``loc``
    must be a string; ``id`` and ``year`` are converted with ``str``.
    """
    folder = ''.join(['../data/', loc, '_', str(year), '_monthly/'])
    return folder + str(id) + '.csv'

def path_m_i(loc, id, year):
    """Return the relative path of one household's monthly interval CSV.

    The file is ``<id>.csv`` inside ``../data/<loc>_<year>_monthly_interval/``.
    ``loc`` must be a string; ``id`` and ``year`` are converted with ``str``.
    """
    folder = ''.join(['../data/', loc, '_', str(year), '_monthly_interval/'])
    return folder + str(id) + '.csv'

def path_p(loc, id, year):
    """Return the relative path of one household's monthly profiles CSV.

    The file is ``<id>.csv`` inside ``../data/<loc>_<year>_profiles/``. ``loc``
    must be a string; ``id`` and ``year`` are converted with ``str``.
    """
    folder = ''.join(['../data/', loc, '_', str(year), '_profiles/'])
    return folder + str(id) + '.csv'

def path_p_i(loc, id, year):
    """Return the relative path of one household's interval profiles CSV.

    The file is ``<id>.csv`` inside ``../data/<loc>_<year>_profiles_interval/``.
    ``loc`` must be a string; ``id`` and ``year`` are converted with ``str``.
    """
    folder = ''.join(['../data/', loc, '_', str(year), '_profiles_interval/'])
    return folder + str(id) + '.csv'

# Number of days in each calendar month, January..December. February is fixed
# at 28, so this table describes a non-leap year only.
month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

# Point series

In [295]:
def to_monthly(df, loc, id, year):
    """Write a year-long series to the monthly CSV, one row per calendar month.

    Parameters
    ----------
    df : sequence
        Values for the whole year in time order, 24 per day. Only slicing is
        used, so a list or a 1-D array both work.
    loc, id, year
        Forwarded to ``path_m`` to build the output file name.

    Row ``i`` of the file receives the ``month[i] * 24`` values belonging to
    month ``i`` (fewer if ``df`` is too short), so rows differ in length.
    """
    # Offsets of the month boundaries, accumulated once instead of re-summing
    # ``month`` for every row.
    bounds = [0]
    for days in month:
        bounds.append(bounds[-1] + days * 24)

    # The csv module requires its file object to be opened with newline='';
    # otherwise platforms that translate '\n' write '\r\r\n' and readers see
    # a blank row after every record.
    with open(path_m(loc, id, year), 'w', newline='') as f:
        writer = csv.writer(f)
        for start, stop in zip(bounds[:-1], bounds[1:]):
            writer.writerow(df[start:stop])

In [296]:
# Generate monthly series
def generate_series(loc, year):
    """Sum each household's readings in consecutive pairs and save them by month.

    Reads ``../data/<loc>_<year>_attr.csv`` for the household IDs (column
    ``ID``). For every ID the input file given by ``path`` is loaded and its
    ``KW`` column is added pairwise (rows 0+1, 2+3, ...); a trailing unpaired
    row is ignored. The resulting series is handed to ``to_monthly``.
    """
    attr = pd.read_csv('../data/' + loc + '_' + str(year) + '_attr.csv')

    for household in attr['ID']:
        kw = pd.read_csv(path(loc, household, year))['KW'].values

        # Strided slices pair element 2j with element 2j+1. This yields the
        # same sums as looking each value up through the Series one at a
        # time, without the per-element pandas indexing cost.
        n_pairs = len(kw) // 2
        pair_sums = kw[0:2 * n_pairs:2] + kw[1:2 * n_pairs:2]

        # list() keeps the elements as NumPy scalars, so they are written to
        # the CSV exactly as scalar-by-scalar results would be.
        to_monthly(list(pair_sums), loc, household, year)

In [297]:
# Generate monthly profiles
def generate_profiles(loc, year):
    """Compute the mean 24-value daily profile of every month per household.

    For each ID in the attribute file, the monthly series stored at
    ``path_m`` is read back (one row per month, no header). Each row is cut
    into ``month[i]`` consecutive 24-value days and the days are averaged
    position by position. The 12 profiles are saved, one per row, to
    ``path_p`` without index or header.
    """
    households = pd.read_csv('../data/' + loc + '_' + str(year) + '_attr.csv')

    for row_no in range(len(households)):
        household_id = households['ID'][row_no]
        monthly = pd.read_csv(path_m(loc, household_id, year), header=None)

        profiles = []
        for m, n_days in enumerate(month):
            month_row = monthly.iloc[m, :]
            # One 24-value slice per day of this month.
            days = [month_row[24 * d:24 * (d + 1)].values for d in range(n_days)]
            profiles.append(np.average(np.array(days), axis=0))

        out_file = path_p(loc, household_id, year)
        pd.DataFrame(profiles).to_csv(out_file, index=None, header=None)

In [164]:
# Irish 2010, point-valued: write the monthly series files first, then the
# monthly profiles, which are computed from those series files.
generate_series('Irish', 2010)
generate_profiles('Irish', 2010)

In [298]:
# London 2013, point-valued: write the monthly series files first, then the
# monthly profiles, which are computed from those series files.
generate_series('London', 2013)
generate_profiles('London', 2013)

# Interval-valued series

In [2]:
def to_monthly_i(df, loc, id, year):
    """Write a year-long multi-row series to the monthly interval CSV.

    Parameters
    ----------
    df : 2-D array
        Must support ``df[:, a:b]`` indexing; columns are time steps, 24 per
        day. ``generate_series_i`` passes a 2-row array (lower bounds, then
        upper bounds).
    loc, id, year
        Forwarded to ``path_m_i`` to build the output file name.

    For each month, every row of the corresponding column block is written as
    one CSV row, so a 2-row input produces 24 rows (two per month) whose
    lengths vary with the month.
    """
    # Offsets of the month boundaries, accumulated once instead of re-summing
    # ``month`` for every block.
    bounds = [0]
    for days in month:
        bounds.append(bounds[-1] + days * 24)

    # The csv module requires its file object to be opened with newline='';
    # otherwise platforms that translate '\n' write '\r\r\n' and readers see
    # a blank row after every record.
    with open(path_m_i(loc, id, year), 'w', newline='') as f:
        writer = csv.writer(f)
        for start, stop in zip(bounds[:-1], bounds[1:]):
            writer.writerows(df[:, start:stop])

In [7]:
# Generate monthly series (interval)
def generate_series_i(loc, year):
    """Build lower/upper interval series from consecutive reading pairs.

    Reads ``../data/<loc>_<year>_attr.csv`` for the household IDs (column
    ``ID``). For every ID the input file given by ``path`` is loaded, its
    ``KW`` column is doubled, and each consecutive pair of values (rows 0/1,
    2/3, ...) becomes one interval: the smaller value is the lower bound and
    the larger the upper bound. A trailing unpaired row is ignored. The
    2 x N array (lower row, upper row) is handed to ``to_monthly_i``.
    """
    attr = pd.read_csv('../data/' + loc + '_' + str(year) + '_attr.csv')

    for household in attr['ID']:
        # NOTE(review): the factor 2 presumably converts a per-half-hour
        # quantity to an hourly rate -- confirm the units of the KW column.
        kw = pd.read_csv(path(loc, household, year))['KW'].values * 2

        n_pairs = len(kw) // 2
        first = kw[0:2 * n_pairs:2]
        second = kw[1:2 * n_pairs:2]

        # Swap only where the first reading is strictly greater. When the
        # comparison is False (ties, or NaN on either side) the first reading
        # stays as the lower bound, matching a per-pair `first > second` test.
        swap = first > second
        lower = np.where(swap, second, first)
        upper = np.where(swap, first, second)

        to_monthly_i(np.array([lower, upper]), loc, household, year)

In [8]:
# Generate monthly profiles (interval)
def generate_profiles_i(loc, year):
    """Compute the mean daily interval profile of every month per household.

    For each ID in the attribute file, the monthly interval file stored at
    ``path_m_i`` is read back without header; rows ``2*i`` and ``2*i + 1``
    hold month ``i``. Each month's two rows are cut into ``month[i]``
    consecutive 24-column days and averaged over the days, giving a 2 x 24
    block. The 12 blocks are stacked vertically in month order and written to
    ``path_p_i`` without index or header.
    """
    households = pd.read_csv('../data/' + loc + '_' + str(year) + '_attr.csv')

    for row_no in range(len(households)):
        household_id = households['ID'][row_no]
        monthly = pd.read_csv(path_m_i(loc, household_id, year), header=None)

        blocks = []
        for m, n_days in enumerate(month):
            # The two rows belonging to this month.
            bounds = monthly.iloc[2 * m:2 * (m + 1), :].values
            days = [bounds[:, 24 * d:24 * (d + 1)] for d in range(n_days)]
            blocks.append(np.average(np.array(days), axis=0))

        profiles = np.vstack(blocks)
        out_file = path_p_i(loc, household_id, year)
        pd.DataFrame(profiles).to_csv(out_file, index=None, header=None)

In [9]:
# Irish 2010, interval-valued: write the monthly interval files first, then
# the interval profiles, which are computed from those files.
generate_series_i('Irish', 2010)
generate_profiles_i('Irish', 2010)

In [10]:
# London 2013, interval-valued: write the monthly interval files first, then
# the interval profiles, which are computed from those files.
generate_series_i('London', 2013)
generate_profiles_i('London', 2013)