## Load data

In [77]:
import pandas as pd
loads = pd.read_csv('../data/clean/load/HighLoads.csv')
loads.head()
loads['total'] = loads.sum(axis=1)
loads.loc[loads['hour'] == 24, 'hour'] = 0
loads.index = pd.to_datetime(loads[['year', 'month', 'day', 'hour']])
last_year = loads.index.year[-1]
loads.loc[loads['hour'] == 0].index += pd.DateOffset(day=1)
#loads.loc[loads['year'] > last_year].index = loads.loc[loads['year'] > last_year].index.year - 1



In [78]:
loads['year'][-1]

2030

In [79]:
import os
import sys

def get_load_data(path='../data/clean/load', filename='HighLoads.csv', corrections=True, 
                  total=False, *args, **kwargs):
    """ Load consumption data
    
    TODO:
    This could be a csv or it could connect to a DB.
    """
    df = pd.read_csv(os.path.join(path, filename))
    # Calculate the sum of loads
    df['total'] = df.sum(axis=1)
    # Convert to datetime if does not exist
    last_year = df['year'].iloc[-1:].values
    if corrections:
        try:
            df.loc[df['hour'] == 24, 'hour'] = 0
            df.loc[df['hour'] == 0, 'hour'] +=  1
            # Fix below code to represent a year regression
            df.loc[df['year'] > last_year] -= pd.DateOffset(day=365)
        except ValueError as e:
            # TODO Add error if data is wrong
            pass
    df.index = pd.to_datetime(df[['year', 'month', 'day', 'hour']])
    
    if total:
        df = df[['total']].sort_index()
    return df.sort_index()

loads = get_load_data()

## Timepoints creation

This is the part where you can change the time resolution of switch. Here you can define the number of timepoints you will use for the analysis. One easy approach, to do it automatically is to groupb the data by maximum per month.

In [103]:
timepoints = loads.groupby([pd.TimeGrouper('A'), 
                                 pd.TimeGrouper('M')]).idxmax().add_suffix('_peakTime')
timepoints.head()

Unnamed: 0,Unnamed: 1,year_peakTime,month_peakTime,day_peakTime,hour_peakTime,01-hermosillo_peakTime,02-cananea_peakTime,03-obregon_peakTime,04-los_mochis_peakTime,05-culiacan_peakTime,06-mazatlan_peakTime,...,45-cozumel_peakTime,46-tijuana_peakTime,47-ensenada_peakTime,48-mexicali_peakTime,49-san_luis_rio_colorado_peakTime,50-villa_constitucion_peakTime,51-la_paz_peakTime,52-los_cabos_peakTime,53-mulege_peakTime,total_peakTime
2016-12-31,2016-01-31,2016-01-01 01:00:00,2016-01-01 01:00:00,2016-01-31 01:00:00,2016-01-01 23:00:00,2016-01-21 21:00:00,2016-01-21 21:00:00,2016-01-21 21:00:00,2016-01-21 21:00:00,2016-01-21 21:00:00,2016-01-21 21:00:00,...,2016-01-13 19:00:00,2016-01-16 21:00:00,2016-01-16 21:00:00,2016-01-16 21:00:00,2016-01-16 21:00:00,2016-01-27 21:00:00,2016-01-27 21:00:00,2016-01-27 21:00:00,2016-01-16 21:00:00,2016-01-28 20:00:00
2016-12-31,2016-02-29,2016-02-01 01:00:00,2016-02-01 01:00:00,2016-02-29 01:00:00,2016-02-01 23:00:00,2016-02-27 21:00:00,2016-02-27 21:00:00,2016-02-27 21:00:00,2016-02-27 21:00:00,2016-02-27 21:00:00,2016-02-27 21:00:00,...,2016-02-26 20:00:00,2016-02-20 21:00:00,2016-02-20 21:00:00,2016-02-20 21:00:00,2016-02-20 21:00:00,2016-02-17 21:00:00,2016-02-17 21:00:00,2016-02-17 21:00:00,2016-02-26 22:00:00,2016-02-25 20:00:00
2016-12-31,2016-03-31,2016-03-01 01:00:00,2016-03-01 01:00:00,2016-03-31 01:00:00,2016-03-01 23:00:00,2016-03-31 17:00:00,2016-03-31 17:00:00,2016-03-31 17:00:00,2016-03-31 17:00:00,2016-03-31 17:00:00,2016-03-31 17:00:00,...,2016-03-28 20:00:00,2016-03-25 23:00:00,2016-03-25 23:00:00,2016-03-25 23:00:00,2016-03-25 23:00:00,2016-03-26 21:00:00,2016-03-26 21:00:00,2016-03-26 21:00:00,2016-03-07 21:00:00,2016-03-31 21:00:00
2016-12-31,2016-04-30,2016-04-01 01:00:00,2016-04-01 01:00:00,2016-04-30 01:00:00,2016-04-01 23:00:00,2016-04-29 18:00:00,2016-04-29 18:00:00,2016-04-29 18:00:00,2016-04-29 18:00:00,2016-04-29 18:00:00,2016-04-29 18:00:00,...,2016-04-29 23:00:00,2016-04-21 17:00:00,2016-04-21 17:00:00,2016-04-21 17:00:00,2016-04-21 17:00:00,2016-04-25 22:00:00,2016-04-25 22:00:00,2016-04-25 22:00:00,2016-04-09 22:00:00,2016-04-29 17:00:00
2016-12-31,2016-05-31,2016-05-01 01:00:00,2016-05-01 01:00:00,2016-05-31 01:00:00,2016-05-01 23:00:00,2016-05-31 01:00:00,2016-05-31 01:00:00,2016-05-31 01:00:00,2016-05-31 01:00:00,2016-05-31 01:00:00,2016-05-31 01:00:00,...,2016-05-28 23:00:00,2016-05-27 20:00:00,2016-05-27 20:00:00,2016-05-27 20:00:00,2016-05-27 20:00:00,2016-05-30 18:00:00,2016-05-30 18:00:00,2016-05-30 18:00:00,2016-05-16 15:00:00,2016-05-27 22:00:00


Using the `timepoints` dataframe we can get the number of timepoints that will be used and the date.

In [81]:
# Calculate the number of timepoints
ts_num_tps = timepoints.groupby(level=[0]).size().unique()[0]
ts_num_tps

12

In [82]:
dates = timepoints['total_peakTime']
dates;

For this example, we will recreate a full day considering the maximum of each month. We will obtain `n`number of points in front and behind the timestamp of the maximum load.

In [83]:
def get_representative_day(data, dates, number=4):
    """ Construc a representative day based on a single timestamp
    
    Args:
    data
    dates
    number
    Todo: Write readme
    """
    years = []
    if isinstance(dates, pd.Series):
        for day in dates:
            i_date = day - pd.DateOffset(hours=12)
            f_date = day + pd.DateOffset(hours=12)
            mask = (data.index >= i_date) & (data.index <= f_date)
            # reset_index to preserve timepoint reference
            years.append(data.loc[mask].iloc[::number].reset_index())
    else:
        i_date = dates - pd.DateOffset(hours=12)
        f_date = dates + pd.DateOffset(hours=12)
        mask = (data.index >= i_date) & (data.index <= f_date)
        years.append(data.loc[mask].iloc[::number].reset_index())
    output_data = pd.concat(years)
    output_data.rename(columns={'index':'date'}, inplace=True)
    return output_data

output_data = get_representative_day(loads, dates[:2], number=7) # Fix this horrendus bug
output_data = output_data.loc[output_data.date.dt.year <= 2025]
output_data

Unnamed: 0,date,year,month,day,hour,01-hermosillo,02-cananea,03-obregon,04-los_mochis,05-culiacan,...,45-cozumel,46-tijuana,47-ensenada,48-mexicali,49-san_luis_rio_colorado,50-villa_constitucion,51-la_paz,52-los_cabos,53-mulege,total
0,2016-01-28 08:00:00,2016,1,28,8,554.301317,141.941123,286.110568,229.86627,273.186589,...,22.160019,280.261778,83.244114,482.525373,101.262285,20.754515,85.269937,96.884503,11.0,31481.057036
1,2016-01-28 15:00:00,2016,1,28,15,643.279145,164.725866,332.037749,266.764976,317.039181,...,30.289031,385.063646,114.37265,662.962251,139.128585,24.772748,101.778852,115.642087,14.0,35168.501283
2,2016-01-28 22:00:00,2016,1,28,22,657.60873,168.395273,339.43417,272.707391,324.101496,...,30.269911,412.597521,122.550836,710.367194,149.076938,26.824834,110.209849,125.221466,15.0,35846.423151
3,2016-01-29 05:00:00,2016,1,29,5,493.770214,126.440794,254.866572,204.764293,243.353924,...,21.296279,305.623223,90.77704,526.190053,110.42571,17.7213,72.807974,82.725104,9.0,28336.536088
0,2016-02-25 08:00:00,2016,2,25,8,601.845369,154.11583,310.651112,249.582575,296.618605,...,25.959402,286.184756,85.003374,492.722937,103.402335,21.094551,86.666975,98.471831,12.0,32565.258661
1,2016-02-25 15:00:00,2016,2,25,15,688.05657,176.192117,355.150259,285.333973,339.107669,...,34.945799,390.650069,116.031945,672.58037,141.147033,24.879871,102.218964,116.142147,15.0,36655.913554
2,2016-02-25 22:00:00,2016,2,25,22,706.853831,181.005572,364.852735,293.129113,348.371872,...,34.385249,401.281754,119.189797,690.884892,144.988402,27.434472,112.714547,128.067326,15.0,37046.791286
3,2016-02-26 05:00:00,2016,2,26,5,548.352524,140.417803,283.040013,227.399332,270.254735,...,24.841522,312.222903,92.737295,537.55269,112.810262,18.423548,75.693158,86.003277,10.0,29015.175584


In [84]:
len(output_data)

8

## Creating tab file

### Timestamp

The timestamp file needs to include the format:


In [85]:
identifier = 'P'
output_data['timestamp'] = output_data['date'].dt.strftime('%Y%m%d%H')
output_data['TIMESERIES'] = output_data['date'].dt.strftime('%Y_%m{}'.format(identifier))
output_data['daysinmonth'] = output_data['date'].dt.daysinmonth
output_data;

In [86]:
timepoints_tab = output_data[['timestamp', 'TIMESERIES', 'daysinmonth']]
timepoints_tab.index.name = 'timepoint_id'
tmp = timepoints_tab.reset_index(drop=True)
tmp = tmp.rename(columns={'TIMESERIES':'timeseries'})
tmp.index += 1  # To start on 1
tmp.index.name = 'timepoint_id'
tmp[['timestamp', 'timeseries']].to_csv('switch-inputs/timepoints.tab', sep='\t')
tmp;

In [87]:
from collections import OrderedDict
# Todo implement multiple periods based on the data
d = OrderedDict({'INVESTMENT_PERIOD': [2016], 'period_start': [2015], 'period_end':[2025]})
periods_tab = pd.DataFrame(d)
periods_tab= periods_tab.set_index('INVESTMENT_PERIOD')
periods_tab.to_csv('switch-inputs/periods.tab', sep='\t')
periods_tab.head()

Unnamed: 0_level_0,period_start,period_end
INVESTMENT_PERIOD,Unnamed: 1_level_1,Unnamed: 2_level_1
2016,2015,2025


In [88]:
timeseries_tab = timepoints_tab[['TIMESERIES', 'daysinmonth']].drop_duplicates('TIMESERIES').reset_index(drop=True)
ts_duration_of_tp = 6#(24/len(output_data))
timeseries_tab['ts_period'] = 2016 # Fix this to change investment period
timeseries_tab['ts_duration_of_tp'] = ts_duration_of_tp
timeseries_tab['ts_num_tps'] = output_data[['timestamp', 'TIMESERIES']].groupby('TIMESERIES').count().values
timeseries_tab['ts_scale_to_period'] = 10*24*(365/2)/(timeseries_tab['ts_duration_of_tp']*timeseries_tab['ts_num_tps'])
timeseries_tab.index +=1
timeseries_tab.index.name = 'timepoint_id'
del timeseries_tab['daysinmonth']
timeseries_tab.to_csv('switch-inputs/timeseries.tab', index=False, sep='\t')
timeseries_tab.head()

Unnamed: 0_level_0,TIMESERIES,ts_period,ts_duration_of_tp,ts_num_tps,ts_scale_to_period
timepoint_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2016_01P,2016,6,4,1825.0
2,2016_02P,2016,6,4,1825.0


In [89]:
len(timeseries_tab)

2

## Variable capacity factor  data

In [90]:
import pandas as pd
data_path = '../data/clean/SWITCH/'
ren_cap_data = pd.read_csv(data_path + 'ren-all.csv', index_col=0, parse_dates=True)
ren_cap_data.head()

Unnamed: 0_level_0,capacity_factor,GENERATION_PROJECT
datetime_mexico-utc-6,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-01-01 00:00:00,0.0,cs_pp_01
2014-01-01 01:00:00,0.0,cs_pp_01
2014-01-01 02:00:00,0.0,cs_pp_01
2014-01-01 03:00:00,0.0,cs_pp_01
2014-01-01 04:00:00,0.0,cs_pp_01


In [91]:
renewable_plants = len(ren_cap_data.GENERATION_PROJECT.unique())
renewable_plants

287

In [92]:
ren_cap_data_year = ren_cap_data.index.year.unique()
ren_cap_data_year

Int64Index([2014], dtype='int64', name='datetime_mexico-utc-6')

In [93]:
periods = set(output_data.date.dt.year)
periods

{2016}

In [94]:
# Get the generation using the filter dates
filter_dates = pd.DatetimeIndex(output_data['date'].reset_index(drop=True))
df = pd.DataFrame([])
ren_tmp = ren_cap_data.copy()
ren_tmp.index = ren_tmp.index + pd.DateOffset(years=2)
#df = df.append(ren_tmp)
for year in periods:
    df = df.append(ren_tmp)
    ren_tmp.index = ren_tmp.index + pd.DateOffset(years=1)
grouped = df.loc[filter_dates].dropna().reset_index(drop=True).groupby('GENERATION_PROJECT', as_index=False)
tmp = []
for name, group in grouped:
    tmp.append(group.reset_index(drop=True))
variable_cap = pd.concat(tmp)

In [95]:
os.remove("switch-inputs/variable_capacity_factors.tab")
variable_tab = variable_cap.groupby('GENERATION_PROJECT')
for keys in variable_tab.groups.keys():
    data = variable_tab.get_group(keys).reset_index(drop=True)
    data.index +=1
    data.index.name = 'timepoint'
    data.rename(columns={'capacity_factor': 'gen_max_capacity_factor'},
               inplace=True)
    data.reset_index()[['GENERATION_PROJECT', 'timepoint', 'gen_max_capacity_factor']].to_csv('switch-inputs/variable_capacity_factors.tab', 
                                                                  sep='\t', index=False, 
                mode='a', header=(not os.path.exists('switch-inputs/variable_capacity_factors.tab')))

In [96]:
loads_tmp = loads[loads.year <= 2025]
list_tmp = []
tmp = (loads_tmp.loc[output_data['date']].drop(['year', 'month','day','hour', 'total'], axis=1).reset_index()
        .drop_duplicates('index').reset_index(drop=True))
del tmp['index']
tmp = tmp.unstack(0)
for name, group in tmp.groupby(level=0):
    list_tmp.append(group.reset_index())

In [97]:
loads_tab = pd.concat(list_tmp)
loads_tab

Unnamed: 0,level_0,level_1,0
0,01-hermosillo,0,554.301317
1,01-hermosillo,1,643.279145
2,01-hermosillo,2,657.608730
3,01-hermosillo,3,493.770214
4,01-hermosillo,4,601.845369
5,01-hermosillo,5,688.056570
6,01-hermosillo,6,706.853831
7,01-hermosillo,7,548.352524
0,02-cananea,0,141.941123
1,02-cananea,1,164.725866


In [98]:
loads_tab = pd.concat(list_tmp)
loads_tab.index += 1
loads_tab = loads_tab.rename(columns={'level_0':'LOAD_ZONE', 0:'zone_demand_mw'})
del loads_tab['level_1']
loads_tab.index.name = 'TIMEPOINT'
loads_tab = loads_tab.reset_index()[['LOAD_ZONE', 'TIMEPOINT', 'zone_demand_mw']]
loads_tab.to_csv('switch-inputs/loads.tab', sep='\t', index=False)

In [99]:
loads_tab

Unnamed: 0,LOAD_ZONE,TIMEPOINT,zone_demand_mw
0,01-hermosillo,1,554.301317
1,01-hermosillo,2,643.279145
2,01-hermosillo,3,657.608730
3,01-hermosillo,4,493.770214
4,01-hermosillo,5,601.845369
5,01-hermosillo,6,688.056570
6,01-hermosillo,7,706.853831
7,01-hermosillo,8,548.352524
8,02-cananea,1,141.941123
9,02-cananea,2,164.725866
