## Main packages

In [None]:
import pandas as pd
import numpy as np

## Data

In [None]:
# Read the cleaned load table; the path is relative to the notebook folder.
loads = pd.read_csv(
    '../data/clean/loads/HighLoads.csv'
)
# Preview the first rows of the table.
loads.head()

## Data preparation

In [None]:
# Calculate the sum of all loads. The calendar columns are dropped first so
# that the year/month/day/hour numbers are not added into the total.
loads['total'] = loads.drop(['year', 'month', 'day', 'hour'], axis=1).sum(axis=1)

# Create datetimeindex for easy query. pd.to_datetime adds the hour on top of
# the assembled date, so rows with hour 24 already land on 00:00 of the
# following day in the index.
loads.index = pd.to_datetime(loads[['year', 'month', 'day', 'hour']])

# Transform 24 hours to 0 to be in accordance with CENACE
loads.loc[loads['hour'] == 24, 'hour'] = 0

# NOTE(review): two statements that used to follow here were removed because
# they never modified `loads`: both assigned to `.index` of the temporary
# copy returned by `loads.loc[mask]`.
#   * "Move changed hours to next day" is already reflected in the index
#     built above. Only the 'hour' column is rewritten, so the 'year',
#     'month' and 'day' columns of those rows still hold the original day.
#   * The adjustment for rows with `year > last_year` selected no rows,
#     since `last_year` is taken from the end of the index. The final
#     hour-24 row therefore stays on 1 January of the following year
#     -- confirm whether it should be dropped or wrapped around.

# Get the last year of the load timeseries
last_year = loads.index.year[-1]

loads = loads.sort_index()

## Representative days

In [None]:
def get_representative_day(data, dates, number=4):
    """Sample the 24-hour window centred on one or more timestamps.

    For every timestamp in ``dates`` the rows of ``data`` whose index lies
    between 12 hours before and 12 hours after it (both ends included) are
    selected, and every ``number``-th row of that window is kept.

    Args:
        data: pandas object whose index holds the timestamps.
        dates: a single timestamp, or a Series / Index / list / tuple of
            timestamps.
        number: step between the rows kept inside each window.

    Returns:
        DataFrame with the sampled windows stacked in the order of
        ``dates``. The former index becomes a regular column; when it was
        unnamed that column is renamed from ``index`` to ``date``. Row
        labels restart at 0 for every window.

    Raises:
        ValueError: raised by ``pd.concat`` when ``dates`` is an empty
            collection.
    """
    # Treat a scalar timestamp as a collection of one so that a single code
    # path handles both cases.
    if isinstance(dates, (pd.Series, pd.Index, list, tuple)):
        centers = list(dates)
    else:
        centers = [dates]

    half_window = pd.DateOffset(hours=12)
    windows = []
    for center in centers:
        start = center - half_window
        end = center + half_window
        in_window = (data.index >= start) & (data.index <= end)
        # reset_index to preserve timepoint reference
        windows.append(data.loc[in_window].iloc[::number].reset_index())

    output_data = pd.concat(windows)
    output_data.rename(columns={'index': 'date'}, inplace=True)
    return output_data

## Timepoints creation

To create the timepoints we take, for every month, the day that contains the peak load and the day whose average load is the median of that month's daily averages.

### Peak days

In [None]:
# Timestamp of the highest total load within every month of every year.
# pd.Grouper(freq=...) replaces pd.TimeGrouper, which is no longer available
# in current pandas. The 'A' (year end) and 'M' (month end) aliases are kept
# so the resulting index is unchanged.
# NOTE(review): recent pandas releases spell these aliases 'YE' / 'ME'.
timepoints = loads.groupby([pd.Grouper(freq='A'),
                            pd.Grouper(freq='M')])['total'].idxmax()
timepoints.head()

In [None]:
def get_representative_day(data, number=4, freq='MS'):
    """Sample the peak day of every period of a load series.

    The series is split by calendar year and by ``freq``. Within each group
    the day that contains the maximum value is selected and sampled every
    ``24 // number`` rows.

    Args:
        data: Series indexed by timestamps. It should be named ``total`` so
            that the value column is renamed to ``peak_day``.
        number: even number of timepoints wanted per day, between 2 and 24.
            One row out of every ``24 // number`` is kept, so hourly data
            yields exactly ``number`` rows when ``number`` divides 24.
        freq: pandas frequency alias used to group the data. Month start
            ('MS') avoids getting extra timepoints in an even division.

    Returns:
        DataFrame with one block of rows per group, holding the timestamps
        in column ``date`` and the sampled values in column ``peak_day``.
        Row labels restart at 0 for every group.

    Raises:
        ValueError: if ``number`` is odd or outside the 2..24 range.
    """
    if number & 1:
        raise ValueError('Odd number of timepoints. Use even number')
    if not 0 < number <= 24:
        raise ValueError('Number of timepoints must be between 2 and 24')

    # Distance between kept rows; derived from `number` rather than fixed.
    step = 24 // number

    samples = []
    # Grouping by the index year plus pd.Grouper(freq) yields the same groups
    # as the former pair of pd.TimeGrouper objects; the group keys are unused.
    for _, group in data.groupby([data.index.year, pd.Grouper(freq=freq)]):
        # Selecting by the 'YYYY-MM-DD' string picks every row of that day.
        peak_day = group.idxmax().strftime('%Y-%m-%d')
        samples.append(group.loc[peak_day].iloc[::step].reset_index())

    output_data = pd.concat(samples)
    output_data.rename(columns={'index': 'date', 'total': 'peak_day'}, inplace=True)

    return output_data
# Peak-day timepoints for the 2016 slice of the total load.
# The trailing semicolons keep the notebook from echoing the result.
total_2016 = loads['total'].loc['2016']
peak_dates = get_representative_day(total_2016, number=4);
peak_dates;

### Median days

In [None]:
def get_median_day(data, number=4, freq='MS'):
    """Sample the median day of every period of the total load.

    The ``total`` column is split by calendar year and by ``freq``. Within
    each group the daily means are computed and the day whose mean is
    closest to the median of those means is selected (the first such day on
    ties). For an odd number of days this is the day whose mean equals the
    median. The selected day is sampled every ``int(24 / number)`` rows.

    Args:
        data: DataFrame indexed by timestamps with a ``total`` column.
        number: number of timepoints wanted per day, between 1 and 24. One
            row out of every ``int(24 / number)`` is kept.
        freq: pandas frequency alias used to group the data.

    Returns:
        DataFrame with one block of rows per group, holding the timestamps
        in column ``date`` and the sampled values in column ``peak_day``.
        Row labels restart at 0 for every group.
        NOTE(review): ``peak_day`` is kept as the column name for backward
        compatibility even though the values come from the median day.

    Raises:
        ValueError: if ``number`` is outside the 1..24 range.
    """
    if not 0 < number <= 24:
        raise ValueError('Number of timepoints must be between 1 and 24')
    step = int(24 / number)

    # Work on the argument, not on a notebook-level variable.
    totals = data[['total']]

    samples = []
    for _, group in totals.groupby([totals.index.year, pd.Grouper(freq=freq)]):
        daily_mean = group.groupby(pd.Grouper(freq='D')).mean()['total']
        # idxmin returns the timestamp label of the closest day, which is
        # what the strftime call below needs.
        median_day = (daily_mean - daily_mean.median()).abs().idxmin()
        day_rows = group.loc[median_day.strftime('%Y-%m-%d')]
        samples.append(day_rows.iloc[::step].reset_index())

    output_data = pd.concat(samples)
    output_data.rename(columns={'index': 'date', 'total': 'peak_day'}, inplace=True)

    return output_data
# Median-day timepoints for the whole load table, using the default
# sampling and grouping arguments.
mean_days = get_median_day(data=loads)
# Preview the first five rows.
mean_days.head(5)

## Debug

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
# Plot the total load for January 2016. Rows are selected through .loc
# because current pandas no longer accepts a date string in `loads[...]`
# for row selection on a DataFrame.
loads.loc['2016-01'].plot(y='total', ax=ax)

In [None]:
# Minimum total load in January 2016. Rows are selected through .loc because
# current pandas no longer accepts a date string in `loads[...]` for row
# selection on a DataFrame.
loads.loc['2016-01', 'total'].min()