# Pivot multi-indexing + apply

In [None]:
import pandas as pd
import datetime
import seaborn as sns
import matplotlib.pyplot as plt

# Default every figure to 12x6 inches through seaborn's rc override.
sns.set(rc={'figure.figsize':(12,6)})
# NOTE(review): Matplotlib 3.6 deprecated the bare 'seaborn-*' style names in
# favour of 'seaborn-v0_8-*' — confirm the installed Matplotlib still accepts this.
plt.style.use(['seaborn-whitegrid'])

import warnings
# Ignores every warning category from here on, which also hides pandas
# deprecation / SettingWithCopy warnings — NOTE(review): consider narrowing.
warnings.simplefilter('ignore')

import utils

Recap from previous notebook

In [None]:
freq = "10min"  # NOTE(review): not referenced in the cells visible here

# Row filter: solar == 0, days == 14, and one of three strategies.
qry = ["solar==0",
       "days==14",
       "strategy in ['no_control', 'smooth', 'single_bucket']"]

# Load, filter and parse timestamps in one chain. `.assign` builds a new frame,
# so the datetime conversion is never written onto the result of `.query`
# (attribute assignment on a filtered frame can raise SettingWithCopyWarning).
df = (
    pd.read_csv("data/comfort_data.csv.gz", compression='gzip')
    .query(" and ".join(qry))
    .assign(timestamp=lambda frame: pd.to_datetime(frame.timestamp))
)
df.shape

# limit the data a bit...
qry = ["solar==0",
       "days==14",
       "feeder=='one'",
       "control==0",
       "season=='summer'",
       "attribute=='shower demand 110'",
       "strategy in ['no_control']"]

# `df.timestamp` was already converted to datetime when `df` was built, so it
# is not converted a second time here. `.copy()` makes `tmp` an independent
# frame instead of a slice of `df`, so later writes to it are unambiguous.
tmp = df.query(" and ".join(qry)).copy()

## Question #2

### What is the average cumulative % of customers impacted as a function of time?

Count a customer as unsatisfied only once...

(How many phone calls will the Vermont utility get?)

Warm up

In [None]:
# Rows are `shift`, columns are a (day, name) MultiIndex, and each cell holds
# the number of `value` records for that combination.
counts = tmp.pivot_table(
    values='value',
    index='shift',
    columns=['day', 'name'],
    aggfunc='count',
)
counts.head()

We need a function that only counts the first instance of an event (cold shower).

In [None]:
# Select one column by its full (day, name) key.
counts[(1, 'zmq_waterheater_194')]

In [None]:
# Index label of the first non-missing entry in that column.
counts[(1, 'zmq_waterheater_194')].first_valid_index()

In [None]:
# Total of all counts in that column (missing entries are skipped by sum).
counts[(1, 'zmq_waterheater_194')].sum()

In [None]:
import numpy as np

def fill_first_index(df):
    """Flag only the first non-missing entry of a column.

    Parameters
    ----------
    df : pandas.Series
        Despite the name, this is used on a single column (a Series), e.g.
        as handed over by ``DataFrame.apply(..., axis=0)``.

    Returns
    -------
    pandas.Series
        Float series on the same index as ``df``: 1.0 at the label returned
        by ``df.first_valid_index()`` and 0.0 everywhere else. All zeros when
        ``df`` has no valid entry.
    """
    first_label = df.first_valid_index()
    flags = pd.Series(np.zeros(df.shape[0]), index=df.index)

    # Nothing valid in this column: return the all-zero series unchanged.
    if first_label is None:
        return flags

    flags.loc[flags.index == first_label] = 1.0
    return flags

In [None]:
# Compare the raw total with the first-event-only total for one column.
trial = counts[(1, 'zmq_waterheater_194')]

raw_total = trial.sum()
first_only_total = fill_first_index(trial).sum()
raw_total, first_only_total

We can `apply` this to the columns (`axis=0`) of our dataframe.

In [None]:
# Run fill_first_index on every column; axis='index' is the named form of axis=0.
final = counts.apply(fill_first_index, axis='index')
final.head()

Check that we only have a single value per day and water heater.

In [None]:
# Distinct per-column totals of the first-event flags.
flags_per_column = final.sum(axis='index')
flags_per_column.unique()

In [None]:
# Sum the first-event flags over the inner column level, grouped by column
# level 0, then accumulate down the rows. `sum(axis=1, level=0)` was removed
# in pandas 2.0; transposing and grouping by level is the supported
# equivalent (sort=False keeps the existing column order).
per_group_cumulative = final.T.groupby(level=0, sort=False).sum().T.cumsum()

# Draw the individual curves first so the mean line ends up on top of them.
ax = per_group_cumulative.plot(legend=False, color='lightgrey')
per_group_cumulative.mean(axis=1).plot(lw=2, color='red', ax=ax)
ax.set_ylabel('cumulative first-event count')

Remember this number?

### Multiple Scenarios 

In [None]:
# Looser filter than the warm-up: strategy and control are left free so they
# can become column levels of the pivot.
qry = [
    "feeder=='one'",
    "season=='summer'",
    "attribute=='shower demand 110'",
]
tmp = df.query(" and ".join(qry))

# Column levels of the pivot, outermost first.
scenario_columns = ['feeder', 'season', 'strategy', 'control', 'day', 'name']
counts = tmp.pivot_table(
    values='value',
    index='shift',
    columns=scenario_columns,
    aggfunc='count',
)
counts.head()

In [None]:
# Keep only the first event in every column of the multi-scenario pivot.
first = counts.apply(fill_first_index, axis='index')
first.head()

In [None]:
# Sum over the innermost column level (grouping by levels 0-4), then
# accumulate down the rows. The `level=` keyword of `sum` was removed in
# pandas 2.0, so group the transposed frame by level instead (sort=False
# keeps the existing column order).
first.T.groupby(level=[0, 1, 2, 3, 4], sort=False).sum().T.cumsum().head()

In [None]:
# Step 1: sum first-event flags over the innermost column level (grouping by
# levels 0-4) and accumulate down the rows.
# Step 2: average those cumulative curves over level 4, grouping by levels 0-3.
# The `level=` keyword of `sum`/`mean` was removed in pandas 2.0, so both steps
# group the transposed frame instead (sort=False keeps the column order).
cumulative_by_level4 = (
    first.T.groupby(level=[0, 1, 2, 3, 4], sort=False).sum().T.cumsum()
)
final = cumulative_by_level4.T.groupby(level=[0, 1, 2, 3], sort=False).mean().T

In [None]:
# Plot every column under the ('one', 'summer', 'no_control') prefix.
no_control_curves = final[('one', 'summer', 'no_control')]
no_control_curves.plot()

In [None]:
# Plot every column under the ('one', 'summer') prefix.
summer_curves = final[('one', 'summer')]
summer_curves.plot()