https://realpython.com/fast-flexible-pandas

In [1]:
import pandas as pd
import numpy as np
from pprint import pprint as pp

In [99]:
pd.__version__

'0.23.4'

In [100]:
# Load energy.csv (path is relative to the working directory) into a DataFrame.
# No parse_dates argument is given, so this call does not convert date_time;
# the conversion is done separately by convert_with_format.
df = pd.read_csv('energy.csv')

In [106]:
df.head()

Unnamed: 0,date_time,energy_kwh
0,2013-01-01 00:00:00,0.586
1,2013-01-01 01:00:00,0.58
2,2013-01-01 02:00:00,0.572
3,2013-01-01 03:00:00,0.596
4,2013-01-01 04:00:00,0.592


In [107]:
# NOTE(review): the output recorded for this cell already lists date_time as
# datetime64[ns], even though read_csv was called without parse_dates and the
# conversion cell comes later. This looks like state left over from an earlier
# run -- confirm with Restart Kernel -> Run All.
df.dtypes

date_time     datetime64[ns]
energy_kwh           float64
dtype: object

In [108]:
def convert_with_format(df, column_name, date_format='%d/%m/%y %H:%M'):
    """Parse a column of date strings into datetimes using an explicit format.

    Passing an explicit format to ``pd.to_datetime`` avoids per-value format
    inference.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to parse. It is not modified.
    column_name : str
        Label of the column to convert.
    date_format : str, optional
        strptime-style format of the stored strings. Defaults to
        ``'%d/%m/%y %H:%M'`` (day/month/two-digit-year hour:minute), the
        format this function previously had hard-coded.

    Returns
    -------
    pandas.Series
        The parsed datetime values, aligned with ``df``'s index.
    """
    return pd.to_datetime(df[column_name], format=date_format)

In [109]:
# Overwrite the date_time column with the datetime Series returned by
# convert_with_format, so later cells can read .hour from each value.
df['date_time'] = convert_with_format(df, 'date_time')

In [110]:
df['date_time'].head()

0   2013-01-01 00:00:00
1   2013-01-01 01:00:00
2   2013-01-01 02:00:00
3   2013-01-01 03:00:00
4   2013-01-01 04:00:00
Name: date_time, dtype: datetime64[ns]

In [111]:
df.dtypes

date_time     datetime64[ns]
energy_kwh           float64
dtype: object

In [112]:
def apply_tariff(kwh, hour):
    """Return the electricity cost for `kwh` consumed during `hour`.

    The rate applied depends on the hour of day:

    * hours 0 up to (not including) 7   -> 12
    * hours 7 up to (not including) 17  -> 20
    * hours 17 up to (not including) 24 -> 28

    The result is ``rate * kwh``. Rates are presumably cents per kWh, judging
    by the 'cost_cents' column the caller stores the result in.

    Raises:
        ValueError: if `hour` is not within [0, 24).
    """
    # Reject out-of-range hours first so the brackets below only need
    # to test their upper bound.
    if not 0 <= hour < 24:
        raise ValueError(f'Invalid hour: {hour}')

    if hour < 7:
        tariff = 12
    elif hour < 17:
        tariff = 20
    else:
        tariff = 28
    return tariff * kwh

In [113]:
def apply_tariff_withapply(df):
    """Add a 'cost_cents' column to `df`, computed one row at a time.

    For every row, apply_tariff is called with the row's 'energy_kwh' value
    and the hour of its 'date_time' value. The frame is modified in place
    and nothing is returned.
    """
    def _cost_for_row(row):
        # 'date_time' must already hold datetime-like values exposing .hour.
        return apply_tariff(
            kwh=row['energy_kwh'],
            hour=row['date_time'].hour)

    # axis=1 hands each row (not each column) to the helper.
    df['cost_cents'] = df.apply(_cost_for_row, axis=1)
apply_tariff_withapply(df)

In [114]:
df.head()

Unnamed: 0,date_time,energy_kwh,cost_cents
0,2013-01-01 00:00:00,0.586,7.032
1,2013-01-01 01:00:00,0.58,6.96
2,2013-01-01 02:00:00,0.572,6.864
3,2013-01-01 03:00:00,0.596,7.152
4,2013-01-01 04:00:00,0.592,7.104


In [115]:
# Make date_time the index so later cells can work with df.index
# (e.g. df.index.hour). The result is rebound instead of using inplace=True:
# same resulting frame, but the statement composes with method chaining and
# does not mutate an object that earlier cells have already displayed.
df = df.set_index('date_time')

In [116]:
df.head()

Unnamed: 0_level_0,energy_kwh,cost_cents
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,0.586,7.032
2013-01-01 01:00:00,0.58,6.96
2013-01-01 02:00:00,0.572,6.864
2013-01-01 03:00:00,0.596,7.152
2013-01-01 04:00:00,0.592,7.104


In [118]:
# Boolean mask over the rows: True where the hour of the date_time index is
# in 17..23 -- the same hours for which apply_tariff uses its rate of 28.
df.index.hour.isin(range(17, 24))

array([False, False, False, ...,  True,  True,  True])