In [1]:
import pandas as pd
import numpy as np

In [2]:
# Hourly timestamps from 2020-01-01 00:00 through 2020-01-02 00:00 inclusive.
# Lowercase 'h' is used because the uppercase 'H' alias is deprecated in pandas 2.2+.
dt = pd.date_range(start='2020-01-01', end='2020-01-02', freq='1h')

In [3]:
# One uniform random value in [0, 1) per timestamp. The generator is seeded so
# that re-running the notebook reproduces the same data.
r_data = np.random.default_rng(42).random(len(dt))

In [4]:
# Series of the random values, indexed by their hourly timestamps.
ss = pd.Series(r_data, index=dt)

In [5]:
# Display the full hourly series.
ss

2020-01-01 00:00:00    0.507089
2020-01-01 01:00:00    0.680964
2020-01-01 02:00:00    0.462224
2020-01-01 03:00:00    0.229607
2020-01-01 04:00:00    0.711784
2020-01-01 05:00:00    0.278971
2020-01-01 06:00:00    0.396163
2020-01-01 07:00:00    0.575668
2020-01-01 08:00:00    0.489303
2020-01-01 09:00:00    0.000287
2020-01-01 10:00:00    0.667445
2020-01-01 11:00:00    0.457573
2020-01-01 12:00:00    0.499507
2020-01-01 13:00:00    0.002031
2020-01-01 14:00:00    0.561541
2020-01-01 15:00:00    0.727498
2020-01-01 16:00:00    0.430604
2020-01-01 17:00:00    0.873716
2020-01-01 18:00:00    0.718908
2020-01-01 19:00:00    0.269599
2020-01-01 20:00:00    0.482831
2020-01-01 21:00:00    0.656976
2020-01-01 22:00:00    0.689400
2020-01-01 23:00:00    0.515684
2020-01-02 00:00:00    0.274084
Freq: H, dtype: float64

In [11]:
# Build the frame column-wise: each column takes its dtype directly from its
# source array instead of being inferred from zipped per-row tuples.
df = pd.DataFrame({'date_time': dt, 'energy_kwh': r_data})

In [12]:
# New frame indexed by timestamp; df itself keeps 'date_time' as a column.
df1 = df.set_index('date_time')

In [13]:
# Boolean mask over df1's rows: True where the hour of day is 17 through 23.
peak_hours = (df1.index.hour >= 17) & (df1.index.hour <= 23)

In [14]:
# Charge the masked (peak) rows at 28 per kWh; rows outside the mask are not assigned.
df1.loc[peak_hours, 'cost_cents'] = 28 * df1.loc[peak_hours, 'energy_kwh']

In [15]:
def apply_tariff(kwh, hour):
    """Return the electricity cost for one hour of usage.

    The cost is ``kwh`` multiplied by a rate chosen from the hour of day:
    12 for hours in [0, 7), 20 for [7, 17), and 28 for [17, 24).
    (Presumably cents per kWh, since callers store the result in
    'cost_cents' -- confirm.)

    Args:
        kwh: Energy used during the hour.
        hour: Hour of day; must satisfy 0 <= hour < 24.

    Returns:
        The rate for ``hour`` multiplied by ``kwh``.

    Raises:
        ValueError: If ``hour`` is outside [0, 24).
    """
    # Reject out-of-range hours up front; the remaining checks can then be
    # simple upper-bound comparisons.
    if not 0 <= hour < 24:
        raise ValueError(f'Invalid hour: {hour}')
    if hour < 7:
        return 12 * kwh
    if hour < 17:
        return 20 * kwh
    return 28 * kwh

In [16]:
# @timeit(repeat=3, number=100) 
def apply_tariff_loop(df):
    """Compute the energy cost of every row using a positional index loop.

    For each row position, the 'energy_kwh' value and the hour of the
    'date_time' value are looked up via ``df.iloc`` and passed to
    ``apply_tariff``. The collected costs are written to a 'cost_cents'
    column on ``df``; the frame is modified in place and nothing is returned.
    """
    costs = [
        # Usage and hour of day for this row position.
        apply_tariff(df.iloc[pos]['energy_kwh'], df.iloc[pos]['date_time'].hour)
        for pos in range(len(df))
    ]
    df['cost_cents'] = costs

In [26]:
%%timeit
apply_tariff_loop(df)

5.88 ms ± 264 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


用 iterrows 循环 (looping with `iterrows`)

In [30]:
# Remove the 'cost_cents' column left by the previous timing run.
# errors='ignore' keeps this cell re-runnable when the column is already gone,
# and the non-inplace form avoids mutating the frame in place.
df = df.drop(columns='cost_cents', errors='ignore')

In [27]:
def apply_tariff_iterrows(df):
    """Compute the energy cost of every row by iterating with ``iterrows``.

    Each row's 'energy_kwh' value and the hour of its 'date_time' value are
    passed to ``apply_tariff``. The collected costs are written to a
    'cost_cents' column on ``df``; the frame is modified in place and
    nothing is returned.
    """
    costs = [
        # Usage and hour of day for this row; the index label is not needed.
        apply_tariff(record['energy_kwh'], record['date_time'].hour)
        for _, record in df.iterrows()
    ]
    df['cost_cents'] = costs

In [31]:
%%timeit
apply_tariff_iterrows(df)

2.01 ms ± 65.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
