Feature Engineering

In [None]:
import pandas as pd
import numpy as np
# Load the raw charging log and parse the 'timestamp' column into datetimes.
df = pd.read_csv("charging_logs.csv").assign(
    timestamp=lambda raw: pd.to_datetime(raw['timestamp'])
)

# Order rows by station, then chronologically within each station, and
# renumber the index from 0 so positional order matches the sort order.
sort_keys = ['station_id', 'timestamp']
df = df.sort_values(sort_keys).reset_index(drop=True)

print(df[sort_keys].head())

  station_id           timestamp
0  STATION_0 2024-01-15 01:00:06
1  STATION_0 2024-01-15 01:00:07
2  STATION_0 2024-01-15 01:00:12
3  STATION_0 2024-01-15 01:00:16
4  STATION_0 2024-01-15 01:00:18


In [12]:
# Calendar features derived from each reading's timestamp.
event_time = df['timestamp']
df['hour'] = event_time.dt.hour
df['day_of_week'] = event_time.dt.dayofweek  # Monday is 0

# Flag day_of_week values of 5 and above (Saturday/Sunday) as 1, others as 0.
df['is_weekend'] = df['day_of_week'].ge(5).astype(int)

calendar_cols = ['timestamp', 'hour', 'day_of_week', 'is_weekend']
print(df[calendar_cols].head())

            timestamp  hour  day_of_week  is_weekend
0 2024-01-15 01:00:06     1            0           0
1 2024-01-15 01:00:07     1            0           0
2 2024-01-15 01:00:12     1            0           0
3 2024-01-15 01:00:16     1            0           0
4 2024-01-15 01:00:18     1            0           0


In [None]:
# Recompute power and energy from the other readings and measure how far the
# reported values deviate from them.

# NOTE(review): the /1000 presumably converts volts * amps (W) to kW - confirm units.
df['power_expected'] = df['voltage'].mul(df['current']).div(1000)
df['power_deviation'] = (df['power_kw'] - df['power_expected']).abs()

# Energy uses the *reported* power (power_kw), not power_expected; the /3600
# turns a duration in seconds into hours.
df['energy_expected'] = df['power_kw'].mul(df['duration_sec']).div(3600)
df['energy_deviation'] = (df['energy_kwh'] - df['energy_expected']).abs()

consistency_cols = [
    'power_kw', 'power_expected', 'power_deviation',
    'energy_kwh', 'energy_expected', 'energy_deviation',
]
print(df[consistency_cols].head())

   power_kw  power_expected  power_deviation  energy_kwh  energy_expected  \
0     7.269        7.295623         0.026623      0.0384         0.038364   
1     7.815        7.813925         0.001075      0.0825         0.082492   
2     7.421        7.420356         0.000644      0.0866         0.086578   
3     8.417        8.398272         0.018728      0.0655         0.065466   
4     7.385        7.495911         0.110911      0.0492         0.049233   

   energy_deviation  
0          0.000036  
1          0.000008  
2          0.000022  
3          0.000034  
4          0.000033  


In [None]:
# Per-station mean and standard deviation of the main sensor readings.
# The result has one row per station_id and two-level columns (reading, statistic).
sensor_cols = ['voltage', 'current', 'power_kw', 'temperature_c']
station_stats = (
    df.groupby('station_id')[sensor_cols]
      .agg(['mean', 'std'])
)

print(station_stats.head())

               voltage               current            power_kw            \
                  mean        std       mean       std      mean       std   
station_id                                                                   
STATION_0   229.056492  10.628596  31.215631  3.224787  7.123302  1.115963   
STATION_1   227.129223  11.437979  31.171656  3.137334  7.052268  1.117949   
STATION_10  224.611379  11.193183  30.841194  2.936552  6.896894  1.068310   
STATION_11  229.089121  11.512170  29.193829  2.875845  6.661254  0.997319   
STATION_12  223.437181  12.155976  30.679840  3.064235  6.818360  1.129715   

           temperature_c            
                    mean       std  
station_id                          
STATION_0      42.927628  4.132092  
STATION_1      45.376608  3.706938  
STATION_10     35.633497  3.768836  
STATION_11     48.047258  3.585145  
STATION_12     46.547071  4.181571  


In [None]:
# Row-to-row deltas within each station, used to detect sudden changes.
# Relies on df already being sorted by station_id and timestamp; the first row
# of each station has no predecessor, so its delta is filled with 0.
# NOTE(review): grouping is per station, not per session, so a session's first
# reading is compared with whatever row precedes it at that station - confirm intended.
station_groups = df.groupby('station_id')
deltas = {
    'power_delta': station_groups['power_kw'].diff().fillna(0),
    'temp_delta': station_groups['temperature_c'].diff().fillna(0),
    'voltage_delta': station_groups['voltage'].diff().fillna(0),
}
for delta_name, delta_values in deltas.items():
    df[delta_name] = delta_values

delta_view = ['power_kw', 'power_delta', 'temperature_c', 'temp_delta', 'voltage', 'voltage_delta']
print(df[delta_view].head(10))

   power_kw  power_delta  temperature_c  temp_delta  voltage  voltage_delta
0     7.269        0.000          42.23        0.00   229.35           0.00
1     7.815        0.546          45.29        3.06   225.38          -3.97
2     7.421       -0.394          42.39       -2.90   228.60           3.22
3     8.417        0.996          43.23        0.84   233.87           5.27
4     7.385       -1.032          45.87        2.64   231.07          -2.80
5     7.557        0.172          41.26       -4.61   228.51          -2.56
6     7.360       -0.197          43.16        1.90   233.66           5.15
7     6.988       -0.372          42.48       -0.68   223.64         -10.02
8     6.740       -0.248          41.95       -0.53   228.42           4.78
9     7.015        0.275          46.28        4.33   229.43           1.01


In [None]:
# Absolute deviation of each reading from its own station's average.
# transform('mean') broadcasts the per-station mean back onto every row of that station.
baseline_specs = [
    ('power_kw', 'station_power_mean', 'station_power_deviation'),
    ('temperature_c', 'station_temp_mean', 'station_temp_deviation'),
]
for reading_col, mean_col, deviation_col in baseline_specs:
    df[mean_col] = df.groupby('station_id')[reading_col].transform('mean')
    df[deviation_col] = (df[reading_col] - df[mean_col]).abs()

baseline_view = [
    'power_kw', 'station_power_mean', 'station_power_deviation',
    'temperature_c', 'station_temp_mean', 'station_temp_deviation',
]
print(df[baseline_view].head())

   power_kw  station_power_mean  station_power_deviation  temperature_c  \
0     7.269            7.123302                 0.145698          42.23   
1     7.815            7.123302                 0.691698          45.29   
2     7.421            7.123302                 0.297698          42.39   
3     8.417            7.123302                 1.293698          43.23   
4     7.385            7.123302                 0.261698          45.87   

   station_temp_mean  station_temp_deviation  
0          42.927628                0.697628  
1          42.927628                2.362372  
2          42.927628                0.537628  
3          42.927628                0.302372  
4          42.927628                2.942372  


In [28]:
# Session-level totals, repeated on every row that belongs to the session.
session_groups = df.groupby('session_id')
total_energy = session_groups['energy_kwh'].transform('sum')
total_duration = session_groups['duration_sec'].transform('sum')

df['session_total_energy'] = total_energy
df['session_total_duration'] = total_duration

session_view = [
    'session_id', 'energy_kwh', 'session_total_energy',
    'duration_sec', 'session_total_duration',
]
print(df[session_view].head())

     session_id  energy_kwh  session_total_energy  duration_sec  \
0  a73a9733a702      0.0384                 1.983            19   
1  a73a9733a702      0.0825                 1.983            38   
2  a73a9733a702      0.0866                 1.983            42   
3  a73a9733a702      0.0655                 1.983            28   
4  a73a9733a702      0.0492                 1.983            24   

   session_total_duration  
0                     987  
1                     987  
2                     987  
3                     987  
4                     987  
