In [18]:
import pandas as pd

# Read the ATM records from the project-relative CSV into the working frame `df`.
df = pd.read_csv(filepath_or_buffer='data/atm.csv')

In [19]:
# Peek at the first five rows to check the columns were parsed as expected.
df.head(n=5)

Unnamed: 0,ATM_ID,Date,Day_of_Week,Time_of_Day,Total_Withdrawals,Total_Deposits,Location_Type,Holiday_Flag,Special_Event_Flag,Previous_Day_Cash_Level,Weather_Condition,Nearby_Competitor_ATMs,Cash_Demand_Next_Day
0,ATM_0041,2022-04-25,Monday,Morning,57450,9308,Standalone,0,0,112953,Rainy,5,44165
1,ATM_0007,2023-11-24,Friday,Morning,72845,17896,Supermarket,0,0,115348,Clear,0,41249
2,ATM_0014,2022-08-27,Saturday,Morning,42957,12712,Supermarket,0,0,90731,Cloudy,1,29901
3,ATM_0029,2023-08-27,Sunday,Evening,43014,1375,Mall,0,0,88754,Rainy,5,44155
4,ATM_0028,2022-12-15,Thursday,Evening,36379,2938,Bank Branch,0,0,129312,Snowy,0,29784


In [20]:
# Report how many distinct ATMs the frame covers and how many rows each one
# has on average. `nunique()` ignores missing ATM_ID values by default.
total_rows = df.shape[0]
unique_atm_count = df['ATM_ID'].nunique()

print(
    f"Number of unique ATM_IDs: {unique_atm_count}",
    f"Total rows in dataset: {total_rows}",
    f"Average records per ATM: {total_rows / unique_atm_count:.1f}",
    sep="\n",
)

Number of unique ATM_IDs: 50
Total rows in dataset: 5658
Average records per ATM: 113.2


In [21]:
# Feature engineering: derive per-ATM lag, rolling-mean, cash-flow and
# cash-level features from the raw columns. Every history-based feature is
# computed inside its own ATM group so values never bleed from one machine
# into the next.
df['Date'] = pd.to_datetime(df['Date'])

# Order rows chronologically within each ATM; shift/rolling/diff below all
# operate on "the previous rows of the same ATM" in this order.
df = df.sort_values(['ATM_ID', 'Date'])

# Per-ATM views of the columns that feed the history-based features.
demand_by_atm = df.groupby('ATM_ID')['Cash_Demand_Next_Day']
withdrawals_by_atm = df.groupby('ATM_ID')['Total_Withdrawals']
cash_level_by_atm = df.groupby('ATM_ID')['Previous_Day_Cash_Level']

# Lag features within each ATM.
# NOTE(review): shift(n) steps back n *records*, not n calendar days; the
# "weekly pattern" reading of Lag_7 only holds if every ATM has exactly one
# row per day -- confirm against the data.
df['Cash_Demand_Lag_2'] = demand_by_atm.shift(2)
df['Cash_Demand_Lag_7'] = demand_by_atm.shift(7)  # Weekly pattern

# Rolling averages. `transform` returns a result aligned on the frame's own
# index, so the assignment stays correct regardless of row order or missing
# ATM_IDs (a positional `.values` assignment depends on both).
# NOTE(review): each window includes the current row's Cash_Demand_Next_Day.
# If that column is the prediction target, these two features leak it --
# confirm, and shift before rolling if so.
df['Cash_Demand_MA_3'] = demand_by_atm.transform(lambda s: s.rolling(3).mean())
df['Cash_Demand_MA_7'] = demand_by_atm.transform(lambda s: s.rolling(7).mean())

# Withdrawal patterns
df['Withdrawals_MA_7'] = withdrawals_by_atm.transform(lambda s: s.rolling(7).mean())

df['Net_Cash_Flow'] = df['Total_Deposits'] - df['Total_Withdrawals']
# +1 in the denominator avoids division by zero when there were no deposits.
df['Withdrawal_to_Deposit_Ratio'] = df['Total_Withdrawals'] / (df['Total_Deposits'] + 1)

# Cash level changes. The diff is taken per ATM: an ungrouped diff would give
# the first row of each ATM a "change" measured against the last row of the
# previous ATM in sort order. With grouping that first row is NaN instead.
df['Cash_Level_Change'] = cash_level_by_atm.diff()
df['Cash_Utilization_Rate'] = df['Total_Withdrawals'] / df['Previous_Day_Cash_Level']

In [24]:
# Lift the column display limit (a session-wide pandas option) so the newly
# added feature columns are all visible, then show the first five rows.
pd.options.display.max_columns = None
df.head(n=5)

Unnamed: 0,ATM_ID,Date,Day_of_Week,Time_of_Day,Total_Withdrawals,Total_Deposits,Location_Type,Holiday_Flag,Special_Event_Flag,Previous_Day_Cash_Level,Weather_Condition,Nearby_Competitor_ATMs,Cash_Demand_Next_Day,Cash_Demand_Lag_2,Cash_Demand_Lag_7,Cash_Demand_MA_3,Cash_Demand_MA_7,Withdrawals_MA_7,Net_Cash_Flow,Withdrawal_to_Deposit_Ratio,Cash_Level_Change,Cash_Utilization_Rate
2904,ATM_0001,2022-01-05,Wednesday,Night,37573,6224,Standalone,0,1,109776,Clear,4,31478,,,,,,-31349,6.035823,,0.34227
5023,ATM_0001,2022-01-13,Thursday,Morning,48156,10070,Gas Station,0,1,113068,Clear,5,41613,,,,,,-38086,4.78165,3292.0,0.425903
4538,ATM_0001,2022-01-20,Thursday,Evening,24671,13922,Mall,0,0,118171,Rainy,4,11316,31478.0,,28135.666667,,,-10749,1.77196,5103.0,0.208774
4986,ATM_0001,2022-01-28,Friday,Night,46383,12205,Mall,0,0,96931,Clear,4,34587,41613.0,,29172.0,,,-34178,3.800016,-21240.0,0.478516
4064,ATM_0001,2022-02-01,Tuesday,Morning,41972,18452,Bank Branch,0,0,83257,Rainy,5,23665,11316.0,,23189.333333,,,-23520,2.274535,-13674.0,0.504126


In [23]:
# Persist the engineered features. The index is written as well (to_csv's
# default), so the file starts with an unnamed column of row labels.
df.to_csv(path_or_buf='data/atmFeatures.csv')