In [3]:
import pandas as pd
import numpy as np

# Preprocess the card dataset: derive calendar features from the timestamp,
# forward-fill gaps, add engineered columns, standardize the predictors, and
# write the result to processed_data.csv.

# Amounts strictly above this value are flagged as "high" withdrawals.
HIGH_AMOUNT_THRESHOLD = 100

# Load the raw dataset
df = pd.read_csv('card_data.csv')

# Parse the timestamp column. Values that do not match the day/month/year
# layout are coerced to NaT instead of raising.
df['Date Time'] = pd.to_datetime(df['Date Time'], format='%d/%m/%Y %H:%M:%S', errors='coerce')

# Extract time-based features (NaN wherever the timestamp failed to parse)
timestamps = df['Date Time'].dt
df['Hour'] = timestamps.hour
df['Day'] = timestamps.day
df['Month'] = timestamps.month
df['Year'] = timestamps.year
df['DayOfWeek'] = timestamps.dayofweek

# Forward-fill missing values. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling, which emits a FutureWarning.
# NOTE(review): this also propagates the previous row's Hour/Day/... into rows
# whose timestamp could not be parsed -- confirm that is the intended policy.
df = df.ffill()

# Create new features from existing ones
# Row-wise sum of the first three V* columns
df['Feature_Combination'] = df[['V1', 'V2', 'V3']].sum(axis=1)

# Binary flag: 1 when the amount withdrawn exceeds the threshold, else 0.
# The vectorized comparison avoids a Python-level call per row; NaN compares
# as False and therefore yields 0, same as the element-wise version.
df['High_Amount_Withdrawn'] = (df['Amount Withdrawn'] > HIGH_AMOUNT_THRESHOLD).astype(int)

# Encode the categorical label as 0/1; any value other than 'F' or 'M' maps
# to NaN. (The column name really does carry a trailing space.)
df['Class '] = df['Class '].map({'F': 0, 'M': 1})

# The raw timestamp is no longer needed once its components are extracted
df = df.drop(columns=['Date Time'])

# Standardize every column except the two held-out ones
from sklearn.preprocessing import StandardScaler
held_out_columns = ['Amount Withdrawn', 'Class ']
scaler = StandardScaler()
features = df.drop(columns=held_out_columns)
scaled_features = scaler.fit_transform(features)
# Reuse the original index so the axis=1 concat below aligns row-for-row even
# if df does not carry a default RangeIndex.
scaled_df = pd.DataFrame(scaled_features, columns=features.columns, index=features.index)

# Add the held-out columns back after the scaled predictors
df = pd.concat([scaled_df, df[held_out_columns]], axis=1)

# Save the processed data
df.to_csv('processed_data.csv', index=False)

print(df.head())


  df.fillna(method='ffill', inplace=True)  # Forward fill as an example


         V1        V2        V3        V4        V5        V6        V7  \
0 -0.887390 -0.213894  1.609623  0.806995 -0.307760  0.168282  0.157887   
1  0.981398  0.061419 -0.670185  0.059676  0.028180 -0.251818 -0.214395   
2  0.981398 -1.243382  0.875486  0.004732 -0.446810  1.200207  0.803138   
3 -0.599172 -0.305232  0.894518 -0.994159 -0.031130  0.773516  0.155561   
4 -0.739761  0.558207  0.659526  0.023419 -0.365844 -0.114330  0.571023   

         V8        V9       V10  ...       V28      Hour       Day     Month  \
0  0.175123  0.438257  0.030433  ... -0.010147  0.352258 -0.778555 -0.733324   
1  0.158525 -0.281396 -0.240129  ...  0.111940  0.352258 -0.778555 -0.733324   
2  0.356993 -1.744881 -0.240129  ... -0.142203  0.352258 -0.778555 -0.733324   
3  0.515401 -1.596548 -0.122547  ...  0.271413  0.352258 -0.778555 -0.733324   
4 -0.275626  0.965844  0.725582  ...  0.795885  0.352258 -0.778555 -0.733324   

   Year  DayOfWeek  Feature_Combination  High_Amount_Withdrawn  \
0 