In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pickle

import tensorflow
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE

In [None]:

# Load the raw transactions once and derive the cleaned frame from it in a
# single chain, so re-running this cell always starts from the file on disk
# instead of mutating an already-processed `df`.
raw_df = pd.read_csv('CreditCardData.csv')

df = (
    raw_df
    .drop(columns=["Transaction ID"])   # not used as a model feature
    .dropna(axis=0)                     # discard every row with a missing value
    .assign(
        # Strip the pound sign as a literal character, then cast to float.
        Amount=lambda frame: frame["Amount"].str.replace("£", "", regex=False).astype(float),
        # Only the day-of-month is kept from the date; year and month were
        # previously derived and immediately dropped, so they are not built.
        # NOTE(review): no explicit `format=` is given because the date layout
        # is not visible here; pandas infers it (and may warn while doing so).
        Day=lambda frame: pd.to_datetime(frame["Date"]).dt.day,
    )
    .drop(columns=["Date"])
)


# Separate the 'Fraud' target from the remaining feature columns.
y = df['Fraud']
X = df.drop('Fraud', axis=1)

# Object-dtype columns go down the categorical branch; everything else is
# handled by the numerical branch.
categorical_features = list(X.select_dtypes(include='object').columns)
numerical_features = list(X.select_dtypes(exclude='object').columns)

# Numerical branch: fill gaps with the column mean, then standardise.
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at transform time.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own branch.
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])


def _to_dense(matrix):
    """Return ``matrix`` as a dense NumPy array, whether it arrives sparse or dense."""
    # ColumnTransformer returns a SciPy sparse matrix or a plain ndarray
    # depending on the density of the stacked output, so `.toarray()` cannot
    # be called unconditionally.
    return matrix.toarray() if hasattr(matrix, "toarray") else np.asarray(matrix)


# Hold out 20% of the rows. Stratifying on the label keeps the class
# proportions the same in both partitions (the notebook computes balanced
# class weights, i.e. it treats the label as imbalanced).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the preprocessing on the training rows only, then apply the fitted
# transformation to the held-out rows. The raw frames keep their own names so
# this cell can be re-run without first re-running the split.
X_train = _to_dense(preprocessor.fit_transform(X_train_raw))
X_test = _to_dense(preprocessor.transform(X_test_raw))

# Insert a length-1 middle axis: (rows, features) -> (rows, 1, features),
# the 3-D layout that is fed to the LSTM layers.
X_train_reshaped = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_reshaped = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# 'balanced' weights for the labels present in y_train, stored in a dict keyed
# by each class's position in the sorted array of unique labels.
label_values = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=label_values, y=y_train)
class_weight_dict = dict(enumerate(class_weights))


# Three stacked LSTM layers (128 -> 64 -> 32 units), each followed by batch
# normalisation and 30% dropout, ending in a single sigmoid unit.
# The input shape is declared with an explicit Input object rather than an
# `input_shape=` argument on the first layer; passing `input_shape` to a layer
# is deprecated in current Keras and triggers a warning when the layer is built.
model = Sequential([
    tensorflow.keras.Input(shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])),

    LSTM(128, return_sequences=True),   # emits a sequence for the next LSTM
    BatchNormalization(),
    Dropout(0.3),

    LSTM(64, return_sequences=True),
    BatchNormalization(),
    Dropout(0.3),

    LSTM(32),                           # last recurrent layer: final state only
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


# Initial training run. The balanced class weights computed just above were
# never handed to `fit`, so they had no effect; pass them explicitly. The
# returned History is kept so the per-epoch metrics can be inspected later.
initial_history = model.fit(
    X_train_reshaped,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,   # last 20% of the training rows are used for validation
    class_weight=class_weight_dict,
)


  df["Date"] = pd.to_datetime(df["Date"])
  super().__init__(**kwargs)


In [None]:

# Report which label values occur in the training target.
unique_classes = np.unique(y_train)
print("Unique classes in y_train:", unique_classes)

if len(unique_classes) != 2:
    # Anything other than exactly two labels is reported and training is skipped.
    print("Unexpected number of classes. Please check the labels.")
else:
    # np.unique returns sorted values, so the smaller label maps to 0 and the
    # larger one to 1.
    class_mapping = {label: position for position, label in enumerate(unique_classes)}
    to_binary = np.vectorize(class_mapping.get)
    y_train_mapped = to_binary(y_train)
    y_test_mapped = to_binary(y_test)

    # Balanced class weights computed from the mapped training labels.
    class_weights = compute_class_weight('balanced', classes=np.unique(y_train_mapped), y=y_train_mapped)
    class_weight_dict = dict(enumerate(class_weights))

    # Train the existing `model` object on the mapped labels with the class
    # weights applied; verbose=2 prints one summary line per epoch.
    history = model.fit(
        X_train_reshaped,
        y_train_mapped,
        epochs=20,
        batch_size=64,
        validation_split=0.2,
        verbose=2,
        class_weight=class_weight_dict,
    )


Unique classes in y_train: [0 1]
Epoch 1/20
1000/1000 - 7s - 7ms/step - accuracy: 0.9334 - loss: 0.1271 - val_accuracy: 0.9309 - val_loss: 0.1335
Epoch 2/20
1000/1000 - 10s - 10ms/step - accuracy: 0.9333 - loss: 0.1218 - val_accuracy: 0.9421 - val_loss: 0.1117
Epoch 3/20
1000/1000 - 12s - 12ms/step - accuracy: 0.9359 - loss: 0.1153 - val_accuracy: 0.9414 - val_loss: 0.1032
Epoch 4/20
1000/1000 - 10s - 10ms/step - accuracy: 0.9362 - loss: 0.1126 - val_accuracy: 0.9398 - val_loss: 0.1057
Epoch 5/20
1000/1000 - 9s - 9ms/step - accuracy: 0.9380 - loss: 0.1110 - val_accuracy: 0.9446 - val_loss: 0.1092
Epoch 6/20
1000/1000 - 11s - 11ms/step - accuracy: 0.9359 - loss: 0.1086 - val_accuracy: 0.9464 - val_loss: 0.1113
Epoch 7/20
1000/1000 - 19s - 19ms/step - accuracy: 0.9391 - loss: 0.1047 - val_accuracy: 0.9462 - val_loss: 0.0947
Epoch 8/20
1000/1000 - 7s - 7ms/step - accuracy: 0.9380 - loss: 0.1032 - val_accuracy: 0.9376 - val_loss: 0.0895
Epoch 9/20
1000/1000 - 9s - 9ms/step - accuracy: 0.93

In [None]:

# Persist the preprocessing object with pickle so the same transformation can
# be reloaded alongside the model.
preprocessor_path = 'credit_card_fraud_preprocessor.pkl'
with open(preprocessor_path, mode='wb') as pickle_file:
    pickle.dump(preprocessor, pickle_file)

# Save the Keras model to an HDF5 (.h5) file.
model_path = 'credit_card_fraud_model_lstm.h5'
model.save(model_path)

