In [1]:
import pandas as pd

# Load the raw dataset into a DataFrame. 'data.csv' is a relative path, so it
# is looked up in the current working directory; change it if the file lives
# elsewhere.
df = pd.read_csv(filepath_or_buffer='data.csv')

In [2]:
from sklearn.preprocessing import LabelEncoder

# Replace the FLAG target column with integer class codes.
# NOTE(review): LabelEncoder numbers classes in sorted order of the distinct
# values it sees, so string labels 'Fraud' / 'Non-Fraud' would be encoded as
# 0 / 1 respectively (not Fraud -> 1). Inspect `le.classes_` to confirm which
# code stands for fraud before interpreting any metric.
le = LabelEncoder()
le.fit(df['FLAG'])
df['FLAG'] = le.transform(df['FLAG'])

In [3]:
from sklearn.preprocessing import StandardScaler

# Every column except the FLAG target is treated as a model input and is
# standardised with StandardScaler.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split performed later in the notebook, so test-row statistics influence the
# scaling — confirm this is acceptable for the reported test accuracy.
features = df.drop(columns='FLAG')
scaler = StandardScaler()
scaler.fit(features)
features_scaled = scaler.transform(features)

In [4]:
# Model inputs are the scaled feature matrix; targets are the encoded FLAG
# column taken out of the DataFrame via `.values`.
X, y = features_scaled, df['FLAG'].values

In [5]:
from imblearn.over_sampling import SMOTE

# Oversample the minority class with SMOTE (5 nearest neighbours per
# synthetic sample). random_state is pinned to the same seed used for the
# train/test split so the synthetic rows — and everything computed from them
# downstream — are identical on every Restart & Run All.
# NOTE(review): resampling happens before the train/test split, so synthetic
# rows (and rows interpolated from future test rows) can land on both sides
# of the split; the reported test accuracy is likely optimistic — confirm.
smote = SMOTE(sampling_strategy='minority', k_neighbors=5, random_state=42)
X_res, y_res = smote.fit_resample(X, y)

In [6]:
from sklearn.feature_selection import mutual_info_classif

# Rank features by estimated mutual information with the target and keep the
# highest-scoring ones. mutual_info_classif is stochastic unless a
# random_state is given, so the seed is pinned to keep the selected feature
# set stable between runs.
n_top_features = 10  # number of features kept for the model
info_gain = mutual_info_classif(X_res, y_res, random_state=42)

# argsort is ascending: take the last n entries, then reverse so that the
# indices are ordered from most to least informative.
top_features_indices = info_gain.argsort()[-n_top_features:][::-1]
X_top_features = X_res[:, top_features_indices]

In [26]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the resampled, feature-selected rows for testing; the fixed
# seed makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_top_features,
    y_res,
    random_state=42,
    test_size=0.2,
)

In [33]:
# Reshape the 2-D training matrix into the 3-D (samples, time_steps,
# features_per_step) layout that the LSTM layers consume.
# time_steps is defined here because this is the first cell that needs it;
# previously it was only assigned in a later cell, so a fresh kernel failed
# with NameError at this point.
time_steps = 10  # must be the same value used when reshaping X_test

if X_train.ndim == 2:
    # Fail loudly instead of letting reshape(-1, ...) silently re-chunk rows
    # when the feature count is not a multiple of time_steps.
    if X_train.shape[1] % time_steps != 0:
        raise ValueError("Number of features in X_train is not divisible by time_steps")
    new_feature_dim = X_train.shape[1] // time_steps
    X_train = X_train.reshape(-1, time_steps, new_feature_dim)
else:
    # Cell re-run: X_train is already 3-D, so leave it untouched and just
    # recover the per-step feature count from its last axis.
    new_feature_dim = X_train.shape[2]

array([[[-0.66719608],
        [-0.7602708 ],
        [-0.23454286],
        ...,
        [-0.35528359],
        [-0.00459432],
        [-0.03885983]],

       [[-0.65182151],
        [ 0.9811532 ],
        [-0.18554121],
        ...,
        [-0.0738586 ],
        [-0.00408136],
        [-0.03627359]],

       [[ 1.47455843],
        [ 1.37617855],
        [-0.08835651],
        ...,
        [-0.31146951],
        [-0.00434287],
        [-0.03875137]],

       ...,

       [[ 1.47455843],
        [-0.20203726],
        [-0.23454286],
        ...,
        [ 0.42213238],
        [-0.00459411],
        [-0.03804355]],

       [[-0.65568127],
        [-0.7602708 ],
        [-0.16144968],
        ...,
        [-0.34748875],
        [-0.00750727],
        [-0.02788582]],

       [[-0.66719608],
        [-0.7602708 ],
        [-0.23454286],
        ...,
        [-0.31039537],
        [-0.00458165],
        [-0.03872809]]])

In [34]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import to_categorical

# X_train must already be 3-D at this point: its second and third dimensions
# are handed to the first LSTM layer as input_shape.
# NOTE(review): to_categorical is imported but not used. y_train is passed to
# fit() unchanged, which is the form a single sigmoid output trained with
# binary_crossentropy works with (one 0/1 label per sample) — confirm y_train
# holds 0/1 codes.

# Architecture: two stacked 50-unit LSTM layers (the first returns the full
# sequence so the second can consume it) followed by one sigmoid unit for the
# binary decision.
model = Sequential([
    LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True),
    LSTM(50, return_sequences=False),
    Dense(1, activation='sigmoid'),
])

# Adam optimiser, binary cross-entropy loss, accuracy reported per epoch.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train for 10 epochs in batches of 64, with 20% of the training data held
# out by Keras for validation.
model.fit(X_train, y_train, validation_split=0.2, batch_size=64, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21bae0fb8d0>

In [36]:
# Bring X_test into the same 3-D (samples, time_steps, features_per_step)
# layout as the training data so the trained model can consume it.
time_steps = 10  # keep identical to the value used when reshaping X_train
features_per_step = X_test.shape[1] // time_steps

# Integer division drops any remainder; if multiplying back does not recover
# the original column count, the features cannot be split evenly.
if features_per_step * time_steps != X_test.shape[1]:
    raise ValueError("Number of features in X_test is not divisible by time_steps")

X_test_3D = X_test.reshape(-1, time_steps, features_per_step)

# Score the trained model on the held-out rows and report its accuracy.
loss, accuracy = model.evaluate(X_test_3D, y_test)
print('Test accuracy:', accuracy)

Test accuracy: 0.9557189345359802


In [None]:
# SHAP - Deep Explainer for the LSTM model trained in this notebook.
# The original cell referenced `best_model`, `X_train_reshaped` and
# `X_test_reshaped`, none of which are defined anywhere in the notebook, and
# never imported shap; it now uses the objects the earlier cells create
# (`model`, the 3-D `X_train`, and `X_test_3D`).
import shap

# DeepExplainer averages over every background row, so its cost grows with
# the background size; a small sample is used instead of the full training
# set. train_test_split was called with its default shuffling, so a leading
# slice is presumably a random subset — verify if the split is ever changed.
shap_background = X_train[:100]
shap_explainer = shap.DeepExplainer(model, shap_background)

# NOTE(review): calling the explainer directly (returning an Explanation
# object) depends on the installed shap version; older releases only expose
# `shap_explainer.shap_values(...)` — confirm against the environment.
shap_values = shap_explainer(X_test_3D)

# Generate SHAP plots
# NOTE(review): these plot helpers appear to be written for 2-D
# (samples x features) values; with 3-D LSTM input the values and data may
# need flattening to (samples, time_steps * features_per_step) first — verify.
shap.summary_plot(shap_values, X_test_3D)
shap.plots.force(shap_values[0])
shap.plots.scatter(shap_values[:, 0])
shap.plots.waterfall(shap_values[0])