In [2]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import get_custom_objects  # If you need custom objects

# Load the transaction dataset from JSON
data = pd.read_json('processed_data.json', orient='records')  # Adjust filename as needed

# Ensure the JSON has the same columns as the CSV did:
# For example, 'Amount', 'Latitude', 'Longitude', 'Time', 'Is Fraud?' etc.
# If 'Time' is still a string that can be parsed as a datetime, we can convert it:
data['Time'] = pd.to_datetime(data['Time'])

# Preprocessing the dataset
# Selecting relevant columns for RNN training
features = ['Amount', 'Latitude', 'Longitude', 'Time']
target = 'Is Fraud?'

# Extract time-based features (hour, day of week, etc.)
data['Hour'] = data['Time'].dt.hour
data['DayOfWeek'] = data['Time'].dt.dayofweek
data['Day'] = data['Time'].dt.day

# Update features list to include new time-based features
features = ['Amount', 'Latitude', 'Longitude', 'Hour', 'DayOfWeek', 'Day']

# Scaling numerical features
scaler = MinMaxScaler()
data[['Amount', 'Latitude', 'Longitude', 'Hour', 'DayOfWeek', 'Day']] = scaler.fit_transform(
    data[['Amount', 'Latitude', 'Longitude', 'Hour', 'DayOfWeek', 'Day']]
)

# Create feature and target arrays
X = data[features].values
y = data[target].values

# Reshape target for compatibility
y = np.reshape(y, (-1, 1))

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=78)

# Reshape the input data for LSTM (samples, timesteps, features)
# Assuming each transaction is independent (timesteps=1)
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Building the RNN model
model = Sequential()
model.add(LSTM(units=128, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.9))
model.add(LSTM(units=128, return_sequences=False))
model.add(Dropout(0.9))
model.add(Dense(units=256, activation='relu'))
model.add(Dropout(0.9))
model.add(Dense(units=1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Add EarlyStopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')

# Make predictions on the test set
y_pred = (model.predict(X_test) > 0.5)

# Print classification report and AUC
print(classification_report(y_test, y_pred))
print(f"AUC: {roc_auc_score(y_test, y_pred)}")

# Save the model as a .keras file
model.save('fraud_detection_model.keras', save_format='keras')


  data['Time'] = pd.to_datetime(data['Time'])




Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 0.45411375164985657, Test Accuracy: 0.8291100263595581
              precision    recall  f1-score   support

           0       0.83      0.97      0.90     23849
           1       0.77      0.33      0.46      6814

    accuracy                           0.83     30663
   macro avg       0.80      0.65      0.68     30663
weighted avg       0.82      0.83      0.80     30663

AUC: 0.64930

In [3]:
model.save('fraud_detection_model.h5')


  saving_api.save_model(


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score

# Load the dataset
data = pd.read_csv('mock_transactions.csv')

# Drop sensitive features
data = data.drop(['Card Number', 'CVV', 'Expires'], axis=1)

# Encode categorical features
label_encoders = {}
for col in ['Errors?', 'Has Chip', 'City', 'State']:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Extract time-based features from 'Time'
data['Time'] = pd.to_datetime(data['Time'], format='%H:%M:%S', errors='coerce')
data['Hour'] = data['Time'].dt.hour

# Handle mixed date formats for 'Date'
data['Date'] = pd.to_datetime(data['Date'], format='mixed', errors='coerce')
data['DayOfWeek'] = data['Date'].dt.dayofweek
data['Day'] = data['Date'].dt.day

# Update feature list
# Update feature list to exclude certain columns
features = ['Amount', 'Latitude', 'Longitude', 'Hour', 'DayOfWeek', 'Day']  # Removed specified columns

# Separate features and target
X = data[features]
y = data[target]

# Normalize numerical features
scaler = StandardScaler()
X.loc[:, ['Amount', 'Latitude', 'Longitude']] = scaler.fit_transform(X[['Amount', 'Latitude', 'Longitude']])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=17)

# Define hyperparameters grid
param_grid = {
    'n_estimators': [500, 100],  # Fewer options
    'max_depth': [50, 60],       # Limit depth options
    'min_samples_split': [5],    # Use a single value
    'min_samples_leaf': [2],     # Use a single value
    'max_features': ['sqrt']     # Use a single option
}

# Initialize Random Forest model
rf_model = RandomForestClassifier(random_state=17)

# Apply GridSearchCV
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=3, n_jobs=-8, verbose=2, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best parameters
print(f"Best Hyperparameters: {grid_search.best_params_}")

# Use best model
best_rf_model = grid_search.best_estimator_

# Train the best model
best_rf_model.fit(X_train, y_train)

# Make predictions
y_pred = best_rf_model.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred))
print(f"AUC: {roc_auc_score(y_test, y_pred)}")


model.save('fraud_detection_RF.h5')