In [25]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import KFold
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
import joblib
import random

In [26]:
# Simulate loading data
data = pd.DataFrame({
    'amount': np.random.uniform(10, 1000, 1000),
    'oldbalanceOrg': np.random.uniform(0, 1000, 1000),
    'newbalanceOrig': np.random.uniform(0, 1000, 1000),
    'oldbalanceDest': np.random.uniform(0, 1000, 1000),
    'newbalanceDest': np.random.uniform(0, 1000, 1000),
    'transaction_frequency': np.random.randint(1, 100, 1000),
    'transaction_recency': np.random.randint(1, 30, 1000),
    'device_location': np.random.choice([0, 1], 1000),
    'distance': np.random.uniform(0, 1000, 1000),
    'credit_score': np.random.randint(300, 850, 1000),
    'unusual_activity_flag': np.random.choice([0, 1], 1000),
    'isFraud': np.random.choice([0, 1], 1000, p=[0.95, 0.05])
})


In [27]:
# Encoding categorical variables (example for 'device_location')
label_encoder = LabelEncoder()
data['device_location'] = label_encoder.fit_transform(data['device_location'])

essential_features = [
    'amount',
    'oldbalanceOrg',
    'newbalanceOrig',
    'oldbalanceDest',
    'newbalanceDest',
    'transaction_frequency',
    'transaction_recency',
    'device_location',
    'distance',
    'credit_score',
    'unusual_activity_flag'
]


In [28]:
columns_to_keep = essential_features + ['isFraud']

# Ensure all specified columns are in the dataset
for column in columns_to_keep:
    if column not in data.columns:
        raise ValueError(f"Column '{column}' is missing from the dataset.")

# Drop all columns except the essential features
data_filtered = data[columns_to_keep]

# Define feature columns and target column
feature_columns = essential_features
target_column = 'isFraud'

# Split the data into features (X) and target (y)
X = data_filtered[feature_columns]
y = data_filtered[target_column]


In [29]:
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Display the resampled data
print(pd.DataFrame(X_resampled, columns=feature_columns).head())
print(pd.DataFrame(y_resampled, columns=[target_column]).head())

# Display the resampled class distribution
print(pd.Series(y_resampled).value_counts())



       amount  oldbalanceOrg  newbalanceOrig  oldbalanceDest  newbalanceDest  \
0  659.759388     839.075206      939.000706      211.039441      438.218417   
1  117.777082     271.390475      362.751425      250.265118      500.753752   
2  758.213975     875.702742      588.868842      501.493482      340.742724   
3  120.437640     690.163866       89.979182      505.309068      682.940119   
4  431.644272     403.180343      636.012201      592.057067      390.553180   

   transaction_frequency  transaction_recency  device_location    distance  \
0                     38                   25                1  484.788222   
1                     48                    2                1  591.172569   
2                     43                    2                1  727.662731   
3                     98                    7                0   85.538972   
4                      7                   15                0  594.858128   

   credit_score  unusual_activity_flag  
0        

In [30]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=3)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [31]:
# print the values of the target column which have one of the values of the target column

In [32]:


def get_user_input():
    return {
        'amount': float(input("Enter transaction amount: ")),
        'balance': float(input("Enter current balance: ")),
        'credit_score': float(input("Enter credit score (300-850): "))
    }

def enrich_transaction_data(user_input):
    # Simulate additional data that would be collected automatically
    transaction = user_input.copy()
    transaction['oldbalanceOrg'] = transaction['balance']
    transaction['newbalanceOrig'] = transaction['balance'] - transaction['amount']
    transaction['oldbalanceDest'] = random.uniform(0, 10000)  # Simulated receiver's old balance
    transaction['newbalanceDest'] = transaction['oldbalanceDest'] + transaction['amount']
    transaction['transaction_frequency'] = random.randint(1, 100)  # Simulated frequency
    transaction['transaction_recency'] = random.randint(1, 30)  # Days since last transaction
    transaction['device_location'] = random.choice([0, 1])  # 0: usual location, 1: unusual
    transaction['distance'] = random.uniform(0, 1000)  # Simulated distance
    transaction['unusual_activity_flag'] = 1 if random.random() < 0.1 else 0  # 10% chance of unusual activity

    return transaction

def preprocess_transaction(transaction, scaler):
    features = ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest',
                'transaction_frequency', 'transaction_recency', 'device_location', 'distance',
                'credit_score', 'unusual_activity_flag']

    # Ensure transaction dictionary has all required features
    assert all(feature in transaction for feature in features), "Missing features in the transaction data"

    transaction_values = np.array([transaction[feature] for feature in features]).reshape(1, -1)
    transaction_scaled = scaler.transform(transaction_values)
    return transaction_scaled




In [33]:
print(data['isFraud'].value_counts())

isFraud
0    943
1     57
Name: count, dtype: int64


In [34]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Input


user_input = get_user_input()
enriched_transaction = enrich_transaction_data(user_input)
preprocessed_transaction = preprocess_transaction(enriched_transaction, scaler)

joblib.dump(scaler, 'scaler.pkl')

def create_model(input_shape):
    model = Sequential([
        Dense(64, activation='relu', input_shape=(input_shape,)),
        BatchNormalization(),
        Dropout(0.3),
        Dense(32, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(16, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model





In [35]:
# Save the scaler



In [36]:
# Calculate class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))

model = create_model(X_train_scaled.shape[1])
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [37]:
def train_model(X, y, model, n_splits=5, epochs=50, batch_size=32, class_weight=None):
    skf = StratifiedKFold(n_splits=n_splits)
    
    X = np.array(X)
    y = np.array(y)
    
    histories = []
    
    for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
        print(f'Training on fold {fold + 1}/{n_splits}')
        
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]
        
        callbacks = [
            EarlyStopping(patience=10, restore_best_weights=True),
            ReduceLROnPlateau(factor=0.5, patience=5, min_lr=0.00001)
        ]
        
        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            class_weight=class_weight,
            callbacks=callbacks,
            verbose=1
        )
        
        histories.append(history)
    
    return histories

In [38]:
histories = train_model(X_train_scaled, y_train, model, class_weight=class_weights)
model.save('fraud_detection_model.keras')



Training on fold 1/5
Epoch 1/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5124 - loss: 0.8817 - val_accuracy: 0.5960 - val_loss: 0.6691 - learning_rate: 0.0010
Epoch 2/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5728 - loss: 0.7548 - val_accuracy: 0.6887 - val_loss: 0.6425 - learning_rate: 0.0010
Epoch 3/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 978us/step - accuracy: 0.6122 - loss: 0.7001 - val_accuracy: 0.7252 - val_loss: 0.6179 - learning_rate: 0.0010
Epoch 4/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6363 - loss: 0.6652 - val_accuracy: 0.7318 - val_loss: 0.5942 - learning_rate: 0.0010
Epoch 5/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6259 - loss: 0.6623 - val_accuracy: 0.7351 - val_loss: 0.5740 - learning_rate: 0.0010
Epoch 6/50
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [39]:
# Make prediction
prediction = model.predict(preprocessed_transaction)
fraud_score = prediction[0][0]

print("Fraud Score:", fraud_score)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Fraud Score: 0.0


In [40]:
# Load the model
model = load_model('fraud_detection_model.keras')
scaler = joblib.load('scaler.pkl')


In [41]:
# Make prediction
prediction = model.predict(preprocessed_transaction)
fraud_score = prediction[0][0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step


In [42]:
# Make prediction
def generate_transaction_score(transaction_scaled):
    transaction_score = model.predict(transaction_scaled)[0][0]
    return transaction_score

def detect_fraud(transaction_score, threshold=0.5):
    return transaction_score >= threshold



In [43]:
def main():
    user_input = get_user_input()
    enriched_transaction = enrich_transaction_data(user_input)
    preprocessed_transaction = preprocess_transaction(enriched_transaction, scaler)
    transaction_score = generate_transaction_score(preprocessed_transaction)
    is_fraud = detect_fraud(transaction_score)

    print("Transaction Score:", transaction_score)
    print("Fraud Detected:", is_fraud)
    
if __name__ == "__main__":
    main()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
Transaction Score: 1.3458298e-18
Fraud Detected: False


