<a href="https://colab.research.google.com/github/muqadas007-jerry/Fraud-Detection-System/blob/main/Task_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas numpy scikit-learn imbalanced-learn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from imblearn.over_sampling import SMOTE




In [None]:
# Read the credit-card transactions from the CSV in the working directory
df = pd.read_csv("creditcard.csv")

# Preview the top five rows of the table
print(df.head(n=5))


   Time        V1        V2        V3        V4        V5        V6        V7  \
0     0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1     0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2     1 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3     1 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4     2 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

# New Section

## Step 3: Data Preprocessing
- Check for missing values
- Normalize numerical features
- Handle class imbalance using SMOTE

In [None]:
# Count the missing (NaN) entries in every column
print(df.isna().sum())



Time      0
V1        1
V2        1
V3        1
V4        1
V5        1
V6        1
V7        1
V8        1
V9        1
V10       1
V11       1
V12       1
V13       1
V14       1
V15       1
V16       1
V17       1
V18       1
V19       1
V20       1
V21       1
V22       1
V23       1
V24       1
V25       1
V26       1
V27       1
V28       1
Amount    1
Class     1
dtype: int64


In [None]:
# How many rows carry each label in the "Class" column
print(df.loc[:, "Class"].value_counts())


Class
0.0    27725
1.0       93
Name: count, dtype: int64


In [None]:
# Target variable: the "Class" label column
y = df["Class"]
# Features: every column except the label
X = df.drop("Class", axis=1)

In [None]:
# Standardize the feature columns: learn the scaling statistics from X,
# then apply them to the same data
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

In [None]:
# Handle missing values, scale the features, and rebalance the classes with SMOTE

# Rows without a label cannot be used for supervised training. The result is
# kept under a new name so the raw `df` shown by earlier cells is not
# overwritten and re-running any cell gives the same output.
df_labeled = df.dropna(subset=["Class"])

# Separate features and target variable using the labelled rows only
y = df_labeled["Class"]                 # Target variable

# Replace the remaining missing feature values with each column's mean
# ('median' or 'most_frequent' are alternative strategies). Dropping those
# rows instead would discard otherwise usable transactions.
# After this step X is a NumPy array, so column names are no longer attached.
imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(df_labeled.drop(columns=["Class"]))  # Features
assert not np.isnan(X).any(), "features still contain NaN after imputation"

# Normalize numerical features after handling NaNs
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# sampling_strategy=0.5 grows the minority class to half the size of the
# majority class; k_neighbors=2 interpolates each synthetic sample from its
# 2 nearest minority neighbours; random_state makes the result repeatable.
smote = SMOTE(sampling_strategy=0.5, random_state=42, k_neighbors=2)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

# NOTE(review): X_resampled / y_resampled are built from ALL labelled rows.
# Splitting them into train and test sets afterwards puts synthetic points
# (interpolated from real neighbours) on both sides of the split and inflates
# the scores; for evaluation, oversample only the training portion.

In [None]:
# Label counts after SMOTE oversampling
resampled_labels = pd.Series(y_resampled)
print(resampled_labels.value_counts())

Class
0.0    27725
1.0    13862
Name: count, dtype: int64


## Step 4: Train the Model
- Split the dataset into training (80%) and testing (20%) sets
- Train a Random Forest classifier

In [None]:
# Split the dataset BEFORE oversampling: hold out 20% of the real (scaled)
# transactions, stratified so the rare fraud class is present in both parts.
# Splitting the SMOTE output instead would place synthetic rows, interpolated
# from training neighbours, in the test set and make the scores look
# near-perfect.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Oversample the training portion only, reusing the SMOTE settings configured
# earlier, so the model is evaluated purely on real transactions.
# NOTE: `scaler` was fit on all rows upstream, so the scaling statistics still
# include the test rows.
X_train, y_train = smote.fit_resample(X_train, y_train)

# Train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


## Step 5: Evaluate the Model
Use accuracy, precision, recall, and F1-score for evaluation.

In [None]:
# Predict labels for the test features
y_pred = model.predict(X_test)

# Print each headline metric in turn (label, scoring function)
metric_table = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)
for label, metric_fn in metric_table:
    print(label, metric_fn(y_test, y_pred))

# Per-class precision / recall / F1 breakdown
print(classification_report(y_test, y_pred))


Accuracy: 0.9998797787929791
Precision: 0.9996283909327388
Recall: 1.0
F1 Score: 0.9998141609366289
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      5628
         1.0       1.00      1.00      1.00      2690

    accuracy                           1.00      8318
   macro avg       1.00      1.00      1.00      8318
weighted avg       1.00      1.00      1.00      8318



## Step 6: Create a Simple CLI-based Testing Interface
We will take user input for a new transaction and predict whether it is fraudulent or not.

In [None]:
# Function to predict fraud from user input
def predict_fraud(feature_names=None, read_input=input):
    """Prompt for one transaction's feature values and classify it.

    Relies on the notebook-level objects `X` (only for its column count),
    `scaler` (the fitted StandardScaler) and `model` (the trained classifier).

    Parameters
    ----------
    feature_names : sequence of str, optional
        Labels shown in the prompts, in the same order as the columns the
        model was trained on. Defaults to "feature 1", "feature 2", ...
    read_input : callable, optional
        Function that takes a prompt string and returns the text entered.
        Defaults to the built-in `input`; pass a stub to drive the function
        without a keyboard.

    Returns
    -------
    bool
        True when the model predicts class 1 (fraud), otherwise False.

    Raises
    ------
    ValueError
        If `feature_names` does not have exactly one entry per feature column.
    """
    n_features = X.shape[1]
    if feature_names is None:
        feature_names = [f"feature {i+1}" for i in range(n_features)]
    else:
        feature_names = list(feature_names)
        if len(feature_names) != n_features:
            raise ValueError(
                f"expected {n_features} feature names, got {len(feature_names)}"
            )

    print("\nEnter transaction details:")

    # Ask for every feature, re-prompting instead of crashing on bad input
    user_data = []
    for name in feature_names:
        while True:
            raw = read_input(f"Enter value for {name}: ")
            try:
                value = float(raw)
            except ValueError:
                print(f"'{raw}' is not a number, please try again.")
                continue
            # float() accepts "nan" and "inf", which the scaler/model reject
            if not np.isfinite(value):
                print("Please enter a finite number.")
                continue
            user_data.append(value)
            break

    # Convert input to a single-row 2-D NumPy array
    user_data = np.array(user_data).reshape(1, -1)

    # Normalize input data with the scaler fitted on the training features
    user_data_scaled = scaler.transform(user_data)

    # Predict fraud
    prediction = model.predict(user_data_scaled)
    is_fraud = bool(prediction[0] == 1)

    # Output result
    if is_fraud:
        print("🚨 Fraudulent Transaction Detected!")
    else:
        print("✅ Legitimate Transaction")
    return is_fraud

# Run testing interface, prompting with the real column names; the features
# were built from `df` minus "Class", so the order matches the model's input
is_fraud = predict_fraud(feature_names=[col for col in df.columns if col != "Class"])



Enter transaction details:
Enter value for feature 1: 2
Enter value for feature 2: 4
Enter value for feature 3: 6
Enter value for feature 4: 8
Enter value for feature 5: 9
Enter value for feature 6: 6
Enter value for feature 7: 7
Enter value for feature 8: 7
Enter value for feature 9: 4
Enter value for feature 10: 3
