In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from collections import Counter

# Load the dataset
df = pd.read_csv('creditcard.csv')

# Split into X and y (features are still unscaled at this point)
X_raw = df.drop('Class', axis=1)
y = df['Class']

# Split into train/test BEFORE fitting any scaler, so that the scaling
# statistics are never computed from rows that end up in the test set.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, stratify=y, random_state=42
)

# Feature Scaling: fit on the training rows only, then reuse the fitted
# scalers for every other split. They are kept as variables so they can be
# persisted alongside the model if raw inputs need scaling at inference time.
amount_scaler = StandardScaler().fit(X_train_raw[['Amount']].to_numpy())
time_scaler = StandardScaler().fit(X_train_raw[['Time']].to_numpy())


def scale_features(features, amount_scaler, time_scaler):
    """Replace 'Amount' and 'Time' with their scaled counterparts.

    Parameters
    ----------
    features : pandas.DataFrame
        Frame containing 'Amount' and 'Time' columns; it is not modified.
    amount_scaler, time_scaler : fitted scalers exposing ``transform``
        Applied to the 'Amount' and 'Time' columns respectively.

    Returns
    -------
    pandas.DataFrame
        New frame whose first two columns are 'scaled_time' and
        'scaled_amount', followed by the remaining columns in their
        original order.
    """
    scaled = features.drop(columns=['Amount', 'Time'])
    # Two inserts at position 0 yield the order: scaled_time, scaled_amount, ...
    scaled.insert(
        0, 'scaled_amount',
        amount_scaler.transform(features[['Amount']].to_numpy()).ravel(),
    )
    scaled.insert(
        0, 'scaled_time',
        time_scaler.transform(features[['Time']].to_numpy()).ravel(),
    )
    return scaled


X_train = scale_features(X_train_raw, amount_scaler, time_scaler)
X_test = scale_features(X_test_raw, amount_scaler, time_scaler)

# Keep X and df in the scaled, reordered layout for any cell that still reads
# them; both are produced with the train-fitted scalers. 'Class' is appended last.
X = scale_features(X_raw, amount_scaler, time_scaler)
df = X.assign(Class=y)

# Balance the classes by oversampling with SMOTE. Only the training split is
# resampled here; the test split is left untouched.
smote = SMOTE(random_state=42)
resampled_features, resampled_labels = smote.fit_resample(X_train, y_train)
X_train_smote = resampled_features
y_train_smote = resampled_labels

# Report how many rows each class has after resampling
class_counts = Counter(y_train_smote)
print("Class distribution after SMOTE:", class_counts)

# Fit a 100-tree random forest on the SMOTE-balanced training data
# (fit() returns the estimator itself, so construction and fitting are chained)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_smote, y_train_smote
)

# Hard class predictions for the held-out test split
y_pred_rf = rf_model.predict(X_test)

# Compare predictions against the true test labels
test_confusion = confusion_matrix(y_test, y_pred_rf)
test_report = classification_report(y_test, y_pred_rf)
print("Confusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)


Class distribution after SMOTE: Counter({0: 227451, 1: 227451})


In [None]:
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# Probability of the second class (column index 1) for every test row
y_prob = rf_model.predict_proba(X_test)[:, 1]

# ROC points across all thresholds, and the area under that curve
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

# Draw the ROC curve using the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, label=f'AUC = {roc_auc:.4f}')
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line
ax.plot([0, 1], [0, 1], linestyle='--', color='gray')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve - Credit Card Fraud Detection')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
import joblib

# Persist the fitted random forest to disk so it can be reused without retraining
model_path = 'fraud_rf_model.pkl'
joblib.dump(rf_model, model_path)

# Reload it in a later session with:
# loaded_model = joblib.load('fraud_rf_model.pkl')


In [None]:
# Install streamlit first
# pip install streamlit

# Save this in app.py
import streamlit as st
import joblib
import pandas as pd

# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that you produced yourself or otherwise trust.
model = joblib.load('fraud_rf_model.pkl')

st.title("Credit Card Fraud Detection")

# Input features
scaled_time = st.number_input("scaled_time")
scaled_amount = st.number_input("scaled_amount")

# V1–V28 used to be hard-coded to 0.0. They still default to 0.0, but can now
# be edited so that a real transaction can be scored.
with st.expander("V1–V28 (default 0.0)"):
    v_values = [
        st.number_input(f"V{i}", value=0.0, format="%.6f")
        for i in range(1, 29)
    ]

# Positional order: scaled_time, scaled_amount, V1..V28.
# NOTE(review): this must match the column order the model was trained with.
input_values = [scaled_time, scaled_amount] + v_values

if st.button("Predict"):
    # feature_names_in_ is only set when the estimator was fitted on a
    # DataFrame. When available, pass a DataFrame with the same column names so
    # the model receives named features instead of a bare list.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is not None and len(feature_names) == len(input_values):
        model_input = pd.DataFrame([input_values], columns=feature_names)
    else:
        model_input = [input_values]

    pred = model.predict(model_input)[0]
    if pred == 1:
        st.error("⚠️ Fraud Detected!")
    else:
        st.success("✅ Transaction is Safe.")


In [None]:
streamlit run app.py
