In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from imblearn.over_sampling import SMOTE


In [None]:
# Load the credit-card fraud CSV directly from its hosted URL into a DataFrame.
url = "https://raw.githubusercontent.com/plotly/datasets/master/credit-card-fraud.csv"
df = pd.read_csv(url)

# Preview the first rows to sanity-check that the file parsed as expected.
print("Dataset Sample:")
print(df.head())

# Column dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appends a stray "None" line.
print("\nDataset Info:")
df.info()

# Per-label row counts of the 'Class' column, to expose the class imbalance.
print("\nClass Distribution:")
print(df['Class'].value_counts())


In [None]:
from sklearn.preprocessing import StandardScaler

# Separate features and target variable
X = df.drop('Class', axis=1)
y = df['Class']

# Normalize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Address class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

# Display new class distribution
print("\nResampled Class Distribution:")
print(pd.Series(y_resampled).value_counts())


In [None]:
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

print(f"\nTraining Samples: {X_train.shape[0]}")
print(f"Test Samples: {X_test.shape[0]}")


In [None]:
# Build a 100-tree random forest; the fixed seed makes repeated fits identical.
model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)

# Fit the forest on the training features and labels.
model.fit(X_train, y_train)

print("\nRandom Forest Model Trained Successfully!")


In [None]:
# Label every row of the test set with the fitted model.
y_pred = model.predict(X_test)

# All three summaries take the same (true labels, predicted labels) pair,
# so compute them in one pass over the metric functions.
accuracy, conf_matrix, class_report = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, confusion_matrix, classification_report)
)

# Report: overall accuracy, then the confusion matrix, then per-class scores.
print(f"\nAccuracy: {accuracy:.4f}")
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", class_report, sep="\n")


In [None]:
# Use the first test row as the example transaction, reshaped into a
# single-row 2-D array before it is handed to the model.
sample_transaction = X_test[0].reshape(1, -1)

# Hard label plus per-class probabilities for that one row.
fraud_prediction = model.predict(sample_transaction)
fraud_probability = model.predict_proba(sample_transaction)

# A predicted label of 1 is reported as fraud; anything else as not fraud.
if fraud_prediction[0] == 1:
    verdict = 'Fraud'
else:
    verdict = 'Not Fraud'

print(f"\nFraud Prediction: {verdict}")
# Second probability column of the single result row.
print(f"Fraud Probability: {fraud_probability[0, 1]:.4f}")


In [None]:
import matplotlib.pyplot as plt

# Importance scores from the fitted forest, paired with the feature column names.
feature_importances = model.feature_importances_
features = X.columns

# Horizontal bar chart, one bar per feature, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(features, feature_importances, color="skyblue")
ax.set_xlabel("Importance")
ax.set_ylabel("Features")
ax.set_title("Feature Importance in Fraud Detection")
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Get feature importances
feature_importances = model.feature_importances_
features = X.columns

# This cell was a verbatim copy of the previous feature-importance plot.
# To make it add information instead of repeating it, the bars are ranked:
# sorting ascending puts the most important feature at the top of a
# horizontal bar chart. `features` / `feature_importances` keep their
# original order; only the plotted copies are sorted.
ranked_pairs = sorted(
    zip(feature_importances, features), key=lambda pair: pair[0]
)
ranked_importances = [importance for importance, _ in ranked_pairs]
ranked_features = [str(name) for _, name in ranked_pairs]

# Plot feature importances, ranked
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(ranked_features, ranked_importances, color="skyblue")
ax.set_xlabel("Importance")
ax.set_ylabel("Features")
ax.set_title("Feature Importance")
plt.show()
