In [None]:
pip install pandas numpy scikit-learn imbalanced-learn matplotlib seaborn


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from imblearn.over_sampling import SMOTE


In [None]:
from sklearn.model_selection import GridSearchCV


In [None]:
# Read the transactions CSV into a DataFrame; every later cell works from `df`.
csv_path = '/content/creditcard.csv'
df = pd.read_csv(csv_path)


In [None]:
# DataFrame.info() writes its summary straight to stdout and returns None, so
# wrapping it in print() emitted a stray "None" line after the summary.
df.info()
# Descriptive statistics for the columns, printed as plain text.
print(df.describe())

In [None]:
# Count the missing values in each column.
missing_per_column = df.isna().sum()
print(missing_per_column)


In [None]:
scaler = StandardScaler()
df['Amount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))

In [None]:
df = df.dropna(subset=['Class'])



In [None]:
X = df.drop(columns=['Class'])
y = df['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [None]:
# Baseline random forest with library-default hyperparameters: fixed seed so
# runs are repeatable, n_jobs=-1 to use all available CPU cores.
model = RandomForestClassifier(n_jobs=-1, random_state=42)


In [None]:
# Train the baseline forest on the training split. As the last expression in
# the cell, the fitted estimator returned by fit() is also displayed.
model.fit(X_train, y_train)

In [None]:
# Per-class probabilities for the held-out rows; column index 1 is used as the
# positive-class score (assumes the labels sort as [0, 1] — TODO confirm).
test_proba = model.predict_proba(X_test)
y_prob = test_proba[:, 1]
# Hard class predictions for the same rows.
y_pred = model.predict(X_test)

In [None]:
# Confusion matrix of actual vs. predicted classes on the test split,
# printed beneath its heading.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")



In [None]:
# Text report of the per-class metrics for the baseline model on the test
# split, printed beneath its heading.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

In [None]:
# Area under the ROC curve for the baseline model, computed from the predicted
# probabilities rather than from the hard class labels.
roc_auc = roc_auc_score(y_true=y_test, y_score=y_prob)
print(f"ROC-AUC Score: {roc_auc}")

In [None]:
# Rank the features from most to least important according to the fitted forest.
n_features = X.shape[1]
importances = model.feature_importances_
indices = np.flip(np.argsort(importances))

# Bar chart of the importances in ranked order, labelled with the column names.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("Feature Importance")
ax.bar(range(n_features), importances[indices], align="center")
ax.set_xticks(range(n_features))
ax.set_xticklabels(X.columns[indices], rotation=90)
# Leave a one-bar margin on each side of the chart.
ax.set_xlim(-1, n_features)
plt.show()

In [None]:
# Hyperparameter search space for the random forest: three candidate values for
# each of four settings, i.e. 3**4 = 81 combinations in total.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)



In [None]:
# Exhaustive search over param_grid with 3-fold cross-validation, run on the
# training split only so the test split stays untouched for final evaluation.
#
# Without an explicit `scoring`, GridSearchCV ranks candidates by the
# estimator's default score (accuracy for classifiers). Accuracy says little
# when one class dominates (presumably the case here, given the stratified
# split — confirm). Select on ROC-AUC instead, the metric this notebook reports
# for both the baseline and the tuned model.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 81 candidates, totalling 243 fits


In [None]:
# Show the parameter combination that scored best in cross-validation.
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)


In [None]:
# Take the estimator built with the best-scoring parameters. The search above
# does not override `refit`, so GridSearchCV's default applies and this model
# has been refit on the whole training split.
best_model = grid_search.best_estimator_

In [None]:
# Per-class probabilities from the tuned model; column index 1 is used as the
# positive-class score (assumes the labels sort as [0, 1] — TODO confirm).
tuned_proba = best_model.predict_proba(X_test)
y_prob_best = tuned_proba[:, 1]
# Hard class predictions from the tuned model for the same held-out rows.
y_pred_best = best_model.predict(X_test)

In [None]:
# Confusion matrix of actual vs. predicted classes for the tuned model on the
# test split, printed beneath its heading.
print("\nConfusion Matrix (Tuned Model):", confusion_matrix(y_test, y_pred_best), sep="\n")



In [None]:
# Text report of the per-class metrics for the tuned model on the test split,
# printed beneath its heading.
print("\nClassification Report (Tuned Model):", classification_report(y_test, y_pred_best), sep="\n")


In [None]:
# Area under the ROC curve for the tuned model, computed from its predicted
# probabilities rather than from the hard class labels.
roc_auc_best = roc_auc_score(y_true=y_test, y_score=y_prob_best)
print(f"ROC-AUC Score (Tuned Model): {roc_auc_best}")