In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Read the transactions table from the CSV file sitting next to the notebook.
data = pd.read_csv('creditcard.csv')

In [None]:
# Print the column names, non-null counts, dtypes and memory footprint of `data`.
data.info()

In [None]:
# Preview the first rows of the raw table.
data.head()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for each numeric column.
data.describe()

In [None]:
# Count missing values per column (`isna` is the canonical spelling of `isnull`).
data.isna().sum()

In [None]:
# Bar chart with one bar per `Class` value showing how many rows carry it.
plt.figure(figsize=(6, 4))
sns.countplot(x='Class', data=data, palette='Set2')
# Title and axis labels are applied in a single call on the axes just drawn.
plt.gca().set(
    title='Fraud vs Non-Fraud Transactions',
    xlabel='Class (0: Non-Fraud, 1: Fraud)',
    ylabel='Count',
)
plt.show()

In [None]:
# Scatter of `Amount` against `Time`, with points coloured by `Class`.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Time', y='Amount', hue='Class', data=data, palette='Set1')
plt.gca().set(
    title='Time vs Transaction Amount',
    xlabel='Time (seconds)',
    ylabel='Transaction Amount',
)
plt.show()

In [None]:
# Box plot comparing the distribution of `Amount` between the `Class` values.
plt.figure(figsize=(8, 6))
sns.boxplot(x='Class', y='Amount', data=data, palette='Set3')
plt.gca().set(
    title='Transaction Amount by Class (Fraud vs Non-Fraud)',
    xlabel='Class',
    ylabel='Transaction Amount',
)
plt.show()

In [None]:
# Number of rows per `Class` label in the full table.
data.Class.value_counts()

In [None]:
# Split the table by label: Class == 0 rows go to `legit`, Class == 1 rows to `fraud`.
legit = data.loc[data['Class'] == 0]
fraud = data.loc[data['Class'] == 1]


In [None]:
# Show (rows, columns) of each subset, one per line.
print(legit.shape, fraud.shape, sep='\n')

In [None]:
# Summary statistics of `Amount` for the Class == 0 rows.
legit['Amount'].describe()

In [None]:
# Summary statistics of `Amount` for the Class == 1 rows.
fraud['Amount'].describe()

In [None]:
# Per-class mean of every other column in the full table.
data.groupby('Class').mean()

In [None]:
# Undersample the Class == 0 rows down to the number of Class == 1 rows so the
# two classes are balanced. The size is taken from `fraud` rather than a
# hard-coded 492, and the draw is seeded so that re-running the notebook
# reproduces the same sample (and therefore the same downstream metrics).
legit_sample = legit.sample(n=len(fraud), random_state=2)

In [None]:
# Stack the sampled Class == 0 rows on top of the Class == 1 rows
# (row-wise concatenation is pd.concat's default axis).
new_dataset = pd.concat([legit_sample, fraud])

In [None]:
# Preview the first rows of the combined table.
new_dataset.head()

In [None]:
# Summary statistics of the combined table.
new_dataset.describe()

In [None]:
# Rows per `Class` label after undersampling.
new_dataset.Class.value_counts()

In [None]:
# Per-class column means of the combined table, for comparison with the
# same aggregation computed on the full data.
new_dataset.groupby('Class').mean()

In [None]:
# Target is the `Class` column; features are every remaining column.
Y = new_dataset.Class
X = new_dataset.drop(columns=['Class'])
X

In [None]:
# Display the target series.
Y

In [None]:
# Hold out 20% of the rows for testing. `stratify=Y` keeps the class
# proportions the same in both parts; the split is seeded.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
    stratify=Y,
)

In [None]:
# Shapes of the full, training and test feature tables on one line.
print(*(frame.shape for frame in (X, X_train, X_test)))

In [None]:
# Baseline classifier. max_iter is raised from scikit-learn's default of 100:
# the features are fed in unscaled, and the default solver is presumably unable
# to converge within 100 iterations on them (it would emit a ConvergenceWarning
# and return a half-fitted model) -- NOTE(review): confirm no warning remains.
model = LogisticRegression(max_iter=1000)

In [None]:
# Fit the classifier on the training split; the cell displays the fitted estimator.
model.fit(X_train, Y_train)

In [None]:
# Accuracy on the rows the model was trained on (an optimistic reference point).
X_train_prediction = model.predict(X_train)
# accuracy_score expects (y_true, y_pred); the score is the same either way,
# but the true labels go first to match the signature and evaluate_model below.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy on Training data : ', training_data_accuracy)

In [None]:
# Accuracy on the held-out test split.
X_test_prediction = model.predict(X_test)
# accuracy_score expects (y_true, y_pred); the score is the same either way,
# but the true labels go first to match the signature and evaluate_model below.
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score on Test Data : ', test_data_accuracy)

In [None]:
def evaluate_model(model, name):
    """Fit ``model`` on the training split and print its test-split metrics.

    Reads the notebook globals ``X_train``, ``Y_train``, ``X_test`` and
    ``Y_test``. The estimator is fitted in place, so the caller's object is
    trained after the call.

    Args:
        model: Estimator exposing ``fit``, ``predict`` and ``predict_proba``.
        name: Label printed at the top of the report.

    Prints accuracy, the confusion matrix, the classification report and the
    ROC AUC computed from column 1 of ``predict_proba``. Returns nothing.
    """
    model.fit(X_train, Y_train)

    predicted_labels = model.predict(X_test)
    class_one_scores = model.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(Y_test, predicted_labels)
    auc = roc_auc_score(Y_test, class_one_scores)

    print(f"Model: {name}", f"Accuracy: {accuracy:.4f}", "Confusion Matrix:", sep="\n")
    print(confusion_matrix(Y_test, predicted_labels))
    print("Classification Report:")
    print(classification_report(Y_test, predicted_labels))
    print(f"ROC AUC Score: {auc:.4f}")
    print("-" * 60)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, roc_auc_score

In [None]:
# Candidate classifiers keyed by the name shown in reports and plot legends.
models = {
    # max_iter raised from the default 100: the features are unscaled, so the
    # default is presumably too low for the solver to converge -- NOTE(review):
    # confirm no ConvergenceWarning remains.
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42)
}

for name, model in models.items():
    evaluate_model(model, name)

# XGBoost is optional. Only the import is guarded, so an ImportError raised
# while fitting or evaluating is not misreported as "not installed".
try:
    from xgboost import XGBClassifier
except ImportError:
    print("XGBoost not installed. Skipping XGBoost model.")
else:
    # `use_label_encoder` is no longer passed: it is deprecated and current
    # XGBoost releases ignore it with a warning.
    xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)
    evaluate_model(xgb_model, "XGBoost")

In [None]:
def plot_conf_matrix(y_true, y_pred, title):
    """Show the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    The heatmap is drawn on matplotlib's current axes (no ``ax`` is passed to
    seaborn) with integer cell annotations, then displayed immediately with
    ``plt.show()``.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.
        title: Text appended to the "Confusion Matrix: " figure title.
    """
    counts = confusion_matrix(y_true, y_pred)
    heatmap_axes = sns.heatmap(counts, annot=True, fmt='d', cmap='Blues')
    heatmap_axes.set(
        title=f'Confusion Matrix: {title}',
        xlabel='Predicted',
        ylabel='Actual',
    )
    plt.show()

def plot_roc(model, X_test, Y_test, name):
    """Add the ROC curve of a fitted ``model`` to the current matplotlib axes.

    The function neither creates nor shows a figure, so several calls can
    overlay their curves on one plot; the caller adds the legend and shows it.

    Args:
        model: Fitted estimator exposing ``predict_proba``.
        X_test: Feature rows to score.
        Y_test: True labels for ``X_test``.
        name: Legend label; the AUC (two decimals) is appended to it.
    """
    # Score once and reuse the result for both the curve and the AUC; the
    # probabilities were previously computed twice.
    scores = model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(Y_test, scores)
    auc = roc_auc_score(Y_test, scores)
    plt.plot(fpr, tpr, label=f'{name} (AUC = {auc:.2f})')

In [None]:
# Pass 1: fit each model, print its report and show its confusion matrix.
# plot_conf_matrix calls plt.show() itself, so it must not run while the shared
# ROC figure is open: interleaving the two scattered the ROC curves over
# several figures and drew heatmaps on top of them.
for name, model in models.items():
    model.fit(X_train, Y_train)
    Y_pred = model.predict(X_test)
    print(f"\n{name}")
    print(classification_report(Y_test, Y_pred))
    plt.figure()  # fresh canvas for this model's heatmap
    plot_conf_matrix(Y_test, Y_pred, name)

# Pass 2: overlay every model's ROC curve on a single figure.
plt.figure(figsize=(10, 6))
for name, model in models.items():
    plot_roc(model, X_test, Y_test, name)

# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
plt.title("ROC Curves")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend()
plt.show()

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV

In [None]:
# Grid search over eight random-forest settings (2 x 2 x 2), scored by F1
# with 3-fold cross-validation on the training split.
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[5, 10],
    min_samples_split=[2, 5],
)
grid = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    scoring='f1',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid.fit(X_train, Y_train)

# Evaluate the best estimator found by the search on the held-out test split.
best_model = grid.best_estimator_
print("Best Parameters:", grid.best_params_)
Y_pred = best_model.predict(X_test)
print("\nTuned Random Forest")
print(classification_report(Y_test, Y_pred))
plot_conf_matrix(Y_test, Y_pred, "Tuned Random Forest")

