# credit card fraud detection

# AUTHOR   : Richa Mishra

# DOMAIN       : Data Science 

# AIM       : to build a machine learning model to identify fraudulent credit card transactions.

In [1]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import pipeline

In [2]:
# Load the dataset
# Assumes the dataset is in CSV format and has a 'Class' column where 1 indicates
# fraud and 0 indicates genuine.
# The file location can be overridden with the CREDITCARD_CSV environment variable
# so the notebook is runnable on machines other than the author's; when the variable
# is not set, the original local path is used unchanged.
DATA_PATH = os.environ.get('CREDITCARD_CSV', 'C:\\Users\\richa\\Downloads\\creditcard.csv.zip')
data = pd.read_csv(DATA_PATH)


In [3]:
# Report how many missing entries each column contains
missing_per_column = data.isna().sum()
print(missing_per_column)

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64


In [4]:
# Pull out the label column, then keep every remaining column as a feature
y = data['Class']
X = data.drop(columns=['Class'])

In [5]:
# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
# Handle class imbalance using SMOTE for oversampling
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

In [7]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.3, random_state=42, stratify=y_resampled)


In [8]:
# Train a Random Forest classifier
# random_state pins the bootstrap/feature sampling so runs are repeatable.
# n_jobs=-1 builds the trees on all available cores; it only affects wall-clock
# time -- with a fixed random_state the fitted forest is the same as single-threaded.
model = RandomForestClassifier(random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

In [9]:
# Generate class predictions for the held-out samples
y_pred = model.predict(X=X_test)

In [10]:
# Score the predictions against the true test labels:
# the confusion matrix first, then precision / recall / F1 in one pass.
conf_matrix = confusion_matrix(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (precision_score, recall_score, f1_score)
)


In [11]:
# Emit the evaluation summary, one item per line, followed by the confusion matrix
print(
    f'Precision: {precision}',
    f'Recall: {recall}',
    f'F1 Score: {f1}',
    'Confusion Matrix:',
    conf_matrix,
    sep='\n',
)

Precision: 0.9997772906825455
Recall: 1.0
F1 Score: 0.9998886329400318
Confusion Matrix:
[[85276    19]
 [    0 85294]]
