# Credit Card Fraud Detection - Simplified Version

This notebook loads the credit card transactions dataset, balances the two classes by undersampling legitimate transactions, and trains Logistic Regression and Random Forest classifiers to detect fraud.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

## Load and Inspect Dataset

In [None]:
# Read the transactions CSV and discard exact duplicate rows in one chained
# expression. NOTE(review): the path looks like a mounted Colab Drive folder;
# adjust it when running elsewhere.
df = pd.read_csv(
    '/content/drive/MyDrive/capstone project/creditcard.csv'
).drop_duplicates()
# Last expression of the cell, so the notebook renders the first rows.
df.head()

## Check Class Distribution

In [None]:
# Show how many rows carry each value of the 'Class' label.
print(df['Class'].value_counts())
# Partition the rows by label with boolean masks: 0 goes to `legit`,
# 1 goes to `fraud`.
legit = df.loc[df['Class'].eq(0)]
fraud = df.loc[df['Class'].eq(1)]

## Sample Legit Transactions to Balance the Dataset

In [None]:
# Draw as many legit rows as there are fraud rows (fixed seed so the draw is
# repeatable), then stack both groups into one balanced frame.
# NOTE(review): balancing happens before the train/test split, so the test set
# is balanced too and will not reflect the original class ratio.
legit_sample = legit.sample(n=fraud.shape[0], random_state=42)
balanced_df = pd.concat((legit_sample, fraud))
# Confirm that both classes now have the same row count.
print(balanced_df['Class'].value_counts())

## Prepare Features and Target

In [None]:
# Target vector: the 'Class' label column.
y = balanced_df['Class']
# Feature matrix: every remaining column.
X = balanced_df.drop('Class', axis=1)

## Train-Test Split

In [None]:
# Hold out 20% of the balanced rows for evaluation. Stratifying on y keeps the
# class proportions equal in both partitions; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

## Scale the Features

In [None]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both partitions so the test rows never influence
# the statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Train Logistic Regression

In [None]:
# Fit logistic regression on the scaled training data; max_iter is set to 1000
# iterations for the solver.
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
# Predict labels for the held-out rows.
y_pred_lr = lr.predict(X_test)
# Report the per-class metrics, then the confusion matrix.
print(
    'Logistic Regression Results:',
    classification_report(y_test, y_pred_lr),
    sep='\n',
)
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred_lr))

## Train Random Forest Classifier

In [None]:
# Fit a random forest with a fixed seed on the same training data.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
# Predict labels for the held-out rows.
y_pred_rf = rf.predict(X_test)
# Report the per-class metrics, then the confusion matrix.
print(
    'Random Forest Results:',
    classification_report(y_test, y_pred_rf),
    sep='\n',
)
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred_rf))