<a href="https://colab.research.google.com/github/msr524/msr524_codesoft/blob/main/creditcard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Load the dataset
data = pd.read_csv('/content/creditcard.csv')

# Drop rows with NaN values in the target variable 'Class'
data.dropna(subset=['Class'], inplace=True)

# Perform data preprocessing
# Assuming 'Amount' and 'Time' are numerical features to be normalized
scaler = StandardScaler()
data[['Amount', 'Time']] = scaler.fit_transform(data[['Amount', 'Time']])

# Split the dataset into features (X) and target variable (y)
X = data.drop('Class', axis=1)
y = data['Class']
# Determine if oversampling or undersampling is required
class_counts = y.value_counts()
if class_counts[0] > class_counts[1]:
    print("Class imbalance detected. Using undersampling.")
    sampler = RandomUnderSampler(random_state=42)
else:
    print("Class imbalance detected. Using oversampling.")
    sampler = SMOTE(random_state=42)

# Resample the dataset to handle class imbalance
X_resampled, y_resampled = sampler.fit_resample(X, y)

# Split the resampled dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train a logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)
# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Calculate and print accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Class imbalance detected. Using undersampling.
Classification Report:
              precision    recall  f1-score   support

         0.0       0.84      0.97      0.90        37
         1.0       0.97      0.81      0.88        37

    accuracy                           0.89        74
   macro avg       0.90      0.89      0.89        74
weighted avg       0.90      0.89      0.89        74

Confusion Matrix:
[[36  1]
 [ 7 30]]
Accuracy: 0.8918918918918919
