project

In [2]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE

# Load dataset
data = pd.read_csv("creditcard.csv")  # Update path

# Check for missing values
print("Missing values per column:\n", data.isnull().sum())

# Drop rows with missing target
data = data.dropna(subset=['Class'])

# Optional: fill missing features if any
data = data.fillna(data.mean())

# Separate features and target
X = data.drop("Class", axis=1)
y = data["Class"]

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Handle class imbalance using SMOTE
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_scaled, y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_res, y_res, test_size=0.2, random_state=42
)

# Logistic Regression with balanced class weights
model = LogisticRegression(max_iter=2000, class_weight='balanced')

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Missing values per column:
 Time      0
V1        1
V2        1
V3        1
V4        1
V5        1
V6        1
V7        1
V8        1
V9        1
V10       1
V11       1
V12       1
V13       1
V14       1
V15       1
V16       1
V17       1
V18       1
V19       1
V20       1
V21       1
V22       1
V23       1
V24       1
V25       1
V26       1
V27       1
V28       1
Amount    1
Class     1
dtype: int64
Accuracy: 0.9812443642921551

Classification Report:
               precision    recall  f1-score   support

         0.0       0.98      0.98      0.98      5546
         1.0       0.98      0.98      0.98      5544

    accuracy                           0.98     11090
   macro avg       0.98      0.98      0.98     11090
weighted avg       0.98      0.98      0.98     11090

