In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.impute import SimpleImputer

# Load the dataset
data = pd.read_csv('creditcard.csv')

# Split features and target
X = data.drop('Class', axis=1)
y = data['Class']

# Check for missing values in y
print(y.isnull().sum())

# Remove rows with missing values in y
X = X[~y.isnull()]
y = y.dropna()

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Data Preprocessing: Normalize the features and impute missing values
scaler = StandardScaler()
imputer = SimpleImputer(strategy='mean')

X_train_scaled = scaler.fit_transform(imputer.fit_transform(X_train))
X_test_scaled = scaler.transform(imputer.transform(X_test))

# Handling Class Imbalance: Use Random Oversampling
oversampler = RandomOverSampler(random_state=42)
X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train_scaled, y_train)

# Model Selection and Training: Logistic Regression
model = LogisticRegression(random_state=42)
model.fit(X_train_resampled, y_train_resampled)

# Model Evaluation
y_pred = model.predict(X_test_scaled)
report = classification_report(y_test, y_pred)
print(report)











1
              precision    recall  f1-score   support

         0.0       1.00      0.99      0.99     11471
         1.0       0.16      0.86      0.27        35

    accuracy                           0.99     11506
   macro avg       0.58      0.92      0.63     11506
weighted avg       1.00      0.99      0.99     11506



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
