In [1]:
# Question 1: Load & Explore the Credit Card Fraud Detection Dataset

# Step 1: Load the dataset from a CSV file (assume you have a file named creditcard.csv).
# Step 2: Split the data.
# Step 3: Train a Logistic Regression model.
# Step 4: Evaluate using ROC AUC score.


In [3]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification

# Generate a synthetic, heavily imbalanced binary-classification dataset to
# stand in for the credit-card fraud data. Class 1 plays the role of "fraud".
synthetic_params = dict(
    n_samples=10000,          # total number of rows
    n_features=30,            # must equal the number of column names built below
    n_informative=10,         # features that actually carry class signal
    n_redundant=10,           # linear combinations of the informative features
    n_clusters_per_class=1,
    weights=[0.99],           # share of class 0; the remainder goes to class 1
    flip_y=0,                 # no random label flipping
    random_state=42,          # fixed seed so re-running the cell reproduces the data
)
X, y = make_classification(**synthetic_params)

# Column layout: 'Time', then V1..V28, then 'Amount' (1 + 28 + 1 = 30 names),
# with the target appended as the final 'Class' column.
columns = [f'V{i}' for i in range(1, 29)]
feature_names = ['Time', *columns, 'Amount']
df = pd.DataFrame(X, columns=feature_names).assign(Class=y)

# Persist a copy next to the notebook (optional); the row index is not written.
df.to_csv('synthetic_creditcard.csv', index=False)

# Report the class balance and a preview of the first rows.
print(
    "✅ Synthetic dataset created:",
    df['Class'].value_counts(),
    df.head(),
    sep="\n",
)


✅ Synthetic dataset created:
Class
0    9900
1     100
Name: count, dtype: int64
       Time        V1        V2        V3        V4        V5        V6  \
0  2.200894 -0.011383  2.642692  4.792710 -0.398958 -0.849918 -0.344935   
1  1.175031  1.139599 -0.129027 -2.324081 -1.086215 -0.220720  0.272890   
2  1.287240  0.081522  3.074523  2.267023 -0.091004 -0.509467 -0.297756   
3 -0.969638  1.738438  4.968723  5.667066 -0.637043 -1.986710  0.987154   
4  0.413468  0.242332  4.325274  7.648337  0.335792 -2.644580  2.507860   

         V7        V8        V9  ...       V21       V22       V23       V24  \
0  0.145185 -0.819506  2.393682  ...  2.415158  0.983948  8.002133  2.721928   
1  1.393926  0.512857  3.440417  ...  0.713340  0.323408  3.250362 -1.952514   
2  0.707766  2.836111  4.098937  ...  3.881381  0.210610  8.842068 -0.573507   
3 -0.061643 -0.064501  0.178977  ...  1.255979  1.544178  3.618806  2.319650   
4 -0.990021 -0.369042 -2.515250  ... -3.500755  2.266177 -1.310692  