<h1>Download the Credit Card Fraud Detection dataset. Use the SMOTE (Synthetic
Minority Oversampling Technique) algorithm to balance the dataset. Then, train
and evaluate a Logistic Regression model on the data before and after applying
SMOTE. Compare the models' performance in both cases.</h1>

<h1>dataset link</h1>

<h1>https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud</h1>

In [None]:
#!pip install pandas matplotlib seaborn scikit-learn imbalanced-learn --quiet


In [None]:
#pip install -U scikit-learn imbalanced-learn


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings('ignore')
# Load dataset (expects creditcard.csv in the notebook's working directory)
df = pd.read_csv("creditcard.csv")

# Features and target
X = df.drop(columns=["Class"])
y = df["Class"]

# Train-test split.
# stratify=y keeps the class proportions of `y` the same in the training and
# test partitions; without it a purely random split can leave the test set with
# an unrepresentative share of the minority class, which makes the
# before/after-SMOTE metrics noisy.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Class distribution before SMOTE.
# Plotted on the training split, because that is the data SMOTE is applied to,
# so this chart is directly comparable with the "after SMOTE" chart.
plt.figure(figsize=(5, 4))
sns.countplot(x=y_train)
plt.title("Class Distribution Before SMOTE")
plt.xlabel("Class")
plt.ylabel("Count")
plt.show()



In [None]:
# Number of rows per class label in the full dataset (shown as the cell output).
class_counts = df["Class"].value_counts()
class_counts

In [None]:
# Baseline: logistic regression fitted on the original (un-resampled) training split.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Hard class predictions for the held-out test split.
y_pred = model.predict(X_test)


In [None]:
# Confusion matrix of the baseline model on the test split.
# scikit-learn puts the true labels on the rows and the predicted labels on the
# columns, so the axes are labelled accordingly; without labels the
# off-diagonal cells cannot be told apart.
cm_before = confusion_matrix(y_test, y_pred)
sns.heatmap(cm_before, annot=True, fmt="d")
plt.title("Confusion Matrix Before SMOTE")
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.show()

In [None]:
# Oversample with SMOTE, using the training split only; the test split is
# left untouched so evaluation happens on un-resampled data.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
X_train_smote, y_train_smote = resampled

# Bar chart of the class counts in the resampled training labels.
fig, ax = plt.subplots(figsize=(5, 4))
sns.countplot(x=y_train_smote, ax=ax)
ax.set_title("Class Distribution After SMOTE")
ax.set_xlabel("Class")
ax.set_ylabel("Count")
plt.show()

# Same estimator settings as the baseline, but fitted on the SMOTE-resampled
# training data. fit() returns the estimator, so the calls are chained.
model_smote = LogisticRegression(max_iter=1000).fit(X_train_smote, y_train_smote)

# Predictions are made on the original, un-resampled test split.
y_pred_smote = model_smote.predict(X_test)


In [None]:
# Confusion matrix of the SMOTE-trained model on the test split.
# scikit-learn puts the true labels on the rows and the predicted labels on the
# columns, so the axes are labelled accordingly; without labels the
# off-diagonal cells cannot be told apart.
cm_after = confusion_matrix(y_test, y_pred_smote)
sns.heatmap(cm_after, annot=True, fmt="d")
plt.title("Confusion Matrix After SMOTE")
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.show()

In [None]:

# Scores used for the ROC curves: column 1 of predict_proba, i.e. the
# probability assigned to the second entry of each model's classes_
# (presumably label 1 — confirm against model.classes_).
proba_before = model.predict_proba(X_test)[:, 1]
proba_after = model_smote.predict_proba(X_test)[:, 1]

fpr1, tpr1, _ = roc_curve(y_test, proba_before)
fpr2, tpr2, _ = roc_curve(y_test, proba_after)

# Area under each curve, computed once and reused in the legend labels.
auc_before = auc(fpr1, tpr1)
auc_after = auc(fpr2, tpr2)

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(fpr1, tpr1, label=f"Before SMOTE (AUC = {auc_before:.2f})")
ax.plot(fpr2, tpr2, label=f"After SMOTE (AUC = {auc_after:.2f})")
# Dashed diagonal as a visual reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve Comparison")
ax.legend()
plt.show()

# Classification reports.
# sep="" stops print() from inserting a space between the heading's trailing
# newline and the report text; that stray space shifted the report's header row
# one column to the right of the rows beneath it.
print("Before SMOTE:\n", classification_report(y_test, y_pred), sep="")
print("After SMOTE:\n", classification_report(y_test, y_pred_smote), sep="")


<h1>Load the MNIST dataset using the following code:
from tensorflow.keras.datasets import mnist
# Loads the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
Perform a minimum of five EDA steps on the above-mentioned dataset.</h1>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.datasets import mnist
from tensorflow.image import flip_left_right, rot90

In [None]:
# mnist.load_data() returns two (images, labels) pairs: train first, then test.
# NOTE: y_train / y_test are rebound here, replacing the credit-card labels
# created earlier in this notebook.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# 1. Function to plot images from the dataset
def plot_sample_images(images, labels, count=10):
    """Show the first ``count`` images in a single row, each titled with its label.

    Parameters
    ----------
    images : sequence of 2-D arrays
        Images to display; drawn with a grayscale colormap.
    labels : sequence
        Label for each image, aligned by position with ``images``.
    count : int, default 10
        Maximum number of images to show. If fewer images (or labels) are
        available, only the available ones are drawn instead of raising
        ``IndexError``.
    """
    # Never index past the end of either sequence; a negative count shows nothing.
    n_shown = max(0, min(count, len(images), len(labels)))

    # 1.5 inches per image keeps the default (count=10) at the original 15x2
    # size while letting other counts scale; max(..., 1) avoids a zero-width figure.
    plt.figure(figsize=(1.5 * max(n_shown, 1), 2))
    for i in range(n_shown):
        plt.subplot(1, n_shown, i + 1)
        plt.imshow(images[i], cmap='gray')
        plt.title(f"Label: {labels[i]}")
        plt.axis('off')
    plt.show()

plot_sample_images(x_train, y_train)

In [None]:
# 2. Bar chart of how many training images carry each digit label
fig, ax = plt.subplots()
sns.countplot(x=y_train, ax=ax)
ax.set_title("Class Distribution")
ax.set_xlabel("Digit")
ax.set_ylabel("Count")
plt.show()


In [None]:
# 3.Plot the distribution of image sizes
sizes = [(img.shape[0], img.shape[1]) for img in x_train]

# Turn each (height, width) pair into one categorical label such as "28x28".
# Passing the raw list of tuples to seaborn makes it treat every tuple as a
# separate wide-form vector, which is extremely slow for tens of thousands of
# images and does not show how many images have each size.
size_labels = [f"{height}x{width}" for height, width in sizes]
sns.countplot(x=size_labels)
plt.title("Image Size Distribution")
plt.xlabel("Size")
plt.ylabel("Frequency")
plt.show()

In [None]:
# 4. Histogram of the pixel intensities of one sample image (the first training image)
sample_img = x_train[0]

# Flatten the image so every pixel contributes one value to the histogram.
pixel_values = sample_img.reshape(-1)

fig, ax = plt.subplots()
ax.hist(pixel_values, bins=50, color='blue', alpha=0.7)
ax.set_title("Pixel Value Distribution")
ax.set_xlabel("Pixel Intensity")
ax.set_ylabel("Count")
plt.show()


In [None]:
#5. Function to apply basic augmentation techniques
def augment_image(img):
    """Return a left-right flipped copy and a 90-degree rotated copy of ``img``.

    A trailing axis of length 1 is appended before calling the TensorFlow image
    ops and squeezed away again afterwards, so both results are NumPy arrays
    without that extra axis.

    Returns
    -------
    tuple
        ``(flipped, rotated)`` in that order.
    """
    # Shared input for both ops: the image with an added last axis.
    with_channel = img[..., np.newaxis]

    mirrored = flip_left_right(with_channel).numpy().squeeze()
    turned = rot90(with_channel).numpy().squeeze()
    return mirrored, turned

flip, rot = augment_image(x_train[0])

# Original and its two augmented versions, side by side in a 1x3 grid.
panels = [
    ("Original", x_train[0]),
    ("Flipped", flip),
    ("Rotated", rot),
]
for position, (caption, picture) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.imshow(picture, cmap='gray')
    plt.title(caption)
    plt.axis('off')
plt.show()

In [None]:
# 6. Mean and standard deviation over every pixel of every training image
mean = x_train.mean()
std = x_train.std()
print(f"Mean pixel value: {mean:.2f}, Standard deviation: {std:.2f}")


In [None]:
#7. Show the first training image found for each digit 0-9 in a 2x5 grid
fig = plt.figure(figsize=(12, 4))
for digit in range(10):
    # Position of the first training label equal to this digit.
    idx = np.flatnonzero(y_train == digit)[0]
    ax = fig.add_subplot(2, 5, digit + 1)
    ax.imshow(x_train[idx], cmap='gray')
    ax.set_title(f"Digit: {digit}")
    ax.axis('off')
fig.tight_layout()
plt.show()
