### Detecting & Handling Imbalanced Data: Visualizing Class Imbalance
**Question**: Load the Credit Card Fraud Detection dataset and visualize the class imbalance. Then apply random undersampling to balance it.

In [1]:
# Write your code here
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import resample

# 1. Load Dataset
try:
    df = pd.read_csv("creditcard.csv")  # Ensure the file is in the working directory
except FileNotFoundError as err:
    # Chain explicitly so the underlying pandas error stays visible in the traceback.
    raise FileNotFoundError("creditcard.csv not found. Please provide the correct path.") from err


def _plot_class_distribution(frame, title):
    """Show a bar chart of the number of rows per value of the 'Class' column.

    Args:
        frame: DataFrame containing a 'Class' column (0 = legit, 1 = fraud).
        title: Text displayed as the chart title.
    """
    # value_counts() orders by frequency, so with tied (balanced) counts the
    # bar order is not tied to the label. Sorting by label keeps the first
    # bar/colour on class 0 and the second on class 1 in every chart.
    counts = frame['Class'].value_counts().sort_index()

    plt.figure(figsize=(6, 4))
    counts.plot(kind='bar', color=['skyblue', 'salmon'])
    plt.title(title)
    plt.xlabel("Class (0 = Legit, 1 = Fraud)")
    plt.ylabel("Count")
    plt.xticks(rotation=0)
    plt.grid(axis='y')
    plt.show()


# 2. Visualize Original Class Distribution
_plot_class_distribution(df, "Original Class Distribution")

# 3. Handle Imbalance with Random Undersampling
# Separate majority and minority classes
df_majority = df[df['Class'] == 0]
df_minority = df[df['Class'] == 1]

# Downsample majority class (without replacement) to the minority class size
df_majority_downsampled = resample(
    df_majority,
    replace=False,
    n_samples=len(df_minority),
    random_state=42
)

# Combine minority and downsampled majority, then shuffle so the rows are not
# grouped by class (a plain concat leaves all class-0 rows before class-1 rows,
# which would skew any later ordered split or batching). The original index
# labels are kept.
df_balanced = pd.concat([df_majority_downsampled, df_minority]).sample(
    frac=1, random_state=42
)

# 4. Visualize Balanced Class Distribution
_plot_class_distribution(df_balanced, "Balanced Class Distribution (Random Undersampling)")


FileNotFoundError: creditcard.csv not found. Please provide the correct path.