### Detecting & Handling Imbalanced Data: Visualizing Class Imbalance
**Question**: Load the Credit Card Fraud Detection dataset and visualize the class imbalance. Then apply random undersampling to balance it.

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import resample
import os

def load_dataset(filepath):
    """
    Read a CSV file into a DataFrame and verify it carries a 'Class' column.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The DataFrame produced by ``pd.read_csv(filepath)``.

    Raises:
        FileNotFoundError: If ``filepath`` is not an existing regular file.
        ValueError: If the parsed data has no 'Class' column.
    """
    file_exists = os.path.isfile(filepath)
    if not file_exists:
        raise FileNotFoundError(f"File not found: {filepath}")

    dataset = pd.read_csv(filepath)

    # The label column is needed by every downstream step, so fail early.
    has_label_column = 'Class' in dataset.columns
    if has_label_column:
        return dataset

    raise ValueError("Required column 'Class' not found in dataset.")

def visualize_class_distribution(df, title="Class Distribution"):
    """
    Show a bar chart of the number of rows per value of ``df['Class']``.

    Args:
        df: DataFrame that contains a 'Class' column.
        title: Text displayed as the chart title.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(6, 4))

    # Sort by class label so the bars appear in label order rather than
    # in descending-count order (the value_counts default).
    class_counts = df['Class'].value_counts().sort_index()
    class_counts.plot(kind='bar', color=['skyblue', 'salmon'], ax=ax)

    ax.set_title(title)
    ax.set_xlabel("Class (0 = Legit, 1 = Fraud)")
    ax.set_ylabel("Count")

    # Keep the class labels horizontal.
    plt.setp(ax.get_xticklabels(), rotation=0)
    ax.grid(axis='y')

    fig.tight_layout()
    plt.show()

def apply_random_undersampling(df):
    """
    Balance a binary-labelled dataset by randomly undersampling the larger class.

    Rows are split on ``df['Class']`` into class 0 and class 1. Whichever
    group has more rows is sampled without replacement down to the size of
    the smaller group; the smaller group is kept in full.

    Args:
        df: DataFrame with a 'Class' column holding the labels 0 and 1.

    Returns:
        A new DataFrame made of the downsampled larger class followed by
        all rows of the smaller class. Rows are not shuffled afterwards.

    Raises:
        ValueError: If either class 0 or class 1 has no rows, since there
            is then nothing to balance against.
    """
    df_class_0 = df[df['Class'] == 0]
    df_class_1 = df[df['Class'] == 1]

    if df_class_0.empty or df_class_1.empty:
        raise ValueError(
            "Cannot balance dataset: both classes (0 and 1) must be present."
        )

    # Decide which class is the majority from the data instead of assuming
    # it is class 0; sampling without replacement cannot draw more rows than
    # the source group holds. On a tie class 0 is treated as the majority.
    if len(df_class_1) > len(df_class_0):
        df_majority, df_minority = df_class_1, df_class_0
    else:
        df_majority, df_minority = df_class_0, df_class_1

    df_majority_downsampled = resample(
        df_majority,
        replace=False,
        n_samples=len(df_minority),
        random_state=42  # fixed seed so the same rows are drawn on every run
    )

    return pd.concat([df_majority_downsampled, df_minority])

# Script entry point: plot the raw class counts, undersample, then plot again.
if __name__ == "__main__":
    try:
        # A missing file or a missing 'Class' column is reported below
        # instead of surfacing as a traceback.
        df = load_dataset("creditcard.csv")
        visualize_class_distribution(
            df,
            title="Original Class Distribution",
        )

        # Balance the classes and show the resulting distribution.
        df_balanced = apply_random_undersampling(df)
        visualize_class_distribution(
            df_balanced,
            title="Balanced Class Distribution (Random Undersampling)",
        )
    except (FileNotFoundError, ValueError) as err:
        print(f"Error: {err}")


Error: File not found: creditcard.csv
