In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Toy employee table (ten rows), declared column by column.
data = {
    'ID': list(range(1, 11)),
    'Name': [
        'Alice', 'Bob', 'Charlie', 'David', 'Eve',
        'Frank', 'Grace', 'Hannah', 'Ivy', 'Jack',
    ],
    'Age': [30, 25, 28, 35, 40, 30, 25, 40, 28, 35],
    'Salary': [
        70000, 60000, 60000, 80000, 70000,
        60000, 60000, 80000, 70000, 60000,
    ],
    'Department': [
        'HR', 'IT', 'IT', 'Finance', 'HR',
        'IT', 'IT', 'Finance', 'HR', 'Finance',
    ],
}

df = pd.DataFrame(data)

# Binary label derived from the department: 1 for 'IT' rows, 0 for all others.
is_it = df['Department'].eq('IT')
df['Target'] = np.where(is_it, 1, 0)

# Report how many rows fall into each class before any resampling.
target_counts = df['Target'].value_counts()
print("Original target class distribution:\n", target_counts)

# Features are whatever remains once the identifier, name, department and
# label columns are removed (here: Age and Salary); the label is the target.
non_feature_columns = ['ID', 'Name', 'Department', 'Target']
X = df.drop(non_feature_columns, axis="columns")
y = df['Target']

# Hold out 30% of the rows as a test set; the fixed seed makes the split
# repeatable across runs.
# NOTE(review): no `stratify` argument is passed, so the class mix of the
# training rows is whatever this seed happens to produce. Passing `stratify=y`
# may be preferable, but it would change the split and the counts printed by
# the resampling steps that follow - confirm before changing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

def _show_counts(heading, labels):
    """Print `heading` followed by the number of rows per class in `labels`."""
    print(heading, pd.Series(labels).value_counts())


# Each method below is fitted on the training split only and returns a
# resampled (features, labels) pair; the test split is not touched.

# Method 1: random over-sampling.
ros = RandomOverSampler(random_state=42)
X_ros, y_ros = ros.fit_resample(X_train, y_train)
_show_counts("\nAfter Random Over-Sampling:\n", y_ros)

# Method 2: random under-sampling.
rus = RandomUnderSampler(random_state=42)
X_rus, y_rus = rus.fit_resample(X_train, y_train)
_show_counts("\nAfter Random Under-Sampling:\n", y_rus)

# Method 3: SMOTE with the neighbour count reduced to 1.
# NOTE(review): presumably k_neighbors is lowered because the minority class
# in the training split is very small - confirm against the SMOTE docs before
# reusing this value on other data.
smote = SMOTE(random_state=42, k_neighbors=1)
X_smote, y_smote = smote.fit_resample(X_train, y_train)
_show_counts("\nAfter SMOTE:\n", y_smote)

Original target class distribution:
 Target
0    6
1    4
Name: count, dtype: int64

After Random Over-Sampling:
 Target
0    5
1    5
Name: count, dtype: int64

After Random Under-Sampling:
 Target
0    2
1    2
Name: count, dtype: int64

After SMOTE:
 Target
0    5
1    5
Name: count, dtype: int64
