<a href="https://colab.research.google.com/github/manav616/ADS_EXPS/blob/main/ADS5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Importing Libraries**

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE, BorderlineSMOTE
from imblearn.combine import SMOTEENN


In [None]:
# Load the credit card dataset from a CSV file in the working directory.
credit_card_data = pd.read_csv("creditcard.csv")

In [None]:
# Target vector: the 'Class' column.
y = credit_card_data['Class']
# Feature matrix: every column except the target.
X = credit_card_data.drop(columns='Class')


**Drop rows with missing target values**

In [None]:
# Boolean mask: True where the target label is missing.
missing_indices = y.isnull()

# Keep only labelled rows. `~` is the boolean NOT operator; unary `-` is
# arithmetic negation and is rejected by NumPy for boolean arrays, so it should
# not be used to invert a mask.
# The same mask filters both X and y, so features and labels stay row-aligned.
X = X[~missing_indices]
y = y[~missing_indices]

**Imbalanced class distribution after handling missing values**

In [None]:
# Count the rows per class label and display the tally under a header line.
class_counts = y.value_counts()
print("\nImbalanced Class Distribution after handling missing values:")
print(class_counts)


Imbalanced Class Distribution after handling missing values:
0.0    1983
1.0       2
Name: Class, dtype: int64


In [None]:
# Hold out 20% of the rows for evaluation (fixed seed for reproducibility).
# stratify=y asks scikit-learn to preserve the class proportions in both
# partitions; with a heavily imbalanced target a purely random split can
# scatter the few minority rows arbitrarily between train and test.
# NOTE(review): when the minority class has only a handful of rows the test
# partition can still end up with none of them, in which case the
# classification report cannot say anything about minority-class recall.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Dictionary to store results for different oversampling techniques:
# technique name -> (resampled features, resampled labels)
results = {}


**SMOTE**

In [None]:
# Configure SMOTE with a fixed seed; k_neighbors is set to 1 here and can be
# tuned for the data at hand.
smote = SMOTE(k_neighbors=1, random_state=42)

# Oversample the training split only.
smote_resampled = smote.fit_resample(X_train, y_train)
X_resampled_smote, y_resampled_smote = smote_resampled

# Register the resampled pair under the technique's name.
results['SMOTE'] = (X_resampled_smote, y_resampled_smote)


**Borderline-SMOTE**

In [None]:
# Oversample the training split with Borderline-SMOTE (fixed seed, all other
# parameters left at their defaults).
borderline_smote = BorderlineSMOTE(random_state=42)
borderline_resampled = borderline_smote.fit_resample(X_train, y_train)
X_resampled_borderline, y_resampled_borderline = borderline_resampled

# Register the resampled pair under the technique's name.
results['Borderline-SMOTE'] = (X_resampled_borderline, y_resampled_borderline)



**SMOTE-ENN**

In [None]:
# SMOTE oversampling followed by Edited Nearest Neighbours cleaning.
# The inner SMOTE is seeded explicitly: when a SMOTE object is supplied via
# `smote=`, SMOTEENN uses that object's own settings, so the outer
# random_state alone does not make the oversampling step reproducible.
smote_enn = SMOTEENN(
    random_state=42,
    smote=SMOTE(random_state=42, k_neighbors=1),
)
X_resampled_smoteenn, y_resampled_smoteenn = smote_enn.fit_resample(X_train, y_train)

# Register the resampled pair under the technique's name.
results['SMOTE-ENN'] = (X_resampled_smoteenn, y_resampled_smoteenn)


In [None]:
# For every stored resampling technique: train a fresh random forest on the
# resampled training data and print its classification report on the
# untouched test split.
for technique in results:
    X_resampled, y_resampled = results[technique]
    print(f"\nResults for {technique}:")

    # Same seed for every technique so only the training data differs.
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_resampled, y_resampled)
    y_pred = clf.predict(X_test)

    print("Classification Report on Test Set:")
    report = classification_report(y_test, y_pred)
    print(report)






Results for SMOTE:
Classification Report on Test Set:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       397

    accuracy                           1.00       397
   macro avg       1.00      1.00      1.00       397
weighted avg       1.00      1.00      1.00       397


Results for SMOTE-ENN:
Classification Report on Test Set:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       397

    accuracy                           1.00       397
   macro avg       1.00      1.00      1.00       397
weighted avg       1.00      1.00      1.00       397


Results for Borderline-SMOTE:
Classification Report on Test Set:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       397

    accuracy                           1.00       397
   macro avg       1.00      1.00      1.00       397
weighted avg       1.00      1.00      1.00       397

