In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, auc
import numpy as np
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

In [None]:
class GenerateROCAUC:
    """Train a random forest on a CSV dataset and produce ROC / AUC values.

    The CSV is expected to contain a ``Class`` column holding the target,
    either as the strings ``'normal'`` / ``'abnormal'`` or already encoded.
    Every other column is used as a feature.
    """

    def __init__(self,
                 data_path,
                 test_size,
                 seed):
        """Load the dataset and remember the split / seeding configuration.

        Args:
            data_path: Path (or any source accepted by ``pd.read_csv``) of
                the CSV file to load.
            test_size: Value forwarded as ``test_size`` to scikit-learn's
                ``train_test_split``.
            seed: Random state shared by the split, the resamplers and the
                random forest.
        """
        self.df = pd.read_csv(data_path)
        self.test_size = test_size
        self.seed = seed

    def get_data_Xy(self):
        """Return the feature frame ``X`` and the encoded target ``y``.

        ``'normal'`` is encoded as 0 and ``'abnormal'`` as 1; any other value
        in the ``Class`` column is left untouched. The encoded column is also
        stored back into ``self.df``, so calling this method repeatedly is
        safe and yields the same result.

        Returns:
            Tuple ``(X, y)`` where ``X`` is ``self.df`` without the ``Class``
            column and ``y`` is the encoded ``Class`` column.
        """
        X = self.df.drop(columns='Class')
        # Assign the result back instead of a chained ``inplace=True`` call:
        # the chained form works on a temporary under pandas copy-on-write
        # and would silently leave the labels as strings.
        encoded = self.df['Class'].replace(['normal', 'abnormal'], [0, 1])
        # Newer pandas no longer downcasts after ``replace``; infer_objects
        # restores an integer dtype when every label was converted.
        self.df['Class'] = encoded.infer_objects()
        y = self.df['Class']
        return X, y

    def train_test_split(self, X, y):
        """Split ``X`` / ``y`` using the configured test size and seed.

        Returns:
            Tuple ``(Xtrain, Xtest, ytrain, ytest)``.
        """
        # Inside the method body the bare name resolves to the module-level
        # scikit-learn function, not to this method.
        Xtrain, Xtest, ytrain, ytest = train_test_split(
            X, y, test_size=self.test_size, random_state=self.seed)
        return Xtrain, Xtest, ytrain, ytest

    def undersampling_balance(self, Xtrain, ytrain):
        """Resample the training data with ``RandomUnderSampler``.

        Returns:
            Tuple ``(X_res, y_res)`` with the resampled features and target.
        """
        rus = RandomUnderSampler(random_state=self.seed)
        X_res, y_res = rus.fit_resample(Xtrain, ytrain)
        return X_res, y_res

    def oversampling_balance(self, Xtrain, ytrain):
        """Resample the training data with ``SMOTE``.

        Returns:
            Tuple ``(X_sm, y_sm)`` with the resampled features and target.
        """
        sm = SMOTE(random_state=self.seed)
        X_sm, y_sm = sm.fit_resample(Xtrain, ytrain)
        return X_sm, y_sm

    def get_fpr_tpr(self, Xtrain, ytrain, Xtest, ytest):
        """Fit a random forest and compute the ROC curve on the test set.

        Args:
            Xtrain: Training features.
            ytrain: Training target.
            Xtest: Test features.
            ytest: Test target; label 1 is treated as the positive class.

        Returns:
            Tuple ``(fpr, tpr)`` as returned by ``roc_curve``.
        """
        rf = RandomForestClassifier(n_estimators=100,
                                    min_samples_split=2,
                                    n_jobs=-1,
                                    verbose=1,
                                    random_state=self.seed)
        rf.fit(Xtrain, ytrain)
        yproba = rf.predict_proba(Xtest)
        # Second probability column is used as the score for pos_label=1.
        fpr, tpr, _ = roc_curve(np.array(ytest), yproba[:, 1], pos_label=1)
        return fpr, tpr

    @staticmethod
    def get_auc(fpr, tpr):
        """Return the area under the curve described by ``fpr`` / ``tpr``.

        Declared as a static method so it can be called both on the class
        (``GenerateROCAUC.get_auc(fpr, tpr)``) and on an instance.
        """
        return auc(fpr, tpr)