In [3]:
import random
import math
import copy
import time
import json
import datetime
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import f1_score,balanced_accuracy_score

from imblearn.over_sampling import SMOTE,RandomOverSampler,ADASYN
from imblearn.pipeline import make_pipeline as pipe_imblearn
from imblearn.metrics import geometric_mean_score

from oversampling_aco import OVRS_ACO

import warnings
warnings.filterwarnings('ignore')

# Dataset

In [4]:
# Read the NR_AB table and discard its 'Unnamed: 0' column
# (presumably an index written out by an earlier to_csv — confirm).
df = pd.read_csv("data/NR_AB.csv").drop(columns=['Unnamed: 0'])

In [5]:
# Separate the label vector from the feature matrix. 'drug_no' and
# 'protein_no' are removed together with the label
# (presumably identifier columns, not features — confirm).
y = df['label']
X = df.drop(columns=['label', 'drug_no', 'protein_no'])

# Train Test Split

In [6]:
# Seed shared by the train/test split, the classifier, the oversampler and OVRS_ACO.
random_state = 42

In [7]:
# Hold out 30% of the rows for evaluation; the remaining 70% is used for training.
# NOTE(review): the split is not stratified on y — if the classes are imbalanced,
# consider passing stratify=y so both parts keep the same class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=random_state,
)

# Tuning

In [22]:
# Class label to oversample and the sample count requested for that class;
# both feed ADASYN's sampling_strategy and OVRS_ACO.set_model below.
target = 1
n_ovrs = 2000

In [24]:
# Oversampler and base classifier handed to OVRS_ACO in every sweep below.
# Both receive the shared seed.
oversampler = ADASYN(
    sampling_strategy={target: n_ovrs},
    random_state=random_state,
    n_jobs=-1,
)
model = GradientBoostingClassifier(random_state=random_state)

In [21]:
# Candidate grids for the one-parameter-at-a-time sweeps.
# init_pheromone and rho use the same grid: 0.1, 0.2, ..., 0.9 followed by 1.
# Rounding to one decimal removes the float drift that np.arange accumulates.
init_pheromone_test = list(np.round(np.arange(0.1, 1, 0.1), 1)) + [1]
rho_test = list(np.round(np.arange(0.1, 1, 0.1), 1)) + [1]
num_ant_test = [20, 30, 40, 50]

## init_pheromone

In [25]:
# Sweep init_pheromone while rho and num_ant stay fixed at 0.8 and 20.
# For every candidate: run OVRS_ACO on the training split, fit a copy of the
# classifier on the training data it returns, and score that copy on the
# hold-out split. test_results collects one F1 score per candidate, in grid order.
# NOTE(review): candidates are ranked by their score on X_test/y_test, so the
# same data drives both selection and reporting — consider a validation split.
test_results = []
for test_case in init_pheromone_test:
    # max_iter=100 matches the rho and num_ant sweeps so that all three
    # tuning stages run with the same settings apart from the swept parameter.
    ovrs_aco = OVRS_ACO(init_pheromone=test_case, rho=0.8, num_ant=20,
                        max_iter=100, max_idem=10, kfold=5,
                        random_state=random_state)
    ovrs_aco.set_model(X_train, y_train,
                       ovrs_target=target, n_ovrs_target=n_ovrs,
                       model=model, oversampler=oversampler)

    new_X_train, new_y_train, fitness, fitness_history = ovrs_aco.construct_solution()

    # Fit an independent copy: `model` is also handed to ovrs_aco, so fitting
    # it in place would mutate the estimator shared across iterations.
    model_ovrs_aco = copy.deepcopy(model)
    model_ovrs_aco.fit(new_X_train, new_y_train)

    # Predict once and reuse the result for all three metrics.
    y_pred = model_ovrs_aco.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    test_results.append(f1)

    print("test case = ", test_case)
    print("f1 = ", f1)
    print("gm = ", geometric_mean_score(y_test, y_pred))
    print("bas = ", balanced_accuracy_score(y_test, y_pred))

    print("")

test case =  0.1
f1 =  0.4
gm =  0.5482097623670511
bas =  0.6459082488830772

test case =  0.2
f1 =  0.4
gm =  0.5482097623670511
bas =  0.6459082488830772

test case =  0.3
f1 =  0.4
gm =  0.5482097623670511
bas =  0.6459082488830772



KeyboardInterrupt: 

In [26]:
# Position of the highest F1 score collected by the init_pheromone sweep
# (argmax returns the first position on ties), mapped back to the grid value.
# The lookup is positional, so test_results must be in grid order.
best_test_case = np.asarray(test_results).argmax()
best_init_pheromone = init_pheromone_test[best_test_case]

## rho

In [None]:
# Sweep rho with init_pheromone fixed at best_init_pheromone and num_ant at 20.
# For every candidate: run OVRS_ACO on the training split, fit a copy of the
# classifier on the training data it returns, and score that copy on the
# hold-out split. test_results collects one F1 score per candidate, in grid order.
# NOTE(review): candidates are ranked by their score on X_test/y_test, so the
# same data drives both selection and reporting — consider a validation split.
test_results = []
for test_case in rho_test:
    ovrs_aco = OVRS_ACO(init_pheromone=best_init_pheromone, rho=test_case,
                        num_ant=20, max_iter=100, max_idem=10, kfold=5,
                        random_state=random_state)
    ovrs_aco.set_model(X_train, y_train,
                       ovrs_target=target, n_ovrs_target=n_ovrs,
                       model=model, oversampler=oversampler)

    new_X_train, new_y_train, fitness, fitness_history = ovrs_aco.construct_solution()

    # Fit an independent copy: `model` is also handed to ovrs_aco, so fitting
    # it in place would mutate the estimator shared across iterations.
    model_ovrs_aco = copy.deepcopy(model)
    model_ovrs_aco.fit(new_X_train, new_y_train)

    # Predict once and reuse the result for all three metrics.
    y_pred = model_ovrs_aco.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    test_results.append(f1)

    print("test case = ", test_case)
    print("f1 = ", f1)
    print("gm = ", geometric_mean_score(y_test, y_pred))
    print("bas = ", balanced_accuracy_score(y_test, y_pred))

    print("")

In [None]:
# Position of the highest F1 score collected by the rho sweep
# (argmax returns the first position on ties), mapped back to the grid value.
# The lookup is positional, so test_results must be in grid order.
best_test_case = np.asarray(test_results).argmax()
best_rho = rho_test[best_test_case]

## num_ant

In [None]:
# Sweep num_ant with init_pheromone and rho fixed at the values chosen by the
# two earlier sweeps. For every candidate: run OVRS_ACO on the training split,
# fit a copy of the classifier on the training data it returns, and score that
# copy on the hold-out split. test_results collects one F1 score per candidate,
# in grid order.
# NOTE(review): candidates are ranked by their score on X_test/y_test, so the
# same data drives both selection and reporting — consider a validation split.
test_results = []
for test_case in num_ant_test:
    ovrs_aco = OVRS_ACO(init_pheromone=best_init_pheromone, rho=best_rho,
                        num_ant=test_case, max_iter=100, max_idem=10, kfold=5,
                        random_state=random_state)
    ovrs_aco.set_model(X_train, y_train,
                       ovrs_target=target, n_ovrs_target=n_ovrs,
                       model=model, oversampler=oversampler)

    new_X_train, new_y_train, fitness, fitness_history = ovrs_aco.construct_solution()

    # Fit an independent copy: `model` is also handed to ovrs_aco, so fitting
    # it in place would mutate the estimator shared across iterations.
    model_ovrs_aco = copy.deepcopy(model)
    model_ovrs_aco.fit(new_X_train, new_y_train)

    # Predict once and reuse the result for all three metrics.
    y_pred = model_ovrs_aco.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    test_results.append(f1)

    print("test case = ", test_case)
    print("f1 = ", f1)
    print("gm = ", geometric_mean_score(y_test, y_pred))
    print("bas = ", balanced_accuracy_score(y_test, y_pred))

    print("")

In [None]:
# Position of the highest F1 score collected by the num_ant sweep
# (argmax returns the first position on ties), mapped back to the grid value.
# The lookup is positional, so test_results must be in grid order.
best_test_case = np.asarray(test_results).argmax()
best_num_ant = num_ant_test[best_test_case]

## Results

In [None]:
# Report the value selected by each of the three sweeps, one per line.
for label, value in (
    ("best init pheromone = ", best_init_pheromone),
    ("best rho = ", best_rho),
    ("best num_ant = ", best_num_ant),
):
    print(label, value)