# Import Optialgo

In [1]:
import pandas as pd
from optialgo import Dataset, Classification

# Read CSV

In [2]:
# Load the drug dataset into a DataFrame and preview its first five rows.
# NOTE(review): the preview shows an "Unnamed: 0" column. If that is an index
# stored in the CSV (rather than a rendering artifact), it will be treated as a
# feature downstream -- confirm, and consider passing `index_col=0` here.
df = pd.read_csv(filepath_or_buffer="dataset_ex/drug200.csv")
df.head(n=5)

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,DrugY


# Initialize Features and Target

In [3]:
# Name the label column first, then derive the feature list from it. Selecting
# features as "every column except the target" keeps the two definitions in
# sync and does not rely on the label being the last column of the CSV.
target = "Drug"
features = [column for column in df.columns if column != target]

# Dataset

In [4]:
# Wrap the raw DataFrame in an optialgo Dataset, then tell it which columns are
# the inputs (`features`) and which column is the label (`target`).
# NOTE(review): whatever preprocessing or splitting `fit` performs is internal
# to optialgo and not visible in this notebook -- check the library docs.
dataset = Dataset(dataframe=df)
dataset.fit(features=features,target=target)

# Classification

## Comparing Models

### with train_val

In [5]:
# Build the classification helper on top of the prepared dataset.
clf = Classification(dataset=dataset)
# Run optialgo's model comparison and request the result as a table.
# train_val=True is the "with train_val" variant of this section; presumably it
# scores each model on a train/validation split -- TODO confirm in optialgo docs.
clf.compare_model(output="table",train_val=True)

Output()

### using cross-validation

In [6]:
# Same comparison with train_val=False, which this notebook uses as the
# cross-validation variant.
clf.compare_model(output="table",train_val=False)

Output()

## Set Model

In [7]:
# Select the algorithm to work with by its display name.
clf.set_model("K-Nearest Neighbor")
# Echo the active model; it is shown as a (name, estimator) tuple.
clf.model

('K-Nearest Neighbor', KNeighborsClassifier())

In [8]:
# Take the validation (test) data from the dataset object.
# Only X_test / y_test are used by the cells shown in this notebook.
X_train,X_test,y_train,y_test = dataset.get_x_y()

# Predict labels for the held-out features with the currently selected model.
pred = clf.predict(X_test)

In [9]:
# Evaluation: score the predictions against the true test labels. The result is
# a dict with accuracy, precision, recall, f1 and a per-class
# classification report.
clf.score_report(y_test,pred)

{'accuracy': 0.725,
 'precision': 0.6883333333333332,
 'recall': 0.725,
 'f1': 0.7023547505126452,
 'classification_report': {'0': {'precision': 0.9,
   'recall': 1.0,
   'f1-score': 0.9473684210526315,
   'support': 18.0},
  '1': {'precision': 0.5,
   'recall': 0.6,
   'f1-score': 0.5454545454545454,
   'support': 5.0},
  '2': {'precision': 0.5,
   'recall': 0.6666666666666666,
   'f1-score': 0.5714285714285714,
   'support': 3.0},
  '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 3.0},
  '4': {'precision': 0.6666666666666666,
   'recall': 0.5454545454545454,
   'f1-score': 0.6,
   'support': 11.0},
  'accuracy': 0.725,
  'macro avg': {'precision': 0.5133333333333333,
   'recall': 0.5624242424242424,
   'f1-score': 0.5328503075871496,
   'support': 40.0},
  'weighted avg': {'precision': 0.6883333333333332,
   'recall': 0.725,
   'f1-score': 0.7023547505126452,
   'support': 40.0}}}

## Hyperparameter Tuning

In [10]:
# Hyperparameter tuning: select the model again before tuning.
# NOTE(review): presumably this resets the estimator to its defaults -- confirm.
clf.set_model("K-Nearest Neighbor")

# clf.model is a (name, estimator) tuple, so index 1 is the estimator itself;
# get_params() lists its current settings, i.e. the names that can be tuned.
clf.model[1].get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [11]:
# Candidate values to try for the K-Nearest Neighbor estimator.
# NOTE(review): the estimator reports p=2, and Minkowski distance with p=2 is
# the Euclidean distance, so these two metric entries describe the same model;
# the grid effectively varies only n_neighbors. Consider "manhattan" if a real
# metric comparison is intended.
params = dict(
    n_neighbors=[2, 3, 4],
    metric=["minkowski", "euclidean"],
)

# Ask optialgo for the best-scoring combination, apply it to the classifier,
# and display the score together with the chosen parameters.
best_score, best_params = clf.find_best_params(params)
clf.set_params(best_params)
(best_score, best_params)

(0.63125, {'metric': 'minkowski', 'n_neighbors': 3})

In [12]:
# Repeat prediction and evaluation now that the tuned parameters have been
# applied, so the report can be compared with the untuned one earlier on.
pred = clf.predict(X_test)
clf.score_report(y_test,pred)

{'accuracy': 0.775,
 'precision': 0.7183531746031745,
 'recall': 0.775,
 'f1': 0.7403521478521479,
 'classification_report': {'0': {'precision': 0.8571428571428571,
   'recall': 1.0,
   'f1-score': 0.9230769230769231,
   'support': 18.0},
  '1': {'precision': 0.5,
   'recall': 0.6,
   'f1-score': 0.5454545454545454,
   'support': 5.0},
  '2': {'precision': 0.75,
   'recall': 1.0,
   'f1-score': 0.8571428571428571,
   'support': 3.0},
  '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 3.0},
  '4': {'precision': 0.7777777777777778,
   'recall': 0.6363636363636364,
   'f1-score': 0.7,
   'support': 11.0},
  'accuracy': 0.775,
  'macro avg': {'precision': 0.5769841269841269,
   'recall': 0.6472727272727272,
   'f1-score': 0.605134865134865,
   'support': 40.0},
  'weighted avg': {'precision': 0.7183531746031745,
   'recall': 0.775,
   'f1-score': 0.7403521478521479,
   'support': 40.0}}}

In [13]:
# Run the model comparison once more, leaving train_val at its default.
# NOTE(review): which evaluation mode the default selects is not shown in this
# notebook -- confirm against the optialgo documentation.
clf.compare_model(output="table")

Output()