# 0.0 Imports

In [17]:
import pandas as pd 
from sklearn.tree           import DecisionTreeClassifier   as dt  
from sklearn.linear_model   import LogisticRegression       as lr 
from sklearn.ensemble       import RandomForestClassifier   as rf 
from sklearn.neighbors      import KNeighborsClassifier     as knn
from sklearn.preprocessing  import normalize
from sklearn                import metrics                  as mt 
from tqdm import tqdm

# 1.0 Data Preparation

In [18]:
class normalizer():

    def __init__(self,
                 df):
        
        self.dict_max = {c : df[c].max() for c in df.columns}


    def getDfcolumnNormalized(self, 
                              df):
        
        df_normalized = df.copy()

        for c in df.columns:

            df_normalized.loc[:, c] = df_normalized.loc[:,c].apply(lambda x: x/self.dict_max[c])

        return df_normalized 
    

    def getDfcolumnDeNormalized(self,
                                df):
        
        df_normalized = df.copy()

        for c in df.columns:

            df_normalized.loc[:, c] = df_normalized.loc[:,c].apply(lambda x: x*self.dict_max[c])

        return df_normalized 
    


In [19]:
X_test      = pd.read_csv('../data/X_test.csv') 
X_training  = pd.read_csv('../data/X_training.csv')
X_valid     = pd.read_csv('../data/X_validation.csv')
y_test      = pd.read_csv('../data/y_test.csv')
y_training  = pd.read_csv('../data/y_training.csv')
y_valid     = pd.read_csv('../data/y_validation.csv')


In [20]:
# instancia um normalizador treinado com os parametros de training
normalizer_X = normalizer(X_training)

X_test_normalized   = normalizer_X.getDfcolumnNormalized(X_test)
X_training_norm     = normalizer_X.getDfcolumnNormalized(X_training)
X_valid_norm        = normalizer_X.getDfcolumnNormalized(X_valid)


# 2.0 Model Training

In [21]:
from typing import Literal
class superModelClassifier():
    def __init__(self,
                 model,
                 principalMetric : Literal['ACCURACY', 'F1_SCORE', 'PRECISION', 'RECALL']):
        
        self.model = model 

    def fineTuning():
        ...
    
    ...


In [25]:
knn_classifier = knn()
knn_classifier.fit(X_training_norm, y_training)


## fine tuning baseado no validation 
n_neighbors_fine_tuning = [1,2,3,4,5,6,7,8,9,10]

class ExperimentClassification():

    def __init__(self,
                 training_dataset : dict = None,
                 test_dataset     : dict = None,
                 valid_dataset    : dict = None):


        self.general_metrics = []

        self.x_training = training_dataset['x']
        self.y_training = training_dataset['y']

        self.x_test = test_dataset['x']
        self.y_test = test_dataset['y']
        
        self.x_valid = valid_dataset['x']
        self.y_valid = valid_dataset['y']
        


    
    def computeMetricsSklearnModel(self,
                                   predictions_training,
                                   predictions_test,
                                   predictions_valid) -> dict:

        '''
        returns:
            metrics :{
                accuracy    : dict['training','test','valid'], 
                precision   : dict['training','test','valid'],
                recall      : dict['training','test','valid'],
                f1_score    : dict['training','test','valid']
            }
        
        '''        
            
        accuracies  = { 
                        'training'  : None,
                        'test'      : None,
                        'valid'     : None
                       }
        
        precisions  = {
                        'training'  : None,
                        'test'      : None,
                        'valid'     : None
        }
        recalls     = {
                        'training'  : None,
                        'test'      : None,
                        'valid'     : None}
        
        f1_scores   = {
                        'training'  : None,
                        'test'      : None,
                        'valid'     : None
        }    
        

        # compute accuracy
        accuracies['training']   = mt.accuracy_score(self.y_training, predictions_training)
        accuracies['test']       = mt.accuracy_score(self.y_test, predictions_test)
        accuracies['valid']      = mt.accuracy_score(self.y_valid,  predictions_valid)
        
        #compute precision
        precisions['training']  = mt.precision_score(self.y_training, predictions_training)
        precisions['test']      = mt.precision_score(self.y_test, predictions_test)
        precisions['valid']     = mt.precision_score(self.y_valid, predictions_valid)
        
        # compute recalls
        recalls['training']     = mt.recall_score(self.y_training, predictions_training)
        recalls['test']         = mt.recall_score(self.y_test, predictions_test)
        recalls['valid']        = mt.recall_score(self.y_valid, predictions_valid)
        
        # compute f1_scores
        f1_scores['training']   = mt.f1_score(self.y_training, predictions_training)
        f1_scores['test']       = mt.f1_score(self.y_test, predictions_test)
        f1_scores['valid']      = mt.f1_score(self.y_valid, predictions_valid)


        metrics = {
             'accuracy'  : accuracies,
             'precision' : precisions,
             'recall'    : recalls, 
             'f1_score'  : f1_scores
        }

        return metrics 

    def setResults(self,
                    experimentName,
                    accuracies,
                    recalls,
                    precisions, 
                    f1_scores
                ):
        
            dict_accuracies  = {data_context : accuracies[idx]  for idx, data_context in enumerate(self.data_order)}
            dict_recalls     = {data_context : recalls[idx]     for idx, data_context in enumerate(self.data_order)}
            dict_precisions  = {data_context : precisions[idx]  for idx, data_context in enumerate(self.data_order)}  
            dict_f1_scores   = {data_context : f1_scores[idx]   for idx, data_context in enumerate(self.data_order)}

            self.results[experimentName]['metrics']  = [dict_accuracies, dict_recalls, dict_f1_scores, dict_precisions]



  return self._fit(X, y)


## 2.1 - KNN

In [None]:
class experimentKnn(ExperimentClassification):

    def __init__(self,
                 k_neigbhors : list,
                 training_dataset : dict = None,
                 test_dataset     : dict = None,
                 valid_dataset    : dict = None,
                 verbose          : bool = True,
                 experimentName   : str = 'Experimento'
                 ):
        
        super().__init__(training_dataset,
                 test_dataset,
                 valid_dataset)
        
        self.parameters = k_neigbhors
        self.experimentName = experimentName
        self.verbose        = verbose 


    def log(self,
            message: str):
        
        print('='*45)
        print(f'{self.experimentName} : {message}')
        print('='*45)
        

    def fine_tuning_knn(self):
        ''' 
        dict_results : 
            model : model, 
            metrics : 
                training : 
                    accuracy 
                    precision
                    recall
                    f1_score
                test :  
                    ... 
        
        '''
        
        dict_results = {

        }
        
        
        for k in tqdm(self.parameters):
            experimentName = f'k_neighbors_{k}'
            currentResult = {}
            
           
            knn_classifier = knn(k)
            
            knn_classifier.fit(self.x_training, 
                               self.y_training)
            
            
            predictions_training = knn_classifier.predict(self.x_training)
            predictions_test     = knn_classifier.predict(self.x_test)
            predictions_valid    = knn_classifier.predict(self.x_valid)

            # calcula as principais metricas do modelo
            metrics = self.computeMetricsSklearnModel(
                                                        predictions_training=predictions_training, 
                                                        predictions_test=predictions_test, 
                                                        predictions_valid=predictions_valid
                                                    )
                    
            self.general_metrics.append(metrics)

            if self.verbose:
                #self.log(f"Acurácia com k = {k}: \n {metrics['accuracy']} %" )
                
                self.log(f"F1 Scre com k = {k}: \n {metrics['f1_score']} %")
                

        
        return self.general_metrics
           


In [35]:
training_dataset = {
    'x' : X_training_norm, 
    'y' : y_training
}

test_dataset = {
    'x' : X_test_normalized, 
    'y' : y_test
}


valid_dataset = {
    'x' : X_valid_norm, 
    'y' : y_valid
}


experimento_knn = experimentKnn(k_neigbhors=[1,2,3,4,5,6,7,8,9,10],
                                training_dataset=training_dataset,
                                test_dataset=test_dataset, 
                                valid_dataset=valid_dataset
                                )

resultados = experimento_knn.fine_tuning_knn()
resultados[0]

  return self._fit(X, y)
 10%|█         | 1/10 [00:14<02:07, 14.12s/it]

Experimento : F1 Scre com k = 1: 
 {'training': 1.0, 'test': 0.8975046954655219, 'valid': 0.8971553610503282} %


  return self._fit(X, y)
 20%|██        | 2/10 [00:30<02:02, 15.31s/it]

Experimento : F1 Scre com k = 2: 
 {'training': 0.937345731191885, 'test': 0.8893111638954869, 'valid': 0.8903939406083436} %


  return self._fit(X, y)
 30%|███       | 3/10 [00:44<01:44, 14.96s/it]

Experimento : F1 Scre com k = 3: 
 {'training': 0.9494761881486551, 'test': 0.9118968725886252, 'valid': 0.9076274434649291} %


  return self._fit(X, y)
 40%|████      | 4/10 [00:58<01:27, 14.65s/it]

Experimento : F1 Scre com k = 4: 
 {'training': 0.9305613864212665, 'test': 0.9028271859202397, 'valid': 0.9017423254711391} %


  return self._fit(X, y)
 50%|█████     | 5/10 [01:13<01:12, 14.56s/it]

Experimento : F1 Scre com k = 5: 
 {'training': 0.9381864749051865, 'test': 0.915164514215306, 'valid': 0.9108796296296297} %


  return self._fit(X, y)
 60%|██████    | 6/10 [01:28<00:58, 14.65s/it]

Experimento : F1 Scre com k = 6: 
 {'training': 0.9259339959438848, 'test': 0.9072443512695085, 'valid': 0.9060212514757969} %


  return self._fit(X, y)
 70%|███████   | 7/10 [01:43<00:44, 14.82s/it]

Experimento : F1 Scre com k = 7: 
 {'training': 0.9305780213250586, 'test': 0.9148010804376688, 'valid': 0.9121108874090135} %


  return self._fit(X, y)
 80%|████████  | 8/10 [02:00<00:31, 15.67s/it]

Experimento : F1 Scre com k = 8: 
 {'training': 0.9233423794686233, 'test': 0.908930396803271, 'valid': 0.9080130344313141} %


  return self._fit(X, y)
 90%|█████████ | 9/10 [02:16<00:15, 15.51s/it]

Experimento : F1 Scre com k = 9: 
 {'training': 0.9270181709859994, 'test': 0.913910929688575, 'valid': 0.9127131782945737} %


  return self._fit(X, y)
100%|██████████| 10/10 [02:31<00:00, 15.11s/it]

Experimento : F1 Scre com k = 10: 
 {'training': 0.9217475760671165, 'test': 0.9087196621652976, 'valid': 0.9087488240827846} %





{'accuracy': {'training': 1.0,
  'test': 0.9114818676862473,
  'valid': 0.9122880401557322},
 'precision': {'training': 1.0,
  'test': 0.912521596799127,
  'valid': 0.9120196364194216},
 'recall': {'training': 1.0,
  'test': 0.8829740431148262,
  'valid': 0.882767837255921},
 'f1_score': {'training': 1.0,
  'test': 0.8975046954655219,
  'valid': 0.8971553610503282}}

## 2.2 - Arvores de decisão

In [None]:
randomForestModel = rf(n_estimators=100)


# 3.0 Model Evaluations