# **EXAMPLE NOTEBOOK**

## **Preliminary steps**

In [1]:
### PACKAGES

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [2]:
### FUNCTIONS

from src.configs import constants, ml_config, names
from src.libs import preprocessing, evaluation, visualization
from src.embedding.tf_idf import TfIdfEmbedding
from src.classifier.classifier import LightGBMClassifier, RFClassifier, NBClassifier


## **DATA**

In [3]:
### IMPORT DATA

df_train, df_test = preprocessing.load_data(local=True, type="full")


Loading data from local
Data loading done in 0.51 seconds


In [4]:
### CLEAN DATA

df_train = preprocessing.clean_dataset_1(df=df_train)
df_test = preprocessing.clean_dataset_1(df=df_test)


In [5]:
### SPLIT FEATURES AND LABELS

X_train, y_train = preprocessing.split_features_and_labels(df_train)
X_test, y_test = preprocessing.split_features_and_labels(df_test)


## **EXPERIMENT 111 : TF-IDF & LGBM**

In [6]:
ID_EXPERIMENT = 111

In [7]:
### EMBEDDING

embedding = TfIdfEmbedding(ID_EXPERIMENT)

embedding.fit(X=X_train)

X_train_111 = embedding.transform(X=X_train)
X_test_111 = embedding.transform(X=X_test)


In [8]:
### CLASSIFIER

classifier = LightGBMClassifier(ID_EXPERIMENT)

classifier.train(X_train=X_train_111, y_train=y_train)

metrics_111 = classifier.evaluate(X_test_111, y_test)

metrics_111




{'accuracy': 0.9865666434054431,
 'precision': 0.9836975372875477,
 'recall': 0.9895324494068388,
 'f1': 0.9866063663245782,
 'roc_auc': 0.9865666434054432}

## **EXPERIMENT 121 : TF-IDF & RANDOM FOREST**

In [9]:
ID_EXPERIMENT = 121

In [10]:
### EMBEDDING

embedding = TfIdfEmbedding(ID_EXPERIMENT)

embedding.fit(X=X_train)

X_train_121 = embedding.transform(X=X_train)
X_test_121 = embedding.transform(X=X_test)


In [11]:
### CLASSIFIER

classifier = RFClassifier(ID_EXPERIMENT)

classifier.train(X_train=X_train_121, y_train=y_train)

cv_score = classifier.get_cv_score(X_train=X_train_121, y_train=y_train)

print(f"CV Score: {cv_score}")

metrics_121 = classifier.evaluate(X_test_121, y_test)

metrics_121


CV Score: 0.9704377021322044


{'accuracy': 0.971039776692254,
 'precision': 0.9835243553008596,
 'recall': 0.9581297976273552,
 'f1': 0.9706610109579357,
 'roc_auc': 0.971039776692254}

## **EXPERIMENT 131 : TF-IDF & NAIVE BAYES**

In [12]:
ID_EXPERIMENT = 131

In [13]:
### EMBEDDING

embedding = TfIdfEmbedding(ID_EXPERIMENT)

embedding.fit(X=X_train)

X_train_131 = embedding.transform(X=X_train)
X_test_131 = embedding.transform(X=X_test)


In [14]:
### CLASSIFIER

classifier = RFClassifier(ID_EXPERIMENT)

classifier.train(X_train=X_train_131, y_train=y_train)

metrics_131 = classifier.evaluate(X_test_131, y_test)

metrics_131


TypeError: RandomForestClassifier.__init__() got an unexpected keyword argument 'alpha'