# **UNIT TESTS FOR TRADITIONAL ML ALGORITHMS**

### IMPORT PACKAGES & FUNCTIONS

In [None]:
import re
import pandas as pd
import sklearn.svm as svm
from nltk import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder
from pickle import dump, load
from tqdm import tqdm
from sklearn.pipeline import Pipeline
from matplotlib import pyplot as plt
import nltk 
from nltk import download
from sklearn.model_selection import train_test_split
import numpy as np
# Fetch the NLTK resources the rest of the notebook relies on:
# preprocessing() uses WordNetLemmatizer and stopwords.words('english').
# NOTE(review): 'omw-1.4' is presumably needed by the WordNet lemmatizer in
# recent NLTK releases -- confirm against the installed NLTK version.
download('wordnet')
download('omw-1.4')
download('stopwords')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
def preprocessing(text):
    """Normalise a piece of text before it is vectorised.

    The text is lower-cased, every character other than ``0-9``, ``a-z``,
    ``_``, ``+``, ``-`` and ``*`` is replaced by a space, and the remaining
    whitespace-separated tokens are lemmatised with ``WordNetLemmatizer``.
    Tokens whose lemma is an English stopword are dropped.

    Args:
        text (str): Raw text to clean.

    Returns:
        str: The surviving lemmas joined by single spaces; an empty string
        when no token survives.
    """
    text_cleaned = re.sub(r'[^0-9a-z_+\-*]', ' ', text.lower()).strip()
    lemm = WordNetLemmatizer()
    # Load the stopword list once per call (not once per token) and keep it in
    # a set so each membership test is a hash lookup instead of a list scan.
    stop_words = set(stopwords.words('english'))
    # Lemmatise each token exactly once; the lemma is both tested and kept.
    lemmas = (lemm.lemmatize(token) for token in text_cleaned.split())
    return ' '.join(lemma for lemma in lemmas if lemma not in stop_words)

### Import files from Local Directories

In [None]:
# Colab-only step: `google.colab` is imported here, so this cell is not
# expected to run outside a Google Colab runtime.
# files.upload() asks the user to pick local file(s); the result is kept in
# `uploaded` (not used again in the visible cells).
from google.colab import files
uploaded = files.upload()

Saving sample_400_Data&Headers.xlsx to sample_400_Data&Headers.xlsx


In [None]:
# Read the uploaded spreadsheet and pick out the text and target columns.
data = pd.read_excel('/content/sample_400_Data&Headers.xlsx')
headlines = data['headline']
labels = data['labels']

# Shuffled 75% / 25% train/test split with a fixed seed.
train_data, test_data, train_label, test_label = train_test_split(
    headlines,
    labels,
    test_size=0.25,
    shuffle=True,
    random_state=104,
)

In [None]:
def _clean_headline(raw_headline):
    """Coerce one cell value to ``str`` and run it through ``preprocessing``."""
    return preprocessing(str(raw_headline))


# Clean both splits element by element with the same routine.
train_data = train_data.map(_clean_headline)
test_data = test_data.map(_clean_headline)

### DATA PREPARATION

In [None]:
# Learn the class -> integer mapping from the training labels only.
encoder = LabelEncoder()
train_label = encoder.fit_transform(train_label)
# Reuse the fitted mapping for the test labels. Calling fit_transform() here
# would refit the encoder on the test split, so the same class could receive a
# different integer whenever the two splits do not contain the same set of
# classes. transform() raises ValueError on a label unseen during fitting,
# which surfaces that situation instead of hiding it.
test_label = encoder.transform(test_label)

In [None]:
# Fit the TF-IDF vocabulary and weights on the training text only, so nothing
# from the test split influences the features.
tfidf_vector = TfidfVectorizer()
# fit_transform() learns the vocabulary and vectorises the training text in a
# single pass instead of tokenising the training corpus twice.
train_data_tfidf = tfidf_vector.fit_transform(train_data)
# The test text is only projected onto the vocabulary learnt above.
test_data_tfidf = tfidf_vector.transform(test_data)

In [None]:
# Classifiers under test, keyed by a display name. All use library defaults
# except for the seed below.
models = {
    # Random forests are stochastic; pin the seed (same value as the
    # train/test split) so a re-run reproduces the same predictions.
    "Random Forest": RandomForestClassifier(random_state=104),
    "MNB": MultinomialNB(),
    "SVM": svm.SVC(),
    "KNN": KNeighborsClassifier()
}

In [None]:
# Fit every classifier on the training features and collect its predictions
# for the test features, in the insertion order of `models`.
# (scikit-learn's fit() returns the estimator itself, so the calls can be chained.)
output = [
  classifier.fit(train_data_tfidf, train_label).predict(test_data_tfidf)
  for classifier in models.values()
]

## Setting test parameters & executing the tests

In [None]:
import unittest
class Test(unittest.TestCase):
  def __init__(self, testName, output):
    super(Test, self).__init__(testName) 
    self.output = output

  # check if the prediction is an array
  def test_type_prediction(self):
    error_message = "Output of model is not a dinamic array"
    self.assertEqual(type(self.output), np.ndarray, error_message)
  
  # check if the prediction legth is 100
  #   -> (400 headlines * 25% test size = 100)
  def test_length_predictions(self):
    error_message = "The length of the prediction is not the expected"
    self.assertEqual(len(self.output), 100, error_message)

In [None]:
# Build two test cases (type check + length check) per model prediction.
# Iterate over `output` without popping from it: the previous alias + pop()
# emptied `output`, so re-running this cell raised IndexError, and the
# predictions came off the list in reverse model order.
suite = unittest.TestSuite()
for label_prediction in output:
  suite.addTest(Test('test_type_prediction', label_prediction))
  suite.addTest(Test('test_length_predictions', label_prediction))
unittest.TextTestRunner(verbosity=3).run(suite)

test_type_prediction (__main__.Test) ... ok
test_length_predictions (__main__.Test) ... ok
test_type_prediction (__main__.Test) ... ok
test_length_predictions (__main__.Test) ... ok
test_type_prediction (__main__.Test) ... ok
test_length_predictions (__main__.Test) ... ok
test_type_prediction (__main__.Test) ... ok
test_length_predictions (__main__.Test) ... ok

----------------------------------------------------------------------
Ran 8 tests in 0.065s

OK


<unittest.runner.TextTestResult run=8 errors=0 failures=0>