# KNN nearest neighbors

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, balanced_accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))


In [2]:
data = pd.read_csv('./datasets/final.csv')

In [3]:
data.head()

Unnamed: 0,comments,label
0,[Baldinger] .@49ers here is my opening script ...,1
1,[49ers on NBCS] .@frankgore is loving what he’...,1
2,Chiefs fans be like,1
3,deal of the day,1
4,This guy made a really complex Python simulati...,1


In [4]:
X = data['comments']
y = data['label']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.2,
                                                    random_state=42,
                                                    stratify=y)

In [57]:
pipe = Pipeline([
    ('tvec', CountVectorizer(stop_words=stop_words)),
    ('knn', KNeighborsClassifier(n_neighbors=20,leaf_size=20))
])

In [58]:
pipe.fit(X_train,y_train)
accuracy_knn = pipe.score(X_train,y_train)
print(f'K Nearest Neighbors ACCURACY: {round(accuracy_knn,3)}')

K Nearest Neighbors ACCURACY: 0.553


In [41]:
def total_metrics(insta_model,X_test, y_test):
    preds = insta_model.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, preds).ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn+fp)
    precision =  tp / (tp + fp)
    bas = balanced_accuracy_score(y_test,preds)
    
    print("\u0332".join("RESULTS OF A KNN MODEL "))
    print('')
    print(f"          Accuracy: {round(accuracy_knn,3)}")
    print('')
    print(f" Balance Accuracy: {round(bas,3)}")
    print('')
    print(f"       Sensitivity: {round(sensitivity,3)}")
    print('')
    print(f"       Specificity: {round(specificity,3)}")
    print('')
    print(f"         Precision: {round(precision,3)}")
    print('')    
    
    

In [42]:
total_metrics(pipe,X_test,y_test)

R̲E̲S̲U̲L̲T̲S̲ ̲O̲F̲ ̲A̲ ̲K̲N̲N̲ ̲M̲O̲D̲E̲L̲ 

          Accuracy: 0.553

 Balance Accuracy: 0.53

       Sensitivity: 0.975

       Specificity: 0.085

         Precision: 0.517



In [43]:
pipe.score(X_train,y_train)

0.5526315789473685

In [44]:
pipe.score(X_test,y_test)

0.531328320802005