In [None]:
import IPython.core.display as di

# Emit a raw HTML payload through IPython's display machinery. The payload text
# is a jQuery-style expression that appends a <style> rule to the page <head>,
# giving every `.code_cell` element an 80px bottom margin.
# NOTE(review): the payload is not wrapped in <script> tags, so it is unclear
# whether it is actually executed as JavaScript — confirm it has the intended effect.
di.display_html("""
$('<style>.code_cell { margin-bottom: 80px !important;}</style>').appendTo('head');
""", raw=True)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer
from sklearn.neighbors import KNeighborsClassifier
from nltk.stem.snowball import SnowballStemmer
# Single English Snowball stemmer, shared by every analyzer built below.
english_stemmer = SnowballStemmer("english")


class StemmedTfidfVectorizer(TfidfVectorizer):
    """TfidfVectorizer whose analyzer stems every token with ``english_stemmer``."""

    def build_analyzer(self):
        """Return a callable that maps a document to its list of stemmed tokens.

        The parent class's analyzer produces the tokens; each token is then
        passed through ``english_stemmer.stem``.
        """
        base_analyzer = super().build_analyzer()

        def stemmed_analyzer(doc):
            stemmed_tokens = []
            for token in base_analyzer(doc):
                stemmed_tokens.append(english_stemmer.stem(token))
            return stemmed_tokens

        return stemmed_analyzer


### Generate KNN Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Instantiate an (unfitted) k-nearest-neighbours classifier with k = 12.
# Fitting on the training data happens in a later cell via classifier.fit(...).
classifier = KNeighborsClassifier(n_neighbors=12)

### Load the fitted TF-IDF vectorizer and the transformed train/test data using pickle

In [None]:
import pickle

def load_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code embedded in the file,
    so only use this on artifacts you produced yourself or otherwise trust.

    Args:
        path: Location of the pickle file, opened in binary read mode.

    Returns:
        Whatever object was pickled into the file.
    """
    with open(path, "rb") as pickle_file:
        return pickle.load(pickle_file)


# fit vectorizer
# NOTE(review): presumably an instance of StemmedTfidfVectorizer, which would be
# why that class has to be defined before this cell runs — confirm.
fit_vectorizer = load_pickle("model/idf_fit_vectorizer.pkl")

# training data
train_transform = load_pickle("model/train_transform.pkl")
train_labels = load_pickle("model/train_labels.pkl")

# testing data
test_transform = load_pickle("model/test_transform.pkl")
test_labels = load_pickle("model/test_labels.pkl")

### Fit the classifier on the training data and labels

In [None]:
# Fit the KNN classifier on the transformed training data and its labels.
classifier.fit(train_transform, train_labels)

### Verify accuracy

In [None]:
def calculate_score(prediction, test_labels):
    """Return the fraction of predictions that equal their expected label.

    Items are paired positionally with ``zip``, so if the two inputs differ in
    length the surplus items of the longer one are never compared; the
    denominator is always ``len(prediction)``.

    Args:
        prediction: Sized sequence of predicted labels.
        test_labels: Iterable of expected labels, in the same order.

    Returns:
        A float in [0, 1]. An empty ``prediction`` yields ``0.0`` rather than
        raising ``ZeroDivisionError``.
    """
    total = len(prediction)
    # Compare the length instead of using truthiness: a NumPy array has no
    # unambiguous truth value.
    if total == 0:
        return 0.0
    matches = sum(
        1
        for predicted, expected in zip(prediction, test_labels)
        if predicted == expected
    )
    return matches / total

def pretty_list(data, columns=5, width=10):
    """Print the items of ``data`` in rows of fixed-width cells.

    Each item is formatted with a minimum field width of ``width`` and the
    cells of a row are separated by a single space. The final row simply holds
    fewer cells when ``len(data)`` is not a multiple of ``columns`` (the
    previous implementation raised ``IndexError`` in that case).

    Args:
        data: Sliceable, sized sequence of items to print (e.g. a list or array).
        columns: Number of items per printed row. Defaults to 5.
        width: Minimum width of each cell. Defaults to 10.
    """
    for start in range(0, len(data), columns):
        # Slicing never runs past the end, so a short last row is safe.
        row = data[start:start + columns]
        print(" ".join(f"{item:{width}}" for item in row))

In [None]:
# Predict a label for every transformed test sample, then print the predictions with pretty_list.
prediction = classifier.predict(test_transform)
pretty_list(prediction)

In [None]:
# Print the expected test labels in the same layout, for comparison with the predictions.
pretty_list(test_labels)

In [None]:
# calculate_score returns a fraction of matching labels; scale by 100 to report a percentage.
print(f"The accuracy of our model is {100 * calculate_score(prediction, test_labels)}%")

### Test your own predictions!

In [None]:
def classify_text(text, classifier):
    """Predict the label of a single piece of text.

    The text is wrapped in a one-element list, transformed by the module-level
    ``fit_vectorizer``, and passed to ``classifier.predict``; the first (and
    only) prediction is returned.
    """
    features = fit_vectorizer.transform([text])
    predictions = classifier.predict(features)
    return predictions[0]



In [None]:
# Try the fitted classifier on a few hand-written snippets.
print(classify_text("Mammals are amazing creatures!", classifier))
print(classify_text("Nike has a very interesting marketing technique", classifier))
# classify_text takes the classifier as a required second argument; omitting it raises TypeError.
print(classify_text("Graphs ", classifier))