In [19]:
import pickle
import spacy
import numpy as np
import pandas as pd
from functools import partial

# Load spaCy's small English pipeline once; tokenize() reuses this object on every call.
# The legacy `parser=True` keyword is intentionally not passed: the parser is part of
# the default pipeline, and spaCy v3's `spacy.load` rejects unknown keyword arguments.
nlp = spacy.load('en_core_web_sm')


def tokenize(text):
    """Turn ``text`` into a list of lemmas, discarding stop words and noise.

    The text is processed with the module-level spaCy pipeline ``nlp``. A
    token is dropped when it is a stop word, when its coarse part-of-speech
    tag is one of PUNCT, SYM, NUM, PART or SPACE, or when its surface text is
    one of a fixed list of contraction fragments and similar leftovers.

    Returns:
        list: the ``lemma_`` of every surviving token, in document order.
    """
    skipped_pos = ['PUNCT', 'SYM', 'NUM', 'PART', 'SPACE']
    skipped_text = [
        "n't", "'h", 'm', 'wh', '%', 'rt', "'s", "'ve", "'ll", '’re',
        "'m", '&', "'ve", "'re", '’ve', '’ll', '’s', '’m', 'n’t', 's.', 'c.', 'f.', 'm.'
    ]

    lemmas = []
    for tok in nlp(text):
        if tok.is_stop:
            continue
        if tok.pos_ in skipped_pos:
            continue
        if tok.text in skipped_text:
            continue
        lemmas.append(tok.lemma_)
    return lemmas


def top_n_important_tokens(text, tfidf, n):
    """Return the ``n`` highest-weighted terms of ``text`` under ``tfidf``.

    Args:
        text: A single document (string) to score.
        tfidf: A fitted vectorizer-like object (presumably a scikit-learn
            ``TfidfVectorizer`` -- confirm) exposing ``transform`` and either
            ``get_feature_names_out`` or the legacy ``get_feature_names``.
        n: Number of rows to keep; applied as the slice ``[:n]`` to the
            descending ranking.

    Returns:
        pandas.DataFrame: columns ``words`` and ``importance``, ordered from
        the highest weight to the lowest. Vocabulary terms absent from
        ``text`` have weight 0 and can show up when ``n`` exceeds the number
        of terms that matched.
    """
    # Newer vectorizers expose get_feature_names_out; older ones only have
    # get_feature_names. Prefer the new accessor and fall back to the old one
    # so the function works with either.
    if hasattr(tfidf, 'get_feature_names_out'):
        feature_names = np.asarray(tfidf.get_feature_names_out())
    else:
        feature_names = np.asarray(tfidf.get_feature_names())

    # Densify the single-row result once and reuse it for ranking and lookup.
    importance = tfidf.transform([text]).toarray()[0]

    # Rank every vocabulary index by weight (descending), then keep only the
    # first n indices before looking anything up.
    top_indices = np.argsort(importance)[::-1][:n]

    return pd.DataFrame({
        'words': feature_names[top_indices],
        'importance': importance[top_indices],
    })



# Unpickle the object stored in "tfidf.pk1" and bind it to `tfidf`; the cells
# below call its `transform` method.
# NOTE(review): pickle.load can execute arbitrary code -- only load a file you trust.
# NOTE(review): presumably the vectorizer was pickled with `tokenize` as its
# tokenizer, in which case `tokenize` must be defined before this runs -- confirm.
with open(r"tfidf.pk1", "rb") as input_file:
    tfidf = pickle.load(input_file)

In [20]:
# Quick check that the loaded object can vectorize a raw string.
tfidf.transform(['hi what aboout this'])

<1x36769 sparse matrix of type '<class 'numpy.float64'>'
	with 1 stored elements in Compressed Sparse Row format>

In [23]:
# Sample article text passed to top_n_important_tokens in the next cell.
test = 'Face coverings are no longer compulsory in schools in England or Wales, although they are recommended in crowded spaces like school buses. However, Education Secretary Nadhim Zahawi said mask-wearing could be brought back in England under winter contingency plans. Schools in Cambridgeshire have been asked to reintroduce face coverings after cases surged. Local health officials also recommended that staff resume social distancing within school buildings. They said staff meetings and non-essential events with parents should be held virtually where possible. In Scotland, face coverings are required at least until October half-term. In Northern Ireland, are required in class for at least the first six weeks of term.'

In [24]:
# Show the five highest-weighted terms of the sample text.
top_n_important_tokens(test, tfidf, 5)

Unnamed: 0,words,importance
0,covering,0.502263
1,recommend,0.253384
2,school,0.219476
3,wearing,0.211263
4,contingency,0.211263
