In [1]:
from joblib import load, dump
from gensim.models import Word2Vec
from gensim.models.doc2vec import Doc2Vec
import numpy as np


def load_classifier(model='tfidf'):
    """Load the pre-trained classifier that matches an embedding type.

    Args:
        model: Key selecting the classifier; one of 'tfidf', 'word2vec'
            or 'doc2vec'.

    Returns:
        The object deserialized by joblib's ``load`` from the matching
        ``.joblib`` file, or ``None`` when ``model`` is not a known key
        (a 'No Model ...' message is printed in that case).
    """
    # (key, progress message, model file) for every supported classifier.
    # Paths are relative, so they resolve against the current working directory.
    known_classifiers = (
        ('tfidf', 'Loading TFiDF Classifier',
         '../code/classifier/models/RF_TFiDF.joblib'),
        ('word2vec', 'Loading word2vec Classifier',
         '../code/classifier/models/RF_word2vec.joblib'),
        ('doc2vec', 'Loading doc2vec Classifier',
         '../code/classifier/models/RF_doc2vec.joblib'),
    )

    for key, message, path in known_classifiers:
        if model == key:
            print(message)
            # NOTE: joblib's load unpickles the file; only use trusted model files.
            return load(path)

    # Unknown key: report it and hand back None instead of raising.
    print('No Model {}'.format(model))
    return None


def load_embedder(model='tfidf'):
    """Load the pre-trained embedding model of the requested type.

    Args:
        model: Key selecting the embedder; one of 'tfidf', 'word2vec'
            or 'doc2vec'.

    Returns:
        For 'tfidf', the object deserialized by joblib's ``load``; for
        'word2vec' / 'doc2vec', the result of ``Word2Vec.load`` /
        ``Doc2Vec.load``. ``None`` when ``model`` is not a known key
        (a 'No Model ...' message is printed in that case).
    """
    # (key, progress message, loader) for every supported embedder. Loaders are
    # wrapped in lambdas so nothing is read from disk until a key matches.
    # Paths are relative, so they resolve against the current working directory.
    # NOTE(review): 'TF-IFD' in the first path looks like a misspelling of
    # TF-IDF, but it is the path as written; presumably the file on disk
    # carries the same name - confirm before "fixing" it.
    known_embedders = (
        ('tfidf', 'Loading TFiDF Embedder',
         lambda: load('../code/embedding/models/TF-IFD-ticket-ques.joblib')),
        ('word2vec', 'Loading word2vec Embedding',
         lambda: Word2Vec.load('../code/embedding/models/word2vec_ticket_ques.model')),
        ('doc2vec', 'Loading doc2vec Embedding',
         lambda: Doc2Vec.load('../code/embedding/models/doc2vec_ticket_ques.model')),
    )

    for key, message, loader in known_embedders:
        if model == key:
            print(message)
            return loader()

    # Unknown key: report it and hand back None instead of raising.
    print('No Model {}'.format(model))
    return None

def predict(text, embedder, classifier, model='tfidf'):
    """Embed a single text and return the classifier's class probabilities.

    Args:
        text: The document to classify.
        embedder: Object exposing ``transform``; it is called with a
            one-element list containing ``text``.
        classifier: Object exposing ``predict_proba``; it is called with
            the embedder's output.
        model: Embedding type. Only 'tfidf' is handled at the moment.

    Returns:
        Whatever ``classifier.predict_proba`` returns for the embedded text,
        or ``None`` (after printing a message) for any other ``model``.
    """
    # TODO: other model types
    if model != 'tfidf':
        print('Model {} not found'.format(model))
        return None

    # transform expects a collection of documents, hence the single-item list.
    features = embedder.transform([text])
    return classifier.predict_proba(features)





In [2]:
# Load the TF-IDF embedder and the classifier stored under the same 'tfidf' key.
# Both helpers print a progress line and return None for an unknown key.
embedder = load_embedder(model='tfidf')
classifier = load_classifier(model='tfidf')

Loading TFiDF Embedder
Loading TFiDF Classifier


In [5]:
# Sample ticket text used to try out the TF-IDF pipeline end to end.
# NOTE(review): the sample appears to contain real personal names - consider
# redacting it before sharing or committing the notebook.
text = """
"Dear Dr. Sebastiano Rossi   Your vpn service is already expired.  sincerely, Rudolf Friederich  Dear Dr. Sebastiano Rossi   Your vpn-service is ready. Probably you have to change or set the network password at password.ethz.ch.  sincerely, Rudolf Friederich  "
"""
# Display the value returned by predict(), i.e. the classifier's predict_proba
# result for this single text.
predict(text, embedder, classifier, model='tfidf')




array([[0.8, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0.1, 0. , 0.1, 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
        0. ]])

In [6]:
# Class labels known to the loaded classifier. Presumably the columns of the
# predict_proba output follow this order (scikit-learn convention) - TODO confirm.
classifier.classes_

array([ -1,   0,   1,   2,   3,   4,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  18,  19,  21,  22,  23,  24,  27,  28,  29,
        31,  32,  33,  35,  36,  37,  38,  40,  41,  42,  46,  49,  51,
        52,  53,  54,  56,  57,  61,  62,  63,  64,  66,  67,  68,  70,
        72,  73,  76,  78,  79,  80,  84,  85,  86,  87,  88,  89,  90,
        91,  93,  94,  95,  96,  99, 100, 101, 102, 103, 105, 106, 107,
       108, 110, 111, 114, 117, 121, 122, 123, 124, 126, 127, 129, 130,
       131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
       144, 146, 147, 148, 153, 154, 155, 156, 157, 159, 161, 162, 163,
       164, 165, 167, 168, 171, 172, 173, 174, 175, 176, 177, 178, 179,
       180, 181, 183, 185, 186, 188, 189, 190, 191, 192, 193, 195, 196,
       197])

In [7]:
# Predicted label for the same sample text, calling the classifier directly on
# the embedder's output instead of going through predict().
classifier.predict(embedder.transform([text]))

array([-1])