In [12]:
from pickle import load
from tensorflow.keras.models import load_model
import spacy
from pandas import read_csv,DataFrame
from numpy import vectorize
nlp = spacy.load('en_core_web_sm')

In [2]:
cat_model = load_model('../Sup/categorical_model.h5')
cat_model.load_weights('../Sup/categorical_model_weights.h5')

# loading tokenizer
with open('../Sup/categorical_tokenizer.pickle', 'rb') as handle:
    cat_tokenizer = load(handle)


sen_model = load_model('../Sup/sentimental_model.h5')
sen_model.load_weights('../Sup/sentimental_model_weights.h5')

# loading tokenizer
with open('../Sup/sentimental_tokenizer.pickle', 'rb') as handle:
    sen_tokenizer = load(handle)


In [3]:
categories = ['DIRECTING PERFORMANCE', 'CAST PERFORMANCE']

In [33]:
def enc_1(x):
    if x<0.5:
        return 'DIRECTING PERFORMANCE'
    else:
        return 'CAST PERFORMANCE'

def enc_2(x):
    if x<0.5:
        return 'negative'
    else:
        return 'positive'
encoder_1 = vectorize(enc_1)
encoder_2 = vectorize(enc_2)

def predictions(csv_path):
    data = read_csv(csv_path)
    reviews = data['review']

    test_reviews = [review.lower() for review in reviews]
    test_aspect_terms = []
    for review in nlp.pipe(test_reviews):
        chunks = [(chunk.root.text) for chunk in review.noun_chunks if chunk.root.pos_ == 'NOUN']
        test_aspect_terms.append(' '.join(chunks))
    test_aspect_terms = DataFrame(cat_tokenizer.texts_to_matrix(test_aspect_terms))

    # Sentiment preprocessing
    test_sentiment_terms = []
    for review in nlp.pipe(test_reviews):
        if review.is_parsed:
            test_sentiment_terms.append(' '.join([token.lemma_ for token in review if (
                        not token.is_stop and not token.is_punct and (token.pos_ == "ADJ" or token.pos_ == "VERB"))]))
        else:
            test_sentiment_terms.append('')
    test_sentiment_terms = DataFrame(sen_tokenizer.texts_to_matrix(test_sentiment_terms))

    test_aspect_categories = encoder_1(cat_model.predict(test_aspect_terms))
    test_sentiment = encoder_2(sen_model.predict(test_sentiment_terms))

    aspect_categories = list(test_aspect_terms)
    sentiment = list(test_sentiment_terms)

    sen_column = DataFrame(test_sentiment)
    cat_column = DataFrame(test_aspect_categories)
    data['sentiment_results'] = sen_column
    data['categorical_result'] = cat_column

    categorized = [[],[]]
    for ind,i in enumerate(aspect_categories):
        if i > 0.5:
            categorized[0].append(ind)
        else:
            categorized[1].append(ind)


    result = {}
    for ind in range(len(categorized)):
        lis = categorized[ind]
        tol = len(lis)
        pos = 0
        for val in lis:
            if sentiment[val] > 0.5:
                pos+=1
        result[categories[ind]] = pos/tol if tol != 0 else 0.0

    print(result)

    return data,result

In [34]:
predictions('../Sup/test.csv')

{'DIRECTING PERFORMANCE': 1.0, 'CAST PERFORMANCE': 0.0}


(                                               review sentiment_results  \
 0   One of the other reviewers has mentioned that ...          positive   
 1   A wonderful little production. <br /><br />The...          positive   
 2   I thought this was a wonderful way to spend ti...          positive   
 3   Basically there's a family where a little boy ...          positive   
 4   Petter Mattei's "Love in the Time of Money" is...          positive   
 5   Probably my all-time favorite movie, a story o...          positive   
 6   I sure would like to see a resurrection of a u...          positive   
 7   This show was an amazing, fresh & innovative i...          negative   
 8   Encouraged by the positive comments about this...          negative   
 9   If you like original gut wrenching laughter yo...          positive   
 10  Phil the Alien is one of those quirky films wh...          negative   
 11  I saw this movie when I was about 12 when it c...          negative   
 12  So im n