In [6]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

from lexicon_use import form_text_vector, load_models

from gsitk.preprocess import pprocess_twitter, simple, Preprocesser

# Text normalisation applied to raw input before feature extraction:
# the twitter-specific preprocessor runs first, then the simple one.
pp_pipe = Pipeline(steps=[
    ('twitter', Preprocesser(pprocess_twitter)),
    ('simple', Preprocesser(simple)),
])

# load_models() yields two objects; predict() indexes both of them by moral name.
lrs, ngrams = load_models()

# Moral category names (presumably the keys available in lrs / ngrams -- confirm
# against lexicon_use.load_models).
moral_options = (
    'care',
    'fairness',
    'loyalty',
    'authority',
    'purity',
    'non-moral',
)

def predict(text, moral, path='lines.txt'):
    """Run the classifier for one moral category over one or more texts.

    Parameters
    ----------
    text : str or None
        A single text to classify. When falsy (``None`` or ``''``) the texts
        are read from ``path`` instead, one text per line.
    moral : str
        Key used to select the vectorizer in ``ngrams`` and the classifier in
        ``lrs`` (presumably one of ``moral_options`` -- confirm).
    path : str, optional
        File to read when ``text`` is falsy. Defaults to ``'lines.txt'``.

    Returns
    -------
    The result of ``lrs[moral].predict``: one prediction per classified text.
    When reading from a file, blank lines are skipped, so predictions line up
    with the non-blank lines in file order.

    Raises
    ------
    ValueError
        If ``text`` is falsy and ``path`` contains no non-blank line.
    """
    if text:
        texts = [text]
    else:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(path, 'r', encoding='utf-8') as f:
            stripped = [line.strip() for line in f]
        # A blank line carries no text to classify; drop it instead of
        # feeding an empty document to the models.
        texts = [line for line in stripped if line]
        if not texts:
            # Fail with a clear message rather than letting np.stack raise
            # on an empty list further down.
            raise ValueError('no texts to classify in {!r}'.format(path))

    # Each processed text is an iterable of string tokens (it is joined with
    # spaces below).
    tokens_per_text = pp_pipe.transform(texts)

    # Features from the vectorizer stored for this moral category.
    X_uni = ngrams[moral].transform([' '.join(t) for t in tokens_per_text])
    # One vector per text from form_text_vector, stacked into a 2-D array.
    Z = np.stack([form_text_vector(t) for t in tokens_per_text])

    # Concatenate both feature sets column-wise; X_uni is densified first.
    X_comb = np.hstack((X_uni.toarray(), Z))

    return lrs[moral].predict(X_comb)



Predict a single text

In [2]:
# Example taken from a real tweet. The triple-quoted literal keeps the
# newline right after the opening quotes and the one before the closing quotes.
text = '''
PLS help #HASHTAG's family. No one prepares for this. They are in need of any assistance you can offer
'''

predict(text, 'care')

array([1])

It is also possible to predict from a file: pass `None` as the text and `predict` reads `lines.txt` instead. The file must contain one text to analyze per line.

In [3]:
!cat lines.txt

My cat is happy
I really care for my cat
She hates going to the movies
PLS help #HASHTAG's family. No one prepares for this. They are in need of any assistance you can offer


In [7]:
# A falsy text (here None) makes predict() read its inputs from lines.txt.
predict(None, 'care')

array([0, 0, 0, 1])