In [1]:
import numpy as np
import pandas as pd
from keras.utils import to_categorical
from keras import models
from keras import layers

In [12]:
import re
import string

# # download missing resource
# import nltk
# nltk.download("stopwords")

from nltk.corpus import stopwords
from emot.emo_unicode import UNICODE_EMOJI  # for emojis
from emot.emo_unicode import EMOTICONS_EMO  # for emoticons

# Converting emojis to words
def convert_emojis(text, emoji_map=None):
    """Replace every known emoji character in ``text`` with a word label.

    Each character of ``text`` is looked up in ``emoji_map``. When found, all
    occurrences of that character are replaced by a space followed by the
    emoji description with commas and colons removed and remaining words
    joined by underscores (e.g. ``":loudly_crying_face:"`` becomes
    ``" loudly_crying_face"``).

    The lookup is done one character at a time, so emoji made of several
    code points are only matched if one of their individual characters is a
    key of the map.

    Args:
        text: The string to convert.
        emoji_map: Mapping of emoji character -> description string.
            Defaults to ``UNICODE_EMOJI`` from ``emot``.

    Returns:
        The converted string. Characters that are not keys of the map, or
        whose entry is not a string, are left untouched.
    """
    if emoji_map is None:
        emoji_map = UNICODE_EMOJI

    # dict.fromkeys de-duplicates while keeping first-seen order; a single
    # str.replace per distinct character already rewrites every occurrence.
    for char in dict.fromkeys(text):
        description = emoji_map.get(char)
        if not isinstance(description, str):
            # Not an emoji, or an unusable table entry: keep the character.
            continue
        label = "_".join(description.replace(",", "").replace(":", "").split())
        text = text.replace(char, f" {label}")

    return text


# Converting emoticons to words    
def convert_emoticons(text, emoticon_map=None):
    """Replace whitespace-delimited emoticon tokens in ``text`` with words.

    A token is replaced only when the *whole* token is a key of
    ``emoticon_map``. The replacement is the mapped description with commas
    removed and words joined by underscores. Replacing whole tokens (rather
    than every substring occurrence) keeps an emoticon such as ``:/`` from
    rewriting the ``://`` of a URL, or a short key from rewriting the inside
    of a longer word. Whitespace between tokens is preserved.

    Args:
        text: The string to convert.
        emoticon_map: Mapping of emoticon -> description string. Defaults to
            ``EMOTICONS_EMO`` from ``emot``.

    Returns:
        The converted string.
    """
    if emoticon_map is None:
        emoticon_map = EMOTICONS_EMO

    def _expand(match):
        token = match.group(0)
        meaning = emoticon_map.get(token)
        if meaning is None:
            return token
        return "_".join(meaning.replace(",", "").split())

    # \S+ visits each whitespace-delimited token exactly once.
    return re.sub(r"\S+", _expand, text)


# Function for removing urls
def remove_urls(text):
    """Return ``text`` with every web link deleted.

    A link is anything starting with ``http://``, ``https://`` or ``www.``
    and running up to the next whitespace character. Nothing is inserted in
    its place, so surrounding spaces are left as they were.
    """
    return re.sub(r'https?://\S+|www\.\S+', r'', text)


# Characters stripped by the text-cleaning functions below: the ASCII
# punctuation from the standard library plus extra typographic marks
# (acute accent, curly quotes, ellipsis, dash, euro sign, guillemets).
punctuations = string.punctuation + "´‘’“”…–€«»"

# Function to replace/remove junk seen during manual labelling
def dejunk(text):
    """Rewrite or drop known junk fragments found during manual labelling.

    The substitutions are applied one after another, in the order listed:
    a stylised-script sentence is mapped to plain ASCII, HTML entities are
    replaced (``&amp;`` -> ``and``) or removed (``&gt;``, ``&lt;``), and
    keycap digits 1-4 become ``"1. "`` ... ``"4. "``.
    """
    substitutions = (
        (r"𝓟𝓻𝓲𝓬𝓮𝓵𝓮𝓼𝓼 𝓲𝓼 𝓽𝓱𝓮 𝓶𝓸𝓼𝓽 𝓿𝓪𝓵𝓾𝓪𝓫𝓵𝓮 𝓪𝓼𝓼𝓮𝓽 𝓽𝓸 𝓸𝔀𝓷 𝓲𝓯 𝔂𝓸𝓾 𝓪𝓻𝓮 𝓵𝓾𝓬𝓴𝔂 𝓮𝓷𝓸𝓾𝓰𝓱 𝓽𝓸 𝓫𝓮 𝓪𝓫𝓵𝓮 𝓽𝓸 𝓪𝓬𝓺𝓾𝓲𝓻𝓮 𝓲𝓽",
         "priceless is the most valuable asset to own if you are lucky enough to be able to acquire it"),
        (r"&amp;", "and"),  # &
        (r"&gt;", ""),      # >
        (r"&lt;", ""),      # <
        (r"1⃣", "1. "),
        (r"2⃣", "2. "),
        (r"3⃣", "3. "),
        (r"4⃣", "4. "),
    )

    for junk, replacement in substitutions:
        text = re.sub(junk, replacement, text)

    return text

def decontract(text):
    """Expand common English contractions in lower-case ``text``.

    Both the straight (``'``) and the curly (``’``) apostrophe are accepted.
    The patterns are lower-case and matched case-sensitively, so the caller
    is expected to lower-case the text first.

    Every rule is anchored on word boundaries, so only real contractions are
    rewritten: a quoted word such as ``'really'`` or a word that merely ends
    in a contraction-like tail such as ``bandit's`` is left unchanged.

    Args:
        text: The string to expand.

    Returns:
        The string with the handled contractions expanded, e.g.
        ``"i'm"`` -> ``"i am"``, ``"won't"`` -> ``"will not"``,
        ``"don't"`` -> ``"do not"``.
    """
    apos = "[’']"

    # Order matters: the irregular negations (won't / can't / shan't) must be
    # expanded before the generic "n't" rule.
    rules = (
        (rf"\blet{apos}s\b", "let us"),

        # Suffix contractions must directly follow a word character.
        (rf"(?<=\w){apos}m\b", " am"),
        (rf"(?<=\w){apos}re\b", " are"),
        (rf"(?<=\w){apos}ll\b", " will"),
        (rf"(?<=\w){apos}d\b", " would"),
        (rf"(?<=\w){apos}ve\b", " have"),

        (rf"\b(it|that|there|he|she){apos}s\b", r"\1 is"),

        (rf"\bwon{apos}t\b", "will not"),
        (rf"\bcan{apos}t\b", "can not"),
        (rf"\bshan{apos}t\b", "shall not"),
        # isn't, aren't, don't, wouldn't, couldn't, shouldn't, haven't, hasn't, hadn't
        (rf"(?<=\w)n{apos}t\b", " not"),
    )

    for pattern, expansion in rules:
        text = re.sub(pattern, expansion, text)

    return text

#making a text-cleaning function
_ENGLISH_STOPWORDS = None


def _english_stopwords():
    """Return the NLTK English stop words as a frozenset, loaded only once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def preprocess_text(text):
    """Clean one raw text and drop English stop words.

    Steps, in order: lower-case, ``dejunk``, emoji -> words, URL removal,
    emoticon -> words, punctuation removal (characters in ``punctuations``),
    stop-word removal. The surviving words are joined by single spaces.

    URLs are removed *before* emoticons are converted so that an emoticon
    substitution can never alter the text of a URL and stop it from being
    recognised. Emoji are converted before URL removal because that step
    inserts a space in front of each emoji, which separates an emoji glued
    to the end of a URL.

    NOTE(review): lower-casing happens before the emoticon lookup, so an
    emoticon containing capital letters can only match through a lower-case
    key of the emoticon table — confirm this is intended.
    NOTE(review): punctuation is stripped before the stop-word check, so
    stop words that contain an apostrophe no longer match the list.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string.
    """
    cleaned_text = text.lower()

    # Replace/remove junk text
    cleaned_text = dejunk(cleaned_text)

    # Emoji -> words (also detaches emoji from neighbouring text)
    cleaned_text = convert_emojis(cleaned_text)

    # Remove URLs before touching emoticons (see docstring)
    cleaned_text = remove_urls(cleaned_text)

    # Emoticons -> words
    cleaned_text = convert_emoticons(cleaned_text)

    # Remove punctuation
    nopunc = ''.join(char for char in cleaned_text if char not in punctuations)

    # Remove stop words; the set is built once instead of once per word
    stop_words = _english_stopwords()
    clean_words = [word for word in nopunc.split() if word.lower() not in stop_words]

    return ' '.join(clean_words)

#making a text-cleaning function
def preprocess_text_v2(text):
    """Clean one raw text, expanding contractions and keeping stop words.

    Steps, in order: lower-case, ``dejunk``, emoji -> words, URL removal,
    emoticon -> words, ``decontract``, punctuation removal (characters in
    ``punctuations``). Unlike ``preprocess_text`` no stop words are removed
    and whitespace is not normalised.

    URLs are removed *before* emoticons are converted so that an emoticon
    substitution can never alter the text of a URL and stop it from being
    recognised. Emoji are converted before URL removal because that step
    inserts a space in front of each emoji, which separates an emoji glued
    to the end of a URL.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string.
    """
    cleaned_text = text.lower()

    # Replace/remove junk text
    cleaned_text = dejunk(cleaned_text)

    # Emoji -> words (also detaches emoji from neighbouring text)
    cleaned_text = convert_emojis(cleaned_text)

    # Remove URLs before touching emoticons (see docstring)
    cleaned_text = remove_urls(cleaned_text)

    # Emoticons -> words
    cleaned_text = convert_emoticons(cleaned_text)

    # Expand contractions while the apostrophes are still present
    cleaned_text = decontract(cleaned_text)

    # Remove punctuation
    return ''.join(char for char in cleaned_text if char not in punctuations)

In [2]:
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
# One shared stemmer instance, reused by every stem_text call.
porter = PorterStemmer()


def stem_text(text):
    """Tokenise ``text`` with NLTK and return the Porter stems joined by spaces."""
    tokens = word_tokenize(text)
    return ' '.join(map(porter.stem, tokens))

In [11]:
# Load the three hand-labelled CSV files and reduce each one to the same two
# columns, "text" and "label" (the subs file keeps its text under "title").
df0 = pd.read_csv('Labeled Data/data_labelling_comments.csv', header=0)[["text", "label"]]
df1 = (
    pd.read_csv('Labeled Data/data_labelling_subs.csv', header=0)[["title", "label"]]
    .rename(columns={'title': 'text'})
)
df2 = pd.read_csv('Labeled Data/data_labelling_tweets.csv', header=0)[["text", "label"]]

# DataFrame.append is deprecated and was removed in pandas 2.0; pd.concat is
# the supported equivalent. As with append, every frame keeps its own row
# labels, so index values repeat across the three sources.
df = pd.concat([df0, df1, df2])
df

  df = (df0.append(df1)).append(df2)


Unnamed: 0,text,label
0,"Mmmm yes, forward thinking, like underground t...",-1
1,Probably not. It's like if attaching horse to ...,0
2,Cool in any decade ever! A rocket going to spa...,1
3,"He is on of the most famous people out there, ...",1
4,The **average** rent for an apartment in Los A...,1
...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0
596,"I hated every minute of training, but I said, ...",1
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0


In [13]:
# Add the stop-word-free cleaned version of every raw text as a new column.
df["cleaned_text"] = [preprocess_text(raw) for raw in df["text"]]

df

Unnamed: 0,text,label,cleaned_text
0,"Mmmm yes, forward thinking, like underground t...",-1,mmmm yes forward thinking like underground tub...
1,Probably not. It's like if attaching horse to ...,0,probably like attaching horse automobile would...
2,Cool in any decade ever! A rocket going to spa...,1,cool decade ever rocket going space come would...
3,"He is on of the most famous people out there, ...",1,famous people like steve jobs never used among...
4,The **average** rent for an apartment in Los A...,1,average rent apartment los angeles 2786 month ...
...,...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0,elonmusk finish neuralink humans compete
596,"I hated every minute of training, but I said, ...",1,hated every minute training said dont quit suf...
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1,wth elonmusk loudlycryingface loudlycryingface
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0,derpymudkip02 dasnaga1 joebiden michelleobama ...


In [14]:
# Add the Porter-stemmed version of the cleaned text (coerced to str first).
df["stem"] = [stem_text(cleaned) for cleaned in df["cleaned_text"].astype("str")]
df

Unnamed: 0,text,label,cleaned_text,stem
0,"Mmmm yes, forward thinking, like underground t...",-1,mmmm yes forward thinking like underground tub...,mmmm ye forward think like underground tube ca...
1,Probably not. It's like if attaching horse to ...,0,probably like attaching horse automobile would...,probabl like attach hors automobil would save ...
2,Cool in any decade ever! A rocket going to spa...,1,cool decade ever rocket going space come would...,cool decad ever rocket go space come would imp...
3,"He is on of the most famous people out there, ...",1,famous people like steve jobs never used among...,famou peopl like steve job never use among peo...
4,The **average** rent for an apartment in Los A...,1,average rent apartment los angeles 2786 month ...,averag rent apart lo angel 2786 month tesla lo...
...,...,...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0,elonmusk finish neuralink humans compete,elonmusk finish neuralink human compet
596,"I hated every minute of training, but I said, ...",1,hated every minute training said dont quit suf...,hate everi minut train said dont quit suffer l...
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1,wth elonmusk loudlycryingface loudlycryingface,wth elonmusk loudlycryingfac loudlycryingfac
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0,derpymudkip02 dasnaga1 joebiden michelleobama ...,derpymudkip02 dasnaga1 joebiden michelleobama ...


In [16]:
# Persist the labelled frame, including the cleaned_text and stem columns.
# to_csv also writes the row index as the first CSV column by default.
df.to_csv("full_lab.csv")

<h1>Raw text</h1>

In [15]:
# Keep only the raw text and its gold label for the raw-text experiment.
df_text = df.loc[:, ["text", "label"]]
df_text

Unnamed: 0,text,label
0,"Mmmm yes, forward thinking, like underground t...",-1
1,Probably not. It's like if attaching horse to ...,0
2,Cool in any decade ever! A rocket going to spa...,1
3,"He is on of the most famous people out there, ...",1
4,The **average** rent for an apartment in Los A...,1
...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0
596,"I hated every minute of training, but I said, ...",1
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0


<h1>Neutral vs opinionated</h1>

In [3]:
import pickle

# Load the fitted tokenizer that turns texts into the count matrix fed to
# the neutral-vs-opinionated model in the cells below.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load tokenizer.pickle from a trusted source.
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

In [18]:
# Raw (uncleaned) texts to classify, coerced to str.
X = df_text['text'].astype("str")
# NOTE(review): pred_df and y are not read again in the visible cells —
# confirm nothing else depends on them before removing.
pred_df = pd.DataFrame({'pred' : []})
y = df_text['label']

In [3]:
# Load the saved Keras model used for the neutral-vs-opinionated predictions.
ood_model = models.load_model("ood_model_small_stem.h5")

In [5]:
# Smoke test: score one hand-written sentence with the currently loaded
# tokenizer/model pair and print the score returned for it.
bleh = ["I love this module it is wonderful"]
t = tokenizer.texts_to_matrix(bleh, mode='count')
pred = ood_model.predict(t, verbose=0)
print(pred[0])

[0.9997962]


In [20]:
# Neutral-vs-opinionated prediction for every raw text.
# All rows are scored with ONE batched predict() call instead of one call
# per row: the labels are the same, but the per-call overhead is paid once.
texts = [str(item) for item in X]
features = tokenizer.texts_to_matrix(texts, mode='count')
scores = ood_model.predict(features, verbose=0)

# One score per row; a score >= 0.5 is labelled 1, anything else 0.
# Every row always receives a label, so the list stays aligned with X.
predictions = (scores[:, 0] >= 0.5).astype(int).tolist()
print(predictions)

[0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 

In [22]:
# Attach the neutral-vs-opinionated predictions; a list is assigned by
# position, so it must have exactly one entry per row of df.
df['pred'] = predictions
df

Unnamed: 0,text,label,cleaned_text,stem,pred
0,"Mmmm yes, forward thinking, like underground t...",-1,mmmm yes forward thinking like underground tub...,mmmm ye forward think like underground tube ca...,0
1,Probably not. It's like if attaching horse to ...,0,probably like attaching horse automobile would...,probabl like attach hors automobil would save ...,0
2,Cool in any decade ever! A rocket going to spa...,1,cool decade ever rocket going space come would...,cool decad ever rocket go space come would imp...,1
3,"He is on of the most famous people out there, ...",1,famous people like steve jobs never used among...,famou peopl like steve job never use among peo...,1
4,The **average** rent for an apartment in Los A...,1,average rent apartment los angeles 2786 month ...,averag rent apart lo angel 2786 month tesla lo...,1
...,...,...,...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0,elonmusk finish neuralink humans compete,elonmusk finish neuralink human compet,0
596,"I hated every minute of training, but I said, ...",1,hated every minute training said dont quit suf...,hate everi minut train said dont quit suffer l...,1
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1,wth elonmusk loudlycryingface loudlycryingface,wth elonmusk loudlycryingfac loudlycryingfac,0
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0,derpymudkip02 dasnaga1 joebiden michelleobama ...,derpymudkip02 dasnaga1 joebiden michelleobama ...,0


<h1>Positive vs negative</h1>

In [4]:
# Load the tokenizer fitted for the positive-vs-negative model. This rebinds
# the global `tokenizer`, replacing the one loaded for the previous section.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load tokenizer_opi.pickle from a trusted source.
with open('tokenizer_opi.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

In [5]:
# Load the positive-vs-negative model. This rebinds the global `ood_model`,
# replacing the neutral-vs-opinionated model loaded earlier.
ood_model = models.load_model("ood_opi_model_small_stem.h5")

In [11]:
# Smoke test: score one hand-written negative sentence with the currently
# loaded tokenizer/model pair and print the score returned for it.
bleh = ["I hate this food it is not nice"]
t = tokenizer.texts_to_matrix(bleh, mode='count')
pred = ood_model.predict(t, verbose=0)
print(pred[0])

[0.01595408]


In [25]:
# Positive-vs-negative prediction on the raw text.
# Rows whose gold label is 0 are not scored and get the placeholder "na";
# all remaining rows are scored with ONE batched predict() call instead of
# one call per row.
is_neutral = (df_text['label'] == 0).tolist()
texts = [str(raw) for raw, neutral in zip(df_text['text'], is_neutral) if not neutral]

scored = []
if texts:  # nothing to score when every row is neutral
    features = tokenizer.texts_to_matrix(texts, mode='count')
    scores = ood_model.predict(features, verbose=0)
    # One score per row; a score >= 0.5 is labelled 1, anything else 0.
    scored = (scores[:, 0] >= 0.5).astype(int).tolist()

# Re-interleave the labels with the "na" placeholders in row order.
scored_iter = iter(scored)
predictions = ["na" if neutral else next(scored_iter) for neutral in is_neutral]
print(predictions)

[1, 'na', 1, 1, 0, 0, 0, 1, 0, 1, 0, 'na', 1, 'na', 'na', 1, 0, 'na', 1, 0, 'na', 0, 1, 0, 1, 'na', 'na', 'na', 'na', 'na', 1, 'na', 0, 0, 0, 1, 1, 'na', 1, 'na', 'na', 0, 0, 'na', 'na', 'na', 'na', 1, 0, 1, 1, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 0, 'na', 'na', 'na', 1, 0, 1, 0, 'na', 'na', 1, 'na', 'na', 1, 'na', 'na', 'na', 1, 'na', 1, 1, 'na', 1, 'na', 'na', 'na', 'na', 'na', 'na', 1, 1, 0, 'na', 'na', 'na', 1, 'na', 0, 1, 'na', 'na', 'na', 'na', 1, 0, 'na', 'na', 'na', 1, 'na', 0, 1, 'na', 'na', 'na', 'na', 1, 1, 1, 'na', 'na', 'na', 'na', 'na', 1, 'na', 'na', 'na', 1, 'na', 'na', 1, 0, 0, 0, 'na', 'na', 1, 'na', 'na', 'na', 'na', 0, 0, 'na', 'na', 1, 'na', 'na', 'na', 0, 'na', 0, 'na', 'na', 'na', 1, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 1, 'na', 0, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 0, 1, 1, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 1, 'na', 0, 'na', 0, 0, 0, 1, 0, 'na', 0, 1, 'na', 1, 0, 'na', 0, 'na', 1, 'na', 1, 0, 'na', 'na

In [26]:
# Attach the positive-vs-negative predictions ("na" for rows with label 0);
# a list is assigned by position, so it must have one entry per row of df.
df['pred_opi'] = predictions
df

Unnamed: 0,text,label,cleaned_text,stem,pred,pred_opi
0,"Mmmm yes, forward thinking, like underground t...",-1,mmmm yes forward thinking like underground tub...,mmmm ye forward think like underground tube ca...,0,1
1,Probably not. It's like if attaching horse to ...,0,probably like attaching horse automobile would...,probabl like attach hors automobil would save ...,0,na
2,Cool in any decade ever! A rocket going to spa...,1,cool decade ever rocket going space come would...,cool decad ever rocket go space come would imp...,1,1
3,"He is on of the most famous people out there, ...",1,famous people like steve jobs never used among...,famou peopl like steve job never use among peo...,1,1
4,The **average** rent for an apartment in Los A...,1,average rent apartment los angeles 2786 month ...,averag rent apart lo angel 2786 month tesla lo...,1,0
...,...,...,...,...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0,elonmusk finish neuralink humans compete,elonmusk finish neuralink human compet,0,na
596,"I hated every minute of training, but I said, ...",1,hated every minute training said dont quit suf...,hate everi minut train said dont quit suffer l...,1,1
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1,wth elonmusk loudlycryingface loudlycryingface,wth elonmusk loudlycryingfac loudlycryingfac,0,1
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0,derpymudkip02 dasnaga1 joebiden michelleobama ...,derpymudkip02 dasnaga1 joebiden michelleobama ...,0,na


In [27]:
# Save the raw-text predictions (pred and pred_opi columns) for later scoring.
df.to_csv('Accuracy/df_text_predictions.csv')

<h1>Cleaned text</h1>

In [28]:
# Discard the previous experiment's prediction columns before re-predicting.
# errors='ignore' keeps the cell re-runnable: without it a second run raises
# KeyError because the columns are already gone.
df = df.drop(columns=['pred', 'pred_opi'], errors='ignore')
df

Unnamed: 0,text,label,cleaned_text,stem
0,"Mmmm yes, forward thinking, like underground t...",-1,mmmm yes forward thinking like underground tub...,mmmm ye forward think like underground tube ca...
1,Probably not. It's like if attaching horse to ...,0,probably like attaching horse automobile would...,probabl like attach hors automobil would save ...
2,Cool in any decade ever! A rocket going to spa...,1,cool decade ever rocket going space come would...,cool decad ever rocket go space come would imp...
3,"He is on of the most famous people out there, ...",1,famous people like steve jobs never used among...,famou peopl like steve job never use among peo...
4,The **average** rent for an apartment in Los A...,1,average rent apartment los angeles 2786 month ...,averag rent apart lo angel 2786 month tesla lo...
...,...,...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0,elonmusk finish neuralink humans compete,elonmusk finish neuralink human compet
596,"I hated every minute of training, but I said, ...",1,hated every minute training said dont quit suf...,hate everi minut train said dont quit suffer l...
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1,wth elonmusk loudlycryingface loudlycryingface,wth elonmusk loudlycryingfac loudlycryingfac
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0,derpymudkip02 dasnaga1 joebiden michelleobama ...,derpymudkip02 dasnaga1 joebiden michelleobama ...


In [29]:
# Neutral-vs-opinionated prediction on the stop-word-free cleaned text.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load tokenizer.pickle from a trusted source.
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
ood_model = models.load_model("ood_model_small_stem.h5")

X = df['cleaned_text'].astype("str")
pred_df = pd.DataFrame({'pred' : []})  # not read in this cell; kept in case other cells use it

# All rows are scored with ONE batched predict() call instead of one call
# per row: the labels are the same, but the per-call overhead is paid once.
features = tokenizer.texts_to_matrix(X.tolist(), mode='count')
scores = ood_model.predict(features, verbose=0)

# One score per row; a score >= 0.5 is labelled 1, anything else 0.
predictions = (scores[:, 0] >= 0.5).astype(int).tolist()
df['pred'] = predictions
df

Unnamed: 0,text,label,cleaned_text,stem,pred
0,"Mmmm yes, forward thinking, like underground t...",-1,mmmm yes forward thinking like underground tub...,mmmm ye forward think like underground tube ca...,0
1,Probably not. It's like if attaching horse to ...,0,probably like attaching horse automobile would...,probabl like attach hors automobil would save ...,0
2,Cool in any decade ever! A rocket going to spa...,1,cool decade ever rocket going space come would...,cool decad ever rocket go space come would imp...,1
3,"He is on of the most famous people out there, ...",1,famous people like steve jobs never used among...,famou peopl like steve job never use among peo...,1
4,The **average** rent for an apartment in Los A...,1,average rent apartment los angeles 2786 month ...,averag rent apart lo angel 2786 month tesla lo...,1
...,...,...,...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0,elonmusk finish neuralink humans compete,elonmusk finish neuralink human compet,0
596,"I hated every minute of training, but I said, ...",1,hated every minute training said dont quit suf...,hate everi minut train said dont quit suffer l...,1
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1,wth elonmusk loudlycryingface loudlycryingface,wth elonmusk loudlycryingfac loudlycryingfac,0
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0,derpymudkip02 dasnaga1 joebiden michelleobama ...,derpymudkip02 dasnaga1 joebiden michelleobama ...,0


In [31]:
# Positive-vs-negative prediction on the stop-word-free cleaned text.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load tokenizer_opi.pickle from a trusted source.
with open('tokenizer_opi.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
ood_model = models.load_model("ood_opi_model_small_stem.h5")

# Rows whose gold label is 0 are not scored and get the placeholder "na";
# all remaining rows are scored with ONE batched predict() call.
is_neutral = (df['label'] == 0).tolist()
texts = [str(cleaned) for cleaned, neutral in zip(df['cleaned_text'], is_neutral) if not neutral]

scored = []
if texts:  # nothing to score when every row is neutral
    features = tokenizer.texts_to_matrix(texts, mode='count')
    scores = ood_model.predict(features, verbose=0)
    # One score per row; a score >= 0.5 is labelled 1, anything else 0.
    scored = (scores[:, 0] >= 0.5).astype(int).tolist()

# Re-interleave the labels with the "na" placeholders in row order.
scored_iter = iter(scored)
predictions = ["na" if neutral else next(scored_iter) for neutral in is_neutral]
df['pred_opi'] = predictions
df

Unnamed: 0,text,label,cleaned_text,stem,pred,pred_opi
0,"Mmmm yes, forward thinking, like underground t...",-1,mmmm yes forward thinking like underground tub...,mmmm ye forward think like underground tube ca...,0,0
1,Probably not. It's like if attaching horse to ...,0,probably like attaching horse automobile would...,probabl like attach hors automobil would save ...,0,na
2,Cool in any decade ever! A rocket going to spa...,1,cool decade ever rocket going space come would...,cool decad ever rocket go space come would imp...,1,1
3,"He is on of the most famous people out there, ...",1,famous people like steve jobs never used among...,famou peopl like steve job never use among peo...,1,1
4,The **average** rent for an apartment in Los A...,1,average rent apartment los angeles 2786 month ...,averag rent apart lo angel 2786 month tesla lo...,1,0
...,...,...,...,...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0,elonmusk finish neuralink humans compete,elonmusk finish neuralink human compet,0,na
596,"I hated every minute of training, but I said, ...",1,hated every minute training said dont quit suf...,hate everi minut train said dont quit suffer l...,1,1
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1,wth elonmusk loudlycryingface loudlycryingface,wth elonmusk loudlycryingfac loudlycryingfac,0,1
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0,derpymudkip02 dasnaga1 joebiden michelleobama ...,derpymudkip02 dasnaga1 joebiden michelleobama ...,0,na


In [32]:
# Save the cleaned-text predictions (pred and pred_opi columns) for later scoring.
df.to_csv('Accuracy/df_cleaned_text_predictions.csv')

<h1>Stemmed text</h1>

In [33]:
# Discard the previous experiment's prediction columns before re-predicting.
# errors='ignore' keeps the cell re-runnable: without it a second run raises
# KeyError because the columns are already gone.
df = df.drop(columns=['pred', 'pred_opi'], errors='ignore')
df

Unnamed: 0,text,label,cleaned_text,stem
0,"Mmmm yes, forward thinking, like underground t...",-1,mmmm yes forward thinking like underground tub...,mmmm ye forward think like underground tube ca...
1,Probably not. It's like if attaching horse to ...,0,probably like attaching horse automobile would...,probabl like attach hors automobil would save ...
2,Cool in any decade ever! A rocket going to spa...,1,cool decade ever rocket going space come would...,cool decad ever rocket go space come would imp...
3,"He is on of the most famous people out there, ...",1,famous people like steve jobs never used among...,famou peopl like steve job never use among peo...
4,The **average** rent for an apartment in Los A...,1,average rent apartment los angeles 2786 month ...,averag rent apart lo angel 2786 month tesla lo...
...,...,...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0,elonmusk finish neuralink humans compete,elonmusk finish neuralink human compet
596,"I hated every minute of training, but I said, ...",1,hated every minute training said dont quit suf...,hate everi minut train said dont quit suffer l...
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1,wth elonmusk loudlycryingface loudlycryingface,wth elonmusk loudlycryingfac loudlycryingfac
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0,derpymudkip02 dasnaga1 joebiden michelleobama ...,derpymudkip02 dasnaga1 joebiden michelleobama ...


In [34]:
# Neutral-vs-opinionated prediction on the stemmed text.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load tokenizer.pickle from a trusted source.
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
ood_model = models.load_model("ood_model_small_stem.h5")

X = df['stem'].astype("str")
pred_df = pd.DataFrame({'pred' : []})  # not read in this cell; kept in case other cells use it

# All rows are scored with ONE batched predict() call instead of one call
# per row: the labels are the same, but the per-call overhead is paid once.
features = tokenizer.texts_to_matrix(X.tolist(), mode='count')
scores = ood_model.predict(features, verbose=0)

# One score per row; a score >= 0.5 is labelled 1, anything else 0.
predictions = (scores[:, 0] >= 0.5).astype(int).tolist()
df['pred'] = predictions
df

Unnamed: 0,text,label,cleaned_text,stem,pred
0,"Mmmm yes, forward thinking, like underground t...",-1,mmmm yes forward thinking like underground tub...,mmmm ye forward think like underground tube ca...,0
1,Probably not. It's like if attaching horse to ...,0,probably like attaching horse automobile would...,probabl like attach hors automobil would save ...,1
2,Cool in any decade ever! A rocket going to spa...,1,cool decade ever rocket going space come would...,cool decad ever rocket go space come would imp...,1
3,"He is on of the most famous people out there, ...",1,famous people like steve jobs never used among...,famou peopl like steve job never use among peo...,1
4,The **average** rent for an apartment in Los A...,1,average rent apartment los angeles 2786 month ...,averag rent apart lo angel 2786 month tesla lo...,1
...,...,...,...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0,elonmusk finish neuralink humans compete,elonmusk finish neuralink human compet,0
596,"I hated every minute of training, but I said, ...",1,hated every minute training said dont quit suf...,hate everi minut train said dont quit suffer l...,1
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1,wth elonmusk loudlycryingface loudlycryingface,wth elonmusk loudlycryingfac loudlycryingfac,0
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0,derpymudkip02 dasnaga1 joebiden michelleobama ...,derpymudkip02 dasnaga1 joebiden michelleobama ...,0


In [35]:
# Positive-vs-negative prediction on the stemmed text.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load tokenizer_opi.pickle from a trusted source.
with open('tokenizer_opi.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
ood_model = models.load_model("ood_opi_model_small_stem.h5")

# Rows whose gold label is 0 are not scored and get the placeholder "na";
# all remaining rows are scored with ONE batched predict() call.
is_neutral = (df['label'] == 0).tolist()
texts = [str(stemmed) for stemmed, neutral in zip(df['stem'], is_neutral) if not neutral]

scored = []
if texts:  # nothing to score when every row is neutral
    features = tokenizer.texts_to_matrix(texts, mode='count')
    scores = ood_model.predict(features, verbose=0)
    # One score per row; a score >= 0.5 is labelled 1, anything else 0.
    scored = (scores[:, 0] >= 0.5).astype(int).tolist()

# Re-interleave the labels with the "na" placeholders in row order.
scored_iter = iter(scored)
predictions = ["na" if neutral else next(scored_iter) for neutral in is_neutral]
df['pred_opi'] = predictions
df

Unnamed: 0,text,label,cleaned_text,stem,pred,pred_opi
0,"Mmmm yes, forward thinking, like underground t...",-1,mmmm yes forward thinking like underground tub...,mmmm ye forward think like underground tube ca...,0,0
1,Probably not. It's like if attaching horse to ...,0,probably like attaching horse automobile would...,probabl like attach hors automobil would save ...,1,na
2,Cool in any decade ever! A rocket going to spa...,1,cool decade ever rocket going space come would...,cool decad ever rocket go space come would imp...,1,1
3,"He is on of the most famous people out there, ...",1,famous people like steve jobs never used among...,famou peopl like steve job never use among peo...,1,1
4,The **average** rent for an apartment in Los A...,1,average rent apartment los angeles 2786 month ...,averag rent apart lo angel 2786 month tesla lo...,1,0
...,...,...,...,...,...,...
595,@elonmusk Finish Neuralink so humans can compete.,0,elonmusk finish neuralink humans compete,elonmusk finish neuralink human compet,0,na
596,"I hated every minute of training, but I said, ...",1,hated every minute training said dont quit suf...,hate everi minut train said dont quit suffer l...,1,1
597,Wth is this @elonmusk 😭😭 https://t.co/hbXL3WZpMf,-1,wth elonmusk loudlycryingface loudlycryingface,wth elonmusk loudlycryingfac loudlycryingfac,0,1
598,@DerpyMudkip02 @DasNaga1 @JoeBiden @MichelleOb...,0,derpymudkip02 dasnaga1 joebiden michelleobama ...,derpymudkip02 dasnaga1 joebiden michelleobama ...,0,na


In [36]:
# Save the stemmed-text predictions (pred and pred_opi columns) for later scoring.
df.to_csv('Accuracy/df_stemmed_text_predictions.csv')