In [74]:
from nrclex import NRCLex

import nltk
import pandas as pd
import numpy as np
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from pandarallel import pandarallel

from sklearn import utils
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection, naive_bayes, svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report


from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
stopwords = stopwords.words('english')

from gensim.models.doc2vec import Doc2Vec, TaggedDocument

In [75]:
df = pd.read_pickle("../FINAL_DATASET.pkl")

In [76]:
doc2vec = joblib.load("../01_svm_model.pkl")
behavioral = joblib.load("../02_svm_model.pkl")
emotional = joblib.load("../03_Emotional/03_svm_model.pkl")
dbow_model_doc = Doc2Vec.load("../doc2vec_first.model")
dbow_model_beh = Doc2Vec.load('../doc2vec_second.model')

In [77]:
df.head()

Unnamed: 0,index,narrative,label,behavioral,emotional,array
0,5,\nI (20 F) have been abused by different peopl...,unwell,abused very young,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25..."
1,6,I grew up with my dad laying on top of me when...,unwell,grew laying woke started continues continued c...,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.3333333333333333, 0.0, 0.0, 0.33333333..."
2,7,He would call me mommy and ask me to come wipe...,unwell,call ask come wipe,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,9,I never did anything when he said those things...,unwell,never did said away stayed,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,10,\n\nWhen I was in seventh grade I became depre...,unwell,became started,"{'fear': 0.25, 'anger': 0.25, 'anticip': 0.0, ...","[0.25, 0.0, 0.25, 0.25, 0.0, 0.25, 0.0, 0.0, 0..."


In [78]:
def tokenize_text(text):
    tokens = []
    for sent in nltk.sent_tokenize(text):
        for word in nltk.word_tokenize(sent):
            if len(word) < 2:
                continue
            tokens.append(word.lower())
    return tokens

In [79]:
df["tokenized"] = df.narrative.apply(lambda x: tokenize_text(x))

In [80]:
df["vector_doc"] = df.tokenized.apply(lambda x: dbow_model_doc.infer_vector(x))

In [81]:
df["vector_beh"] = df.tokenized.apply(lambda x: dbow_model_beh.infer_vector(x))

In [82]:
df["doc2vec_predicted"] = df.vector_doc.apply(lambda x: doc2vec.predict([x])[0])

In [83]:
df["behavioral_predicted"] = df.vector_beh.apply(lambda x: behavioral.predict([x])[0])

In [84]:
df["emotional_predicted"] = df.array.apply(lambda x: emotional.predict([x])[0])

In [85]:
df.head()

Unnamed: 0,index,narrative,label,behavioral,emotional,array,tokenized,vector_doc,vector_beh,doc2vec_predicted,behavioral_predicted,emotional_predicted
0,5,\nI (20 F) have been abused by different peopl...,unwell,abused very young,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25...","[20, have, been, abused, by, different, people...","[-0.7071003, 0.18042387, 0.27642563, -0.308040...","[0.013840742, -0.08797427, -0.069450594, -0.01...",well,unwell,well
1,6,I grew up with my dad laying on top of me when...,unwell,grew laying woke started continues continued c...,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.3333333333333333, 0.0, 0.0, 0.33333333...","[grew, up, with, my, dad, laying, on, top, of,...","[-0.7444648, 0.18941903, 0.2893485, -0.3056205...","[0.0034878626, -0.119477116, -0.10543022, -0.0...",well,unwell,well
2,7,He would call me mommy and ask me to come wipe...,unwell,call ask come wipe,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[he, would, call, me, mommy, and, ask, me, to,...","[-0.7959792, 0.19273587, 0.31130674, -0.279445...","[0.011212734, -0.081257135, -0.066858284, -0.0...",unwell,unwell,well
3,9,I never did anything when he said those things...,unwell,never did said away stayed,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[never, did, anything, when, he, said, those, ...","[-0.7342275, 0.18872157, 0.28573027, -0.320781...","[0.017421095, -0.08737397, -0.06933086, -0.017...",well,unwell,well
4,10,\n\nWhen I was in seventh grade I became depre...,unwell,became started,"{'fear': 0.25, 'anger': 0.25, 'anticip': 0.0, ...","[0.25, 0.0, 0.25, 0.25, 0.0, 0.25, 0.0, 0.0, 0...","[when, was, in, seventh, grade, became, depres...","[-0.78756076, 0.20219587, 0.30520847, -0.34853...","[0.01106629, -0.079141095, -0.06427619, -0.012...",well,unwell,unwell


In [86]:
df["doc2vec_predicted"].value_counts()

well      2413
unwell    2339
Name: doc2vec_predicted, dtype: int64

In [87]:
df["behavioral_predicted"].value_counts()

unwell    2632
well      2120
Name: behavioral_predicted, dtype: int64

In [88]:
df["emotional_predicted"].value_counts()

well      3139
unwell    1613
Name: emotional_predicted, dtype: int64

In [89]:
def finalize(a, b, c):
    li = [a, b, c]
    
    def most_frequent(List): 
        counter = 0
        num = List[0] 
      
        for i in List: 
            curr_frequency = List.count(i) 
            if(curr_frequency> counter): 
                counter = curr_frequency 
                num = i 
  
        return num

    return most_frequent(li)

In [90]:
df["final"] = df.apply(lambda x: finalize(x.doc2vec_predicted, x.behavioral_predicted, x.emotional_predicted), axis=1)

In [91]:
df

Unnamed: 0,index,narrative,label,behavioral,emotional,array,tokenized,vector_doc,vector_beh,doc2vec_predicted,behavioral_predicted,emotional_predicted,final
0,5,\nI (20 F) have been abused by different peopl...,unwell,abused very young,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25...","[20, have, been, abused, by, different, people...","[-0.7071003, 0.18042387, 0.27642563, -0.308040...","[0.013840742, -0.08797427, -0.069450594, -0.01...",well,unwell,well,well
1,6,I grew up with my dad laying on top of me when...,unwell,grew laying woke started continues continued c...,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.3333333333333333, 0.0, 0.0, 0.33333333...","[grew, up, with, my, dad, laying, on, top, of,...","[-0.7444648, 0.18941903, 0.2893485, -0.3056205...","[0.0034878626, -0.119477116, -0.10543022, -0.0...",well,unwell,well,well
2,7,He would call me mommy and ask me to come wipe...,unwell,call ask come wipe,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[he, would, call, me, mommy, and, ask, me, to,...","[-0.7959792, 0.19273587, 0.31130674, -0.279445...","[0.011212734, -0.081257135, -0.066858284, -0.0...",unwell,unwell,well,unwell
3,9,I never did anything when he said those things...,unwell,never did said away stayed,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[never, did, anything, when, he, said, those, ...","[-0.7342275, 0.18872157, 0.28573027, -0.320781...","[0.017421095, -0.08737397, -0.06933086, -0.017...",well,unwell,well,well
4,10,\n\nWhen I was in seventh grade I became depre...,unwell,became started,"{'fear': 0.25, 'anger': 0.25, 'anticip': 0.0, ...","[0.25, 0.0, 0.25, 0.25, 0.0, 0.25, 0.0, 0.0, 0...","[when, was, in, seventh, grade, became, depres...","[-0.78756076, 0.20219587, 0.30520847, -0.34853...","[0.01106629, -0.079141095, -0.06427619, -0.012...",well,unwell,unwell,unwell
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4747,2371,Taking my morning walk and having a cup of cof...,well,Taking walk having,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[taking, my, morning, walk, and, having, cup, ...","[-0.7967633, 0.19517145, 0.31255, -0.2912302, ...","[0.017431678, -0.07374661, -0.053709652, -0.01...",unwell,unwell,well,unwell
4748,2372,"I ate my favorite meal, hot chicken.",well,ate,"{'fear': 0.2, 'anger': 0.2, 'anticip': 0.0, 't...","[0.0, 0.2, 0.2, 0.2, 0.2, 0.0, 0.0, 0.0, 0.0, ...","[ate, my, favorite, meal, hot, chicken]","[-0.7588926, 0.18442743, 0.2965322, -0.2589249...","[0.0216399, -0.011001789, 0.00028593466, -0.03...",unwell,well,unwell,unwell
4749,2373,I was happy to have a taco Tuesday with my two...,well,happy have,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.25, 0.0, 0.0, 0.25, 0.0, 0.0, 0.0, 0.0...","[was, happy, to, have, taco, tuesday, with, my...","[-0.69037926, 0.17797795, 0.27124876, -0.29908...","[0.030084144, -0.05725486, -0.034149442, -0.03...",unwell,well,well,well
4750,2374,Buying a new TV.,well,Buying,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[buying, new, tv]","[-0.77321446, 0.18372186, 0.3030013, -0.241204...","[0.03484661, -0.022767615, -0.0018727666, -0.0...",well,well,well,well


In [93]:
df.final.value_counts()

well      2609
unwell    2143
Name: final, dtype: int64

In [95]:
print(classification_report(df.label, df.final))

              precision    recall  f1-score   support

      unwell       0.84      0.76      0.80      2376
        well       0.78      0.86      0.82      2376

    accuracy                           0.81      4752
   macro avg       0.81      0.81      0.81      4752
weighted avg       0.81      0.81      0.81      4752



In [102]:
dbow_model_doc.infer_vector(["I want to die"]).shape

(300,)

In [106]:
df.array[0]to_numpy()

AttributeError: 'list' object has no attribute 'to_numpy'