In [60]:
from nrclex import NRCLex

import nltk
import pandas as pd
import numpy as np
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import joblib
from pandarallel import pandarallel

from sklearn import utils
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection, naive_bayes, svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report


from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
stopwords = stopwords.words('english')

from gensim.models.doc2vec import Doc2Vec, TaggedDocument

In [61]:
df = pd.read_pickle("../FINAL_DATASET.pkl")

In [62]:
doc2vec = joblib.load("../01_svm_model.pkl")
behavioral = joblib.load("../02_svm_model.pkl")
emotional = joblib.load("../03_Emotional/03_svm_model.pkl")
dbow_model_beh = Doc2Vec.load('../doc2vec_first.model')
dbow_model_doc = Doc2Vec.load('../doc2vec_second.model')

In [63]:
df.head()

Unnamed: 0,index,narrative,label,behavioral,emotional,array
0,5,\nI (20 F) have been abused by different peopl...,unwell,abused very young,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25..."
1,6,I grew up with my dad laying on top of me when...,unwell,grew laying woke started continues continued c...,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.3333333333333333, 0.0, 0.0, 0.33333333..."
2,7,He would call me mommy and ask me to come wipe...,unwell,call ask come wipe,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,9,I never did anything when he said those things...,unwell,never did said away stayed,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,10,\n\nWhen I was in seventh grade I became depre...,unwell,became started,"{'fear': 0.25, 'anger': 0.25, 'anticip': 0.0, ...","[0.25, 0.0, 0.25, 0.25, 0.0, 0.25, 0.0, 0.0, 0..."


In [64]:
def tokenize_text(text):
    tokens = []
    for sent in nltk.sent_tokenize(text):
        for word in nltk.word_tokenize(sent):
            if len(word) < 2:
                continue
            tokens.append(word.lower())
    return tokens

In [65]:
df["tokenized"] = df.narrative.apply(lambda x: tokenize_text(x))

In [66]:
df["vector_doc"] = df.tokenized.apply(lambda x: dbow_model_doc.infer_vector(x)[0])

In [67]:
df["vector_beh"] = df.tokenized.apply(lambda x: dbow_model_beh.infer_vector(x)[0])

In [68]:
df["doc2vec_predicted"] = df.vector_doc.apply(lambda x: doc2vec.predict([x])[0])

In [69]:
df["behavioral_predicted"] = df.vector_beh.apply(lambda x: behavioral.predict([x])[0])

In [70]:
df["emotional_predicted"] = df.array.apply(lambda x: emotional.predict([x]))

In [71]:
df.head()

Unnamed: 0,index,narrative,label,behavioral,emotional,array,tokenized,vector_doc,vector_beh,doc2vec_predicted,behavioral_predicted,emotional_predicted
0,5,\nI (20 F) have been abused by different peopl...,unwell,abused very young,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.25...","[20, have, been, abused, by, different, people...","[0.012846371, -0.08506929, -0.07072686, -0.012...","[-0.18715128, 0.07399863, -0.17872168, -0.1661...",[unwell],[well],[well]
1,6,I grew up with my dad laying on top of me when...,unwell,grew laying woke started continues continued c...,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.3333333333333333, 0.0, 0.0, 0.33333333...","[grew, up, with, my, dad, laying, on, top, of,...","[0.0058108703, -0.12144101, -0.10623647, -0.00...","[-0.5442939, -0.13103275, -0.2532134, -0.41732...",[unwell],[unwell],[well]
2,7,He would call me mommy and ask me to come wipe...,unwell,call ask come wipe,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[he, would, call, me, mommy, and, ask, me, to,...","[0.00891198, -0.07894919, -0.065649904, -0.008...","[-0.116394244, 0.18037713, -0.21665742, -0.126...",[unwell],[well],[well]
3,9,I never did anything when he said those things...,unwell,never did said away stayed,"{'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 't...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[never, did, anything, when, he, said, those, ...","[0.016087206, -0.08783096, -0.07025361, -0.017...","[-0.3267334, 0.32563692, -0.46998626, -0.32578...",[unwell],[unwell],[well]
4,10,\n\nWhen I was in seventh grade I became depre...,unwell,became started,"{'fear': 0.25, 'anger': 0.25, 'anticip': 0.0, ...","[0.25, 0.0, 0.25, 0.25, 0.0, 0.25, 0.0, 0.0, 0...","[when, was, in, seventh, grade, became, depres...","[0.012577527, -0.0782866, -0.061359443, -0.013...","[0.10743977, -0.032537453, 0.09743308, 0.09312...",[unwell],[well],[unwell]


In [72]:
df["doc2vec_predicted"].value_counts()

TypeError: unhashable type: 'numpy.ndarray'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 4588, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'numpy.ndarray'


[unwell]    4751
[unwell]       1
Name: doc2vec_predicted, dtype: int64

In [73]:
df["behavioral_predicted"].value_counts()

TypeError: unhashable type: 'numpy.ndarray'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 4588, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'numpy.ndarray'


[well]      3628
[unwell]    1123
[well]         1
Name: behavioral_predicted, dtype: int64

In [74]:
df["emotional_predicted"].value_counts()

TypeError: unhashable type: 'numpy.ndarray'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 4588, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'numpy.ndarray'


[well]      3138
[unwell]    1613
[well]         1
Name: emotional_predicted, dtype: int64

In [75]:
doc2vec.predict([dbow_model.infer_vector(tokenize_text("I am raped"), epochs=400)])

NameError: name 'dbow_model' is not defined

In [None]:
doc2vec