In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns



In [2]:
def transitions(x,o,iw):
    """Return the word-by-word probability shifts for one essay.

    The last ``np.count_nonzero(x)`` positions of ``x`` are treated as the
    words of the essay. For each of them the shift is the model output at
    that step minus the output at the previous step, reported sequentially
    (shifts of repeated words are NOT summed).

    Parameters
    ----------
    x : sequence of int
        Word indices for one essay; zero entries are not counted as words.
    o : array-like, shape (timesteps, n_classes)
        Model output at every timestep of the essay.
    iw : mapping
        Lookup from word index to word.

    Returns
    -------
    (words, probs) : (list, numpy.ndarray)
        ``words[k]`` is the k-th word and ``probs[k]`` the shift it caused;
        ``probs`` has shape ``(len(words), n_classes)``.

    Raises
    ------
    ValueError
        If ``x`` has more non-zero entries than ``o`` has timesteps.

    Notes
    -----
    When there is no step before the first word (``startpoint == 0``) the
    first shift is measured from a zero baseline. Indexing ``o[-1]`` there
    would silently compare the first word against the final timestep.
    """
    o = np.asarray(o)
    endpoint = o.shape[0]
    n_classes = o.shape[1]
    startpoint = endpoint - np.count_nonzero(x)
    if startpoint < 0:
        raise ValueError("x has more non-zero entries than o has timesteps")

    words = [iw[x[i]] for i in range(startpoint, endpoint)]

    current = o[startpoint:endpoint]
    if startpoint > 0:
        # Output one step earlier, i.e. before each word was read.
        previous = o[startpoint - 1:endpoint - 1]
    else:
        # No earlier step exists for the first word: use a zero baseline.
        previous = np.vstack([np.zeros((1, n_classes), dtype=o.dtype), o])[:endpoint]

    # The float64 seed reproduces the dtype the former np.append chain
    # produced, without re-copying the array once per word.
    probs = np.concatenate([np.empty((0, n_classes)), current - previous], axis=0)
    return (words, probs)

In [3]:
def addtransitions(x,o,iw):
    """Sum the per-word probability shifts over every essay in the corpus.

    Parameters
    ----------
    x : array, one row of word indices per essay
    o : array, one block of per-timestep model outputs per essay
    iw : mapping from word index to word

    Returns
    -------
    dict
        Maps each word to the element-wise sum of all shifts attributed to
        it, across all essays and all of its occurrences.
    """
    addworddeltas = {}
    for i in range(x.shape[0]):
        # transitions() returns (words, probs): parallel sequences, not a dict.
        words, probs = transitions(x[i], o[i], iw)
        for word, delta in zip(words, probs):
            if word in addworddeltas:
                addworddeltas[word] = addworddeltas[word] + delta
            else:
                addworddeltas[word] = delta
    return addworddeltas

In [4]:
def filtertestdata(x,y,o,t,pos):
    """Keep only the rows whose label column ``pos`` equals 1.

    The same row selection is applied to ``x``, ``y``, ``o`` and ``t``.
    The first three are returned as arrays, the texts as a plain list.
    """
    keep = y[:, pos] == 1
    kept_texts = np.asarray(t)[keep]
    return x[keep], y[keep], o[keep], kept_texts.tolist()

In [7]:
def filterdictionary(d,pos):
    """Rank the words of ``d`` by one component of their values.

    Parameters
    ----------
    d : dict
        Maps each word to an indexable value (e.g. a per-class vector).
    pos : int
        Which component of each value to rank by.

    Returns
    -------
    (words, values, newdic)
        ``words`` sorted ascending by ``value[pos]`` (ties broken by word),
        ``values`` the matching components in the same order, and
        ``newdic`` mapping each word to its component.
    """
    # Sort the (component, word) pairs once so both lists share one ordering.
    # .items() works on Python 2 and 3 alike.
    ranked = sorted((value[pos], key) for key, value in d.items())
    words = [word for (_score, word) in ranked]
    values = [score for (score, _word) in ranked]
    newdic = dict(zip(words, values))
    return words,values,newdic
    

In [8]:
# main function - Values(-1-ALL, 0-control male, 1-control female, 2-affirmation male, 3-affirmation female)
from __future__ import division
index_word=np.load('output/finalgenderdict.npy').item()
word_index=np.load('Data/finalgenderdictinv.npy').item()
testdata_output=np.load('Data/finalgendertestoutput.npy')
testdata_x=np.load('Data/finalgenderxtestdata.npy')
testdata_y=np.load('Data/finalgenderytestdata.npy')
with open('Data/finalgendertextsinput.txt') as f:
    text_content = f.readlines()
filtertestdatavalue=-1
filterdictionaryvalue=1
if filtertestdatavalue!=-1:
    testdata_x,testdata_y,testdata_output,text_content=filtertestdata(testdata_x,testdata_y,testdata_output,text_content,filtertestdatavalue)
maxindexoutput=np.argmax(testdata_output[:,-1,:],axis=1)
maxindexyval=np.argmax(testdata_y,axis=1)
accuracy=(np.count_nonzero(maxindexoutput == maxindexyval)/maxindexyval.shape[0])*100
#addwd=addtransitions(testdata_x,testdata_y,testdata_output,index_word)
#words,values,dic=filterdictionary(addwd,filterdictionaryvalue)
accuracy

58.8

- index_word is the pairing of index numbers with words
- word_index is the pairing of words with index numbers
- testdata_output is the sequential probabilities for each of the 500 test essays
- testdata_x is a matrix of index_word values (i.e. it tells you which word is at each position in the essay)
- testdata_y is a matrix that indicates the class for each essay
- accuracy here is the percentage of essays classified correctly, so it is not scored in the same manner as the essay classification (which uses the F1 score)

# Model comparison model information

In [None]:
# Per-word probability shifts for every test essay, written in long format:
# one row per (word, class).
words = []                          # one list of words per essay
essay = []                          # essay index, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
for i in range(len(testdata_x)):
    # transitions() takes (x, o, iw); the labels in testdata_y are not one of its arguments.
    word_dists = transitions(testdata_x[i], testdata_output[i], index_word)
    words.append(word_dists[0])
    essay.append([i] * len(word_dists[0]))
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_essay = [n for e in essay for n in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'feature': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'essay_num': flat_essay})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df = pd.concat([df1, df2, df3, df4])
df.to_csv('output/nn_probs.csv', index=False, encoding='utf-8')

# Social words

In [33]:
# Inputs for the social-word sentences. The *_wo variants feed the 'without' condition.
# allow_pickle=True: .item() unwraps a pickled dict, which NumPy >= 1.16.3
# refuses to load by default. Only load .npy files you produced yourself.
index_word = np.load('Gender/testsentiw_soc.npy', allow_pickle=True).item()
word_index = np.load('Gender/testsentiw_inv_soc.npy', allow_pickle=True).item()
preds = np.load('Gender/testsentpreds_soc.npy')
x_dat = np.load('Gender/testsentxdat_soc.npy')
# presumably an object array of strings (it is wrapped in a DataFrame next);
# allow_pickle is harmless if it is not — TODO confirm
df = np.load('Gender/testdat_soc.npy', allow_pickle=True)

index_word_wo = np.load('Gender/testsentiw_wo_soc.npy', allow_pickle=True).item()
word_index_wo = np.load('Gender/testsentiw_inv_wo_soc.npy', allow_pickle=True).item()
preds_wo = np.load('Gender/testsentpreds_wo_soc.npy')
x_dat_wo = np.load('Gender/testsentxdat_wo_soc.npy')

In [34]:
# Name the columns of the loaded array so later cells can read them by label (e.g. df.pos).
df = pd.DataFrame(df, columns=['word', 'pos', 'lemma', 'sent', 'test_sent', 'sent_wo', 'test_sent_wo'])

In [35]:
# Per-word probability shifts for the 'with' sentences, in long format:
# one row per (word, class).
words = []                          # one list of words per sentence
essay = []                          # sentence index, repeated once per word
pos = []                            # df's 'pos' value for the sentence, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
for i in range(len(x_dat)):
    word_dists = transitions(x_dat[i], preds[i], index_word)
    n_words = len(word_dists[0])
    words.append(word_dists[0])
    essay.append([i] * n_words)
    pos.append([df.pos.loc[i]] * n_words)
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_sentence = [n for e in essay for n in e]
flat_pos = [p for e in pos for p in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'word': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'sentence_num': flat_sentence,
                  'pos': flat_pos})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df_w = pd.concat([df1, df2, df3, df4])

In [36]:
# Per-word probability shifts for the 'without' sentences, in long format:
# one row per (word, class).
words = []                          # one list of words per sentence
essay = []                          # sentence index, repeated once per word
pos = []                            # df's 'pos' value for the sentence, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
# Bound the loop by the array actually indexed (x_dat_wo, not x_dat).
for i in range(len(x_dat_wo)):
    word_dists = transitions(x_dat_wo[i], preds_wo[i], index_word_wo)
    n_words = len(word_dists[0])
    words.append(word_dists[0])
    essay.append([i] * n_words)
    pos.append([df.pos.loc[i]] * n_words)
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_sentence = [n for e in essay for n in e]
flat_pos = [p for e in pos for p in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'word': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'sentence_num': flat_sentence,
                  'pos': flat_pos})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df_wo = pd.concat([df1, df2, df3, df4])

In [37]:
# Tag each frame with its condition, then stack both and write them to one CSV.
for frame, label in ((df_wo, 'without'), (df_w, 'with')):
    frame['cond'] = label
outdat = pd.concat([df_wo, df_w])
outdat.to_csv('output/w_wo_nnprobs_soc.csv', index=False, encoding='utf-8')

# Positive Emotion

In [38]:
# Inputs for the positive-emotion sentences. The *_wo variants feed the 'without' condition.
# allow_pickle=True: .item() unwraps a pickled dict, which NumPy >= 1.16.3
# refuses to load by default. Only load .npy files you produced yourself.
index_word = np.load('Gender/testsentiw_posem.npy', allow_pickle=True).item()
word_index = np.load('Gender/testsentiw_inv_posem.npy', allow_pickle=True).item()
preds = np.load('Gender/testsentpreds_posem.npy')
x_dat = np.load('Gender/testsentxdat_posem.npy')
# presumably an object array of strings (it is wrapped in a DataFrame next);
# allow_pickle is harmless if it is not — TODO confirm
df = np.load('Gender/testdat_posem.npy', allow_pickle=True)

index_word_wo = np.load('Gender/testsentiw_wo_posem.npy', allow_pickle=True).item()
word_index_wo = np.load('Gender/testsentiw_inv_wo_posem.npy', allow_pickle=True).item()
preds_wo = np.load('Gender/testsentpreds_wo_posem.npy')
x_dat_wo = np.load('Gender/testsentxdat_wo_posem.npy')

In [39]:
# Name the columns of the loaded array so later cells can read them by label (e.g. df.pos).
df = pd.DataFrame(df, columns=['word', 'pos', 'lemma', 'sent', 'test_sent', 'sent_wo', 'test_sent_wo'])

In [40]:
# Per-word probability shifts for the 'with' sentences, in long format:
# one row per (word, class).
words = []                          # one list of words per sentence
essay = []                          # sentence index, repeated once per word
pos = []                            # df's 'pos' value for the sentence, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
for i in range(len(x_dat)):
    word_dists = transitions(x_dat[i], preds[i], index_word)
    n_words = len(word_dists[0])
    words.append(word_dists[0])
    essay.append([i] * n_words)
    pos.append([df.pos.loc[i]] * n_words)
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_sentence = [n for e in essay for n in e]
flat_pos = [p for e in pos for p in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'word': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'sentence_num': flat_sentence,
                  'pos': flat_pos})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df_w = pd.concat([df1, df2, df3, df4])

In [41]:
# Per-word probability shifts for the 'without' sentences, in long format:
# one row per (word, class).
words = []                          # one list of words per sentence
essay = []                          # sentence index, repeated once per word
pos = []                            # df's 'pos' value for the sentence, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
# Bound the loop by the array actually indexed (x_dat_wo, not x_dat).
for i in range(len(x_dat_wo)):
    word_dists = transitions(x_dat_wo[i], preds_wo[i], index_word_wo)
    n_words = len(word_dists[0])
    words.append(word_dists[0])
    essay.append([i] * n_words)
    pos.append([df.pos.loc[i]] * n_words)
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_sentence = [n for e in essay for n in e]
flat_pos = [p for e in pos for p in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'word': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'sentence_num': flat_sentence,
                  'pos': flat_pos})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df_wo = pd.concat([df1, df2, df3, df4])

In [42]:
# Tag each frame with its condition, then stack both and write them to one CSV.
for frame, label in ((df_wo, 'without'), (df_w, 'with')):
    frame['cond'] = label
outdat = pd.concat([df_wo, df_w])
outdat.to_csv('output/w_wo_nnprobs_posem.csv', index=False, encoding='utf-8')

# Negative emotion

In [47]:
# Inputs for the negative-emotion sentences. The *_wo variants feed the 'without' condition.
# allow_pickle=True: .item() unwraps a pickled dict, which NumPy >= 1.16.3
# refuses to load by default. Only load .npy files you produced yourself.
index_word = np.load('Gender/testsentiw_negem.npy', allow_pickle=True).item()
word_index = np.load('Gender/testsentiw_inv_negem.npy', allow_pickle=True).item()
preds = np.load('Gender/testsentpreds_negem.npy')
x_dat = np.load('Gender/testsentxdat_negem.npy')
# presumably an object array of strings (it is wrapped in a DataFrame next);
# allow_pickle is harmless if it is not — TODO confirm
df = np.load('Gender/testdat_negem.npy', allow_pickle=True)

index_word_wo = np.load('Gender/testsentiw_wo_negem.npy', allow_pickle=True).item()
word_index_wo = np.load('Gender/testsentiw_inv_wo_negem.npy', allow_pickle=True).item()
preds_wo = np.load('Gender/testsentpreds_wo_negem.npy')
x_dat_wo = np.load('Gender/testsentxdat_wo_negem.npy')

In [48]:
# Name the columns of the loaded array so later cells can read them by label (e.g. df.pos).
df = pd.DataFrame(df, columns=['word', 'pos', 'lemma', 'sent', 'test_sent', 'sent_wo', 'test_sent_wo'])

In [49]:
# Per-word probability shifts for the 'with' sentences, in long format:
# one row per (word, class).
words = []                          # one list of words per sentence
essay = []                          # sentence index, repeated once per word
pos = []                            # df's 'pos' value for the sentence, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
for i in range(len(x_dat)):
    word_dists = transitions(x_dat[i], preds[i], index_word)
    n_words = len(word_dists[0])
    words.append(word_dists[0])
    essay.append([i] * n_words)
    pos.append([df.pos.loc[i]] * n_words)
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_sentence = [n for e in essay for n in e]
flat_pos = [p for e in pos for p in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'word': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'sentence_num': flat_sentence,
                  'pos': flat_pos})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df_w = pd.concat([df1, df2, df3, df4])

In [50]:
# Per-word probability shifts for the 'without' sentences, in long format:
# one row per (word, class).
words = []                          # one list of words per sentence
essay = []                          # sentence index, repeated once per word
pos = []                            # df's 'pos' value for the sentence, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
# Bound the loop by the array actually indexed (x_dat_wo, not x_dat).
for i in range(len(x_dat_wo)):
    word_dists = transitions(x_dat_wo[i], preds_wo[i], index_word_wo)
    n_words = len(word_dists[0])
    words.append(word_dists[0])
    essay.append([i] * n_words)
    pos.append([df.pos.loc[i]] * n_words)
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_sentence = [n for e in essay for n in e]
flat_pos = [p for e in pos for p in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'word': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'sentence_num': flat_sentence,
                  'pos': flat_pos})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df_wo = pd.concat([df1, df2, df3, df4])

In [51]:
# Tag each frame with its condition, then stack both and write them to one CSV.
for frame, label in ((df_wo, 'without'), (df_w, 'with')):
    frame['cond'] = label
outdat = pd.concat([df_wo, df_w])
outdat.to_csv('output/w_wo_nnprobs_negem.csv', index=False, encoding='utf-8')

# Values

In [43]:
# Inputs for the values sentences. The *_wo variants feed the 'without' condition.
# allow_pickle=True: .item() unwraps a pickled dict, which NumPy >= 1.16.3
# refuses to load by default. Only load .npy files you produced yourself.
index_word = np.load('Gender/testsentiw_val.npy', allow_pickle=True).item()
word_index = np.load('Gender/testsentiw_inv_val.npy', allow_pickle=True).item()
preds = np.load('Gender/testsentpreds_val.npy')
x_dat = np.load('Gender/testsentxdat_val.npy')
# presumably an object array of strings (it is wrapped in a DataFrame below);
# allow_pickle is harmless if it is not — TODO confirm
df = np.load('Gender/testdat_val.npy', allow_pickle=True)

index_word_wo = np.load('Gender/testsentiw_wo_val.npy', allow_pickle=True).item()
word_index_wo = np.load('Gender/testsentiw_inv_wo_val.npy', allow_pickle=True).item()
preds_wo = np.load('Gender/testsentpreds_wo_val.npy')
x_dat_wo = np.load('Gender/testsentxdat_wo_val.npy')

# Name the columns so later cells can read them by label (e.g. df.value).
df = pd.DataFrame(df, columns=['text', 'value'])

In [44]:
# Per-word probability shifts for the 'with' sentences, in long format:
# one row per (word, class).
words = []                          # one list of words per sentence
essay = []                          # sentence index, repeated once per word
val = []                            # df's 'value' entry for the sentence, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
for i in range(len(x_dat)):
    word_dists = transitions(x_dat[i], preds[i], index_word)
    n_words = len(word_dists[0])
    words.append(word_dists[0])
    essay.append([i] * n_words)
    val.append([df.value.loc[i]] * n_words)
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_sentence = [n for e in essay for n in e]
flat_val = [v for e in val for v in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'word': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'sentence_num': flat_sentence,
                  'val': flat_val})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df_w = pd.concat([df1, df2, df3, df4])

In [45]:
# Per-word probability shifts for the 'without' sentences, in long format:
# one row per (word, class).
words = []                          # one list of words per sentence
essay = []                          # sentence index, repeated once per word
val = []                            # df's 'value' entry for the sentence, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
# Bound the loop by the array actually indexed (x_dat_wo, not x_dat).
for i in range(len(x_dat_wo)):
    word_dists = transitions(x_dat_wo[i], preds_wo[i], index_word_wo)
    n_words = len(word_dists[0])
    words.append(word_dists[0])
    essay.append([i] * n_words)
    val.append([df.value.loc[i]] * n_words)
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_sentence = [n for e in essay for n in e]
flat_val = [v for e in val for v in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'word': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'sentence_num': flat_sentence,
                  'val': flat_val})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df_wo = pd.concat([df1, df2, df3, df4])

In [46]:
# Tag each frame with its condition, then stack both and write them to one CSV.
for frame, label in ((df_wo, 'without'), (df_w, 'with')):
    frame['cond'] = label
outdat = pd.concat([df_wo, df_w])
outdat.to_csv('output/w_wo_nnprobs_val.csv', index=False, encoding='utf-8')

# Justifications
(with others)

In [61]:
# Inputs for the justification sentences ("with others").
# allow_pickle=True: .item() unwraps a pickled dict, which NumPy >= 1.16.3
# refuses to load by default. Only load .npy files you produced yourself.
index_word = np.load('Gender/testsentiw_just.npy', allow_pickle=True).item()
word_index = np.load('Gender/testsentiw_inv_just.npy', allow_pickle=True).item()
preds = np.load('Gender/testsentpreds_just.npy')
x_dat = np.load('Gender/testsentxdat_just.npy')
# presumably an object array of strings (it is wrapped in a DataFrame below);
# allow_pickle is harmless if it is not — TODO confirm
df = np.load('Gender/testdat_just.npy', allow_pickle=True)

# Name the columns so later cells can read them by label (df.value, df.justification).
df = pd.DataFrame(df, columns=['justification', 'text', 'value'])

In [62]:
# Per-word probability shifts for the justification sentences, in long format:
# one row per (word, class).
words = []                          # one list of words per sentence
essay = []                          # sentence index, repeated once per word
val = []                            # df's 'value' entry for the sentence, repeated once per word
just = []                           # df's 'justification' entry, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
for i in range(len(x_dat)):
    word_dists = transitions(x_dat[i], preds[i], index_word)
    n_words = len(word_dists[0])
    words.append(word_dists[0])
    essay.append([i] * n_words)
    val.append([df.value.loc[i]] * n_words)
    just.append([df.justification.loc[i]] * n_words)
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_sentence = [n for e in essay for n in e]
flat_just = [j for e in just for j in e]
flat_val = [v for e in val for v in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'word': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'sentence_num': flat_sentence,
                  'just': flat_just,
                  'val': flat_val})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df_w = pd.concat([df1, df2, df3, df4])

In [63]:
# Write the long-format per-word shifts for the "with others" justification sentences.
df_w.to_csv('output/nnprobs_just.csv', index=False, encoding='utf-8')

# Justifications
(with self)

In [64]:
# Inputs for the justification sentences ("with self").
# allow_pickle=True: .item() unwraps a pickled dict, which NumPy >= 1.16.3
# refuses to load by default. Only load .npy files you produced yourself.
index_word = np.load('Gender/testsentiw_justself.npy', allow_pickle=True).item()
word_index = np.load('Gender/testsentiw_inv_justself.npy', allow_pickle=True).item()
preds = np.load('Gender/testsentpreds_justself.npy')
x_dat = np.load('Gender/testsentxdat_justself.npy')
# presumably an object array of strings (it is wrapped in a DataFrame below);
# allow_pickle is harmless if it is not — TODO confirm
df = np.load('Gender/testdat_justself.npy', allow_pickle=True)

# Name the columns so later cells can read them by label (df.value, df.justification).
df = pd.DataFrame(df, columns=['justification', 'text', 'value'])

In [65]:
# Per-word probability shifts for the justification sentences, in long format:
# one row per (word, class).
words = []                          # one list of words per sentence
essay = []                          # sentence index, repeated once per word
val = []                            # df's 'value' entry for the sentence, repeated once per word
just = []                           # df's 'justification' entry, repeated once per word
prob_chunks = [np.empty((0, 4))]    # float64 seed keeps the dtype of the np.append chain it replaces
for i in range(len(x_dat)):
    word_dists = transitions(x_dat[i], preds[i], index_word)
    n_words = len(word_dists[0])
    words.append(word_dists[0])
    essay.append([i] * n_words)
    val.append([df.value.loc[i]] * n_words)
    just.append([df.justification.loc[i]] * n_words)
    prob_chunks.append(word_dists[1])
# Concatenate once instead of re-copying the growing array every iteration.
probs = np.concatenate(prob_chunks, axis=0)

flat_words = [w for e in words for w in e]
flat_sentence = [n for e in essay for n in e]
flat_just = [j for e in just for j in e]
flat_val = [v for e in val for v in e]
# Four frames that differ only in which probability column they carry.
df1, df2, df3, df4 = [
    pd.DataFrame({'word': flat_words,
                  'prob': probs[:, col],
                  'class': [label] * len(probs),
                  'sentence_num': flat_sentence,
                  'just': flat_just,
                  'val': flat_val})
    for col, label in enumerate(['control_m', 'control_f', 'aff_m', 'aff_f'])
]
df_w = pd.concat([df1, df2, df3, df4])

In [66]:
# Write the long-format per-word shifts for the "with self" justification sentences.
df_w.to_csv('output/nnprobs_justself.csv', index=False, encoding='utf-8')