# Sentiment Analysis

In [6]:
import pandas as pd
import warnings, sys, os
from pathlib import Path

path = Path(os.getcwd())
sys.path.append(str(path.parent))
warnings.filterwarnings('ignore')  
%matplotlib inline
data_directory = '../data'

In [14]:
res = pd.read_json(f'{data_directory}/precios.json', orient='records',lines=True)
print(res)

ValueError: Expected object or value

### Tenemos dos lexicon para trabajar los análisis:

**Emotion Lexicon**: Tiene todas las palabras del lexicon. Si una palabra tiene aunque sea una minima relación con una emoción, el flag de la emoción va a estar en 1 para esa palabra. Si no tiene ninguna relación el flag va a ser 0

In [3]:
emotion_lexicon=pd.read_csv(f'{data_directory}/sa/Emotion_Lexicon.csv',delimiter=',')
emotion_lexicon

Unnamed: 0,English_a,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,abacus,0,0,0,0,0,0,0,0,0,1
1,abandon,0,1,0,0,0,1,0,1,0,0
2,abandoned,0,1,1,0,0,1,0,1,0,0
3,abandonment,0,1,1,0,0,1,0,1,1,0
4,abba,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
6480,zany,0,0,0,0,0,0,0,0,1,0
6481,zeal,1,0,0,1,0,0,1,0,1,1
6482,zealous,1,0,0,0,0,0,1,0,0,1
6483,zest,1,0,0,1,0,0,1,0,0,1


**Emotion Intensity**: Tiene todas las palabras con la particularidad de tener un variable que indica el grado de asociación de la palabra con la emoción (entre 0 y 1), entre mas alto, mas asociación.

In [4]:
emotion_intensity=pd.read_csv(f'{data_directory}/sa/Emotion_Intensity.csv',delimiter=',')
emotion_intensity

Unnamed: 0,word,emotion,emotion-intensity-score
0,outraged,anger,964
1,brutality,anger,959
2,hatred,anger,953
3,hateful,anger,940
4,terrorize,anger,939
...,...,...,...
9916,fugitive,trust,141
9917,divorce,trust,133
9918,mistakes,trust,133
9919,bait,trust,133


Selecciono un grupo preliminar de sueños para comenzar el análisis (por tiempos de procesamiento). Podemos hacerlo para todos los grupos si es necesario.

In [5]:
emotion_list=["Positive","Negative","Anger","Anticipation","Disgust","Fear","Joy","Sadness","Surprise","Trust"]
intensity_list=["anger","anticipation","disgust","fear","joy","sadness","surprise","trust"]

In [6]:
#Todos los sueños
dreams_groups=dreams_clean.copy()
dreams_groups

Unnamed: 0,code,note,description,words,group_id
0,1,1957,"The one at the Meads's house, where it's bigge...",154.0,1
1,2,8/11/67,I'm at a family reunion in a large fine house ...,248.0,1
2,3,8/1/85,I watch a plane fly past and shortly realize i...,303.0,1
3,4,1985?,Me pulling the green leaves and berries off so...,468.0,1
4,5,1985?,I'm in a room that reminds me of (but definite...,561.0,1
...,...,...,...,...,...
43221,85,"F, age 18",The dream was about me and my boyfriend going ...,138.0,89
43222,86,"F, age 18",Two weeks ago this guy asked me to Senior Ball...,96.0,89
43223,87,"F, age 18",My boyfriend just broke up with me so he was o...,139.0,89
43224,88,"F, age 18",I was in my backyard and I was flying. I would...,104.0,89


### Pre-Procesamiento sueños

Elimino sueños vacios

In [7]:
len(dreams_groups)

43226

In [8]:
dreams_groups=dreams_groups[dreams_groups.code.notnull()]
len(dreams_groups)


43132

Paso todas las palabras de los sueños a minusculas

In [10]:
dreams_groups['description']=dreams_groups['description'].str.lower()

Elimino caracteres especiales

In [11]:
remove = ["!","(",")","@",".",",","{","}","[","]",".",'"',"'","?","-","1","2","3","4","5","6","7","8","9","0"]

for ch in remove:
  dreams_groups['description'] = dreams_groups['description'].str.replace(ch, "")

In [12]:
dreams_groups

Unnamed: 0,code,note,description,words,group_id
0,1,1957,the one at the meadss house where its bigger i...,154.0,1
1,2,8/11/67,im at a family reunion in a large fine house w...,248.0,1
2,3,8/1/85,i watch a plane fly past and shortly realize i...,303.0,1
3,4,1985?,me pulling the green leaves and berries off so...,468.0,1
4,5,1985?,im in a room that reminds me of but definitely...,561.0,1
...,...,...,...,...,...
43221,85,"F, age 18",the dream was about me and my boyfriend going ...,138.0,89
43222,86,"F, age 18",two weeks ago this guy asked me to senior ball...,96.0,89
43223,87,"F, age 18",my boyfriend just broke up with me so he was o...,139.0,89
43224,88,"F, age 18",i was in my backyard and i was flying i would ...,104.0,89


Divido los sueños en palabras (preparo un dataset para el análisis con Emotion_Lexicon y copio uno igual para Emotion_Intensity)

In [13]:
df_words_emotions = dreams_groups.description.str.split(expand=True).stack().reset_index(level=1,drop=True).reset_index(name='description')

df_words_intensity=df_words_emotions.copy()

df_words_emotions

Unnamed: 0,index,description
0,0,the
1,0,one
2,0,at
3,0,the
4,0,meadss
...,...,...
7003553,43225,parents
7003554,43225,would
7003555,43225,kill
7003556,43225,us


### Sentiment Analysis con Emotion_Lexicon

**Pre-Procesamiento**:
    Creo un diccionario que va a tener cómo indice las emociones y cada una con su lista de palaras asociadas

In [14]:
for i in emotion_list:
    exec("{} = pd.DataFrame(emotion_lexicon[emotion_lexicon[i] == 1].English_a)".format(i))
    
emotions = {
    'Positive': list(Positive["English_a"]),
    "Negative": list(Negative["English_a"]),
    "Anger": list(Anger["English_a"]),
    "Anticipation": list(Anticipation["English_a"]),
    "Disgust": list(Disgust["English_a"]),
    "Fear": list(Fear["English_a"]),
    "Joy": list(Joy["English_a"]),
    "Sadness": list(Sadness["English_a"]),
    "Surprise": list(Surprise["English_a"]),
    "Trust": list(Trust["English_a"])
}

#emotions

**Sentiment Analysis**: Matcheo el diccionario de las palaras y emociones con las palabras de los sueños. Despues agrupo y sumo la cantidad de matchs por sueños

In [15]:
for k, v in emotions.items():
    df_words_emotions[k] = df_words_emotions.description.str.contains('|'.join(emotions[k]))

df_words_emo_groups=df_words_emotions.groupby('index').sum().astype(int)
df_words_emo_groups

Unnamed: 0_level_0,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,18,11,8,6,8,6,3,6,3,7
1,28,22,12,10,11,14,5,13,1,15
2,27,25,12,16,11,16,4,15,8,19
3,51,41,22,18,20,19,19,29,12,25
4,59,40,19,26,17,18,16,14,10,30
...,...,...,...,...,...,...,...,...,...,...
43221,9,13,6,13,8,6,6,6,5,6
43222,11,3,3,2,0,1,4,2,4,7
43223,17,10,4,5,5,3,6,5,4,12
43224,14,6,5,9,3,4,9,6,7,11


**Sentiment Analysis**:  Hago un join entre las palabras que hicieron match con las emociones y la base de los sueños. Creo la base final:

In [16]:
dreams_groups_emotions = dreams_groups.merge(df_words_emo_groups, how='inner', 
                                      left_index=True,
                                      right_index=True)
dreams_groups_emotions

Unnamed: 0,code,note,description,words,group_id,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,1,1957,the one at the meadss house where its bigger i...,154.0,1,18,11,8,6,8,6,3,6,3,7
1,2,8/11/67,im at a family reunion in a large fine house w...,248.0,1,28,22,12,10,11,14,5,13,1,15
2,3,8/1/85,i watch a plane fly past and shortly realize i...,303.0,1,27,25,12,16,11,16,4,15,8,19
3,4,1985?,me pulling the green leaves and berries off so...,468.0,1,51,41,22,18,20,19,19,29,12,25
4,5,1985?,im in a room that reminds me of but definitely...,561.0,1,59,40,19,26,17,18,16,14,10,30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43221,85,"F, age 18",the dream was about me and my boyfriend going ...,138.0,89,9,13,6,13,8,6,6,6,5,6
43222,86,"F, age 18",two weeks ago this guy asked me to senior ball...,96.0,89,11,3,3,2,0,1,4,2,4,7
43223,87,"F, age 18",my boyfriend just broke up with me so he was o...,139.0,89,17,10,4,5,5,3,6,5,4,12
43224,88,"F, age 18",i was in my backyard and i was flying i would ...,104.0,89,14,6,5,9,3,4,9,6,7,11


In [None]:
#dreams_groups_emotions.to_csv(f'{data_directory}/sa/out/emotions.csv', index = False)

### Sentiment Analysis con Emotion_Intensity

**Pre-Procesamiento**: Me quedo solo con las palabras que tienen > 0.6 de asociación con la emoción y transpongo la tabla

In [19]:
threshold = 0.6


tmp=emotion_intensity[emotion_intensity["emotion-intensity-score"]>=threshold]
tmp["emotion-intensity-score"]=1
emotion_intensity_filter=tmp.pivot(index='word', columns='emotion')
emotion_intensity_filter=emotion_intensity_filter.droplevel(0,axis=1).reset_index("word")
emotion_intensity_filter

emotion,word,anger,anticipation,disgust,fear,joy,sadness,surprise,trust
0,aaaaaaah,,,,1.0,,,,
1,aaaah,,,,1.0,,,,
2,abacus,,,,,,,,1.0
3,abandon,,,,1.0,,1.0,,
4,abandoned,1.0,,,1.0,,1.0,,
...,...,...,...,...,...,...,...,...,...
5970,zany,,,,,,,1.0,
5971,zeal,,1.0,,,1.0,,1.0,1.0
5972,zealous,,,,,1.0,,,1.0
5973,zen,,,,,1.0,,,


**Pre-Procesamiento**:
    Creo un diccionario que va a tener cómo indice las emociones y cada una con su lista de palaras asociadas

In [20]:
for j in intensity_list:
    exec("{} = pd.DataFrame(emotion_intensity_filter[emotion_intensity_filter[j] == 1].word)".format(j))
    
intensity = {
    "Anger": list(anger["word"]),
    "Anticipation": list(anticipation["word"]),
    "Disgust": list(disgust["word"]),
    "Fear": list(fear["word"]),
    "Joy": list(joy["word"]),
    "Sadness": list(sadness["word"]),
    "Surprise": list(surprise["word"]),
    "Trust": list(trust["word"])
}

#intensity

**Sentiment Analysis**: Matcheo el diccionario de las palaras y emociones con las palabras de los sueños. Despues agrupo y sumo la cantidad de matchs por sueños

In [21]:
for l, w in intensity.items():
    df_words_intensity[l] = df_words_intensity.description.str.contains('|'.join(intensity[l]))
    
df_words_ints_groups=df_words_intensity.groupby('index').sum().astype(int)

In [22]:
df_words_ints_groups.head()

Unnamed: 0_level_0,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,7,6,9,8,3,9,3,11
1,12,10,12,14,12,21,1,17
2,12,16,11,17,12,15,9,22
3,22,18,21,21,26,29,12,30
4,18,26,22,20,32,18,10,39


**Sentiment Analysis**:  Hago un join entre las palabras que hicieron match con las emociones y la base de los sueños. Creo la base final:

In [23]:
dreams_groups_intensity = dreams_groups.merge(df_words_ints_groups, how='inner', 
                                      left_index=True,
                                      right_index=True)
dreams_groups_intensity

Unnamed: 0,code,note,description,words,group_id,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,1,1957,the one at the meadss house where its bigger i...,154.0,1,7,6,9,8,3,9,3,11
1,2,8/11/67,im at a family reunion in a large fine house w...,248.0,1,12,10,12,14,12,21,1,17
2,3,8/1/85,i watch a plane fly past and shortly realize i...,303.0,1,12,16,11,17,12,15,9,22
3,4,1985?,me pulling the green leaves and berries off so...,468.0,1,22,18,21,21,26,29,12,30
4,5,1985?,im in a room that reminds me of but definitely...,561.0,1,18,26,22,20,32,18,10,39
...,...,...,...,...,...,...,...,...,...,...,...,...,...
43221,85,"F, age 18",the dream was about me and my boyfriend going ...,138.0,89,6,13,8,6,15,10,5,7
43222,86,"F, age 18",two weeks ago this guy asked me to senior ball...,96.0,89,3,2,0,2,6,2,4,7
43223,87,"F, age 18",my boyfriend just broke up with me so he was o...,139.0,89,4,5,6,4,7,4,4,14
43224,88,"F, age 18",i was in my backyard and i was flying i would ...,104.0,89,5,9,3,4,10,6,7,14


In [None]:
#dreams_groups_intensity.to_csv(f'{data_directory}/sa/out/intensity.csv', index = False)

## Primeras conclusiones y próximos pasos

El análisis parece ser mas efectivo utilizando la base de Emotion_Intensity, por lo siguiete:

Emotion_Lexicon puede tener muchas palabras que estan asociadas a mas de una emoción, lo cual ensucia un poco los resultados.

Por otro lado, con Emotion_Intensity podemos controlar la efectividad de la asociación con la variable que tiene el % de asociación. En un principio, estariamos usando un punto de corte de asociación > 0.6, por ahora parece andar bien a falta analizar mejor los resultados con visualizaciones.

Los próximos pasos que se me ocurre que podriamos hacer:

- Calcular la proporción de emociones presentes en cada sueño, asi podríamos determinar cual es la emoción predominante.

- Hacer algún análisis exploratorio con el punto anterior para determinar que emociones predominan en cada grupo que quisieramos elegir.

- En caso de avanzar con el Sentiment Analysis que utiliza el % de asociación, podríamos calcular qué emociones califican como positivas (Joy y Trust) cuales como negativas (Anger, Fear, Disgust y Sadness) y cuales como neutrales (Surprise y Anticipation). Calculamos las proporciones y vemos cual categoría es la predominante.]

- Como algo adicional pero que también puede estar copado, podríamos gráficar cómo se presentan e interactuan las emociones a lo largo del tiempo para cada grupo. Así podríamos ver por ejemplo como se diferencian las emociones que experimenta el grupo de sueños del veterano y el de Phil.

## Parte 2a

Defino categorias de palabras positivas  (Joy, Trust y Anticipation),  negativas (Anger, Fear, Disgust y Sadness) y  neutrales (Surprise)

In [24]:
positive_emotions=["Joy","Trust","Anticipation"]
negative_emotions=["Anger","Fear","Disgust","Sadness"]
neutral_emotions=["Surprise"]

Calculo cantidad de palabras positivas, negativas y neutrales

In [25]:
dreams_groups_intensity['Positive']= dreams_groups_intensity.loc[:,positive_emotions].sum(axis=1)
dreams_groups_intensity['Negative']= dreams_groups_intensity.loc[:,negative_emotions].sum(axis=1)
dreams_groups_intensity['Neutral']= dreams_groups_intensity.loc[:,neutral_emotions].sum(axis=1)
dreams_groups_intensity.head(3)

Unnamed: 0,code,note,description,words,group_id,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust,Positive,Negative,Neutral
0,1,1957,the one at the meadss house where its bigger i...,154.0,1,7,6,9,8,3,9,3,11,20,33,3
1,2,8/11/67,im at a family reunion in a large fine house w...,248.0,1,12,10,12,14,12,21,1,17,39,59,1
2,3,8/1/85,i watch a plane fly past and shortly realize i...,303.0,1,12,16,11,17,12,15,9,22,50,55,9


In [26]:
dreams_groups_intensity.iloc[2,2]

'i watch a plane fly past and shortly realize it was too low it crashes not half a block away rather as though it fell from a height of a few yards than flying straight into the ground and i think there might be survivors and that i should call the police before i get the number dialed i realize i want the fire department instead i get no ring try again and find the line is dead some people   at least one a woman are coming toward the house and i understand they are responsible for the dead phone and i should be suspicious of them i try to leave the house  run into a pregnant girl at the back; she seems to be in some sort of a bad way and has maybe started labor the woman from the plane comes this way calls the others to come get the pregnant girl and i pretend to be asleep or dead they dont buy it i guess and we all go inside â\xa0â\xa0â\xa0â\xa0â\xa0â\xa0 im not a prisoner i dont think so much as being detained for some reason and i go to my brothers old room he moved out of it a cou

Hago una lista de todas las variables de conteos de emociones para calcular las proporciones de cada una

In [27]:
all_emotions=positive_emotions+negative_emotions+neutral_emotions+["Positive","Negative","Neutral"]
all_emotions

['Joy',
 'Trust',
 'Anticipation',
 'Anger',
 'Fear',
 'Disgust',
 'Sadness',
 'Surprise',
 'Positive',
 'Negative',
 'Neutral']

Calculo las proporciones

In [28]:
for emo in all_emotions:
    exec("dreams_groups_intensity['{}_pct'] = dreams_groups_intensity['{}'] / dreams_groups_intensity['words']".format(emo,emo))

Hago 2 listas con las variables de las proporciones, una para las emociones y otra para los sentimientos positivos, negativos y neutrales

In [29]:
sentiment_pct=dreams_groups_intensity.columns[-3:]
emotions_pct=dreams_groups_intensity.columns[-11:-4]

In [30]:
sentiment_pct

Index(['Positive_pct', 'Negative_pct', 'Neutral_pct'], dtype='object')

In [31]:
emotions_pct

Index(['Joy_pct', 'Trust_pct', 'Anticipation_pct', 'Anger_pct', 'Fear_pct',
       'Disgust_pct', 'Sadness_pct'],
      dtype='object')

Calculo cual es la emoción y el sentimiento con mayor proporción

In [32]:
dreams_groups_intensity["Dominant_Emotion_pct"] = dreams_groups_intensity[emotions_pct].max(axis=1)
dreams_groups_intensity["Dominant_Sentiment_pct"] = dreams_groups_intensity[sentiment_pct].max(axis=1)
dreams_groups_intensity.iloc[:,[-2,-1]].head(3)

Unnamed: 0,Dominant_Emotion_pct,Dominant_Sentiment_pct
0,0.071429,0.214286
1,0.084677,0.237903
2,0.072607,0.181518


Calculo a que emoción/sentimiento pertenece esa mayor proporción

In [33]:
def emotions_f(row):
    if row['Dominant_Emotion_pct'] == row['Anger_pct']:
        val = "Anger"
    elif row['Dominant_Emotion_pct'] == row['Anticipation_pct']:
        val = "Anticipation"
    elif row['Dominant_Emotion_pct'] == row['Fear_pct']:
        val = "Fear"
    elif row['Dominant_Emotion_pct'] == row['Disgust_pct']:
        val = "Disgust"
    elif row['Dominant_Emotion_pct'] == row['Sadness_pct']:
        val = "Sadness"
    elif row['Dominant_Emotion_pct'] == row['Surprise_pct']:
        val = "Surprise"
    elif row['Dominant_Emotion_pct'] == row['Joy_pct']:
        val = "Joy"
    else:
        val = "Trust"
    return val

def sentiment_f(row):
    if row['Dominant_Sentiment_pct'] == row['Positive_pct']:
        val = "Positive"
    elif row['Dominant_Sentiment_pct'] == row['Negative_pct']:
        val = "Negative"
    else:
        val="Neutral"
    return val

In [34]:
dreams_groups_intensity['Dominant_Emotion'] = dreams_groups_intensity.apply(emotions_f, axis=1)
dreams_groups_intensity['Dominant_Sentment'] = dreams_groups_intensity.apply(sentiment_f, axis=1)

In [35]:
dreams_groups_intensity.head(3)

Unnamed: 0,code,note,description,words,group_id,Anger,Anticipation,Disgust,Fear,Joy,...,Disgust_pct,Sadness_pct,Surprise_pct,Positive_pct,Negative_pct,Neutral_pct,Dominant_Emotion_pct,Dominant_Sentiment_pct,Dominant_Emotion,Dominant_Sentment
0,1,1957,the one at the meadss house where its bigger i...,154.0,1,7,6,9,8,3,...,0.058442,0.058442,0.019481,0.12987,0.214286,0.019481,0.071429,0.214286,Trust,Negative
1,2,8/11/67,im at a family reunion in a large fine house w...,248.0,1,12,10,12,14,12,...,0.048387,0.084677,0.004032,0.157258,0.237903,0.004032,0.084677,0.237903,Sadness,Negative
2,3,8/1/85,i watch a plane fly past and shortly realize i...,303.0,1,12,16,11,17,12,...,0.036304,0.049505,0.029703,0.165017,0.181518,0.029703,0.072607,0.181518,Trust,Negative


In [36]:
dreams_groups_intensity.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 43132 entries, 0 to 43225
Data columns (total 31 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   code                    43132 non-null  object 
 1   note                    40997 non-null  object 
 2   description             43132 non-null  object 
 3   words                   43132 non-null  float64
 4   group_id                43132 non-null  int64  
 5   Anger                   43132 non-null  int64  
 6   Anticipation            43132 non-null  int64  
 7   Disgust                 43132 non-null  int64  
 8   Fear                    43132 non-null  int64  
 9   Joy                     43132 non-null  int64  
 10  Sadness                 43132 non-null  int64  
 11  Surprise                43132 non-null  int64  
 12  Trust                   43132 non-null  int64  
 13  Positive                43132 non-null  int64  
 14  Negative                43132 non-null

In [37]:
dreams_groups_intensity.to_csv(f'{data_directory}/sa/out/dreams_groups_intensity.csv', index = False)

## Punto 2b: Lexicon

In [38]:
dreams_groups_emotions['Positive']= dreams_groups_emotions.loc[:,positive_emotions].sum(axis=1)
dreams_groups_emotions['Negative']= dreams_groups_emotions.loc[:,negative_emotions].sum(axis=1)
dreams_groups_emotions['Neutral']= dreams_groups_emotions.loc[:,neutral_emotions].sum(axis=1)
dreams_groups_emotions.head(3)

Unnamed: 0,code,note,description,words,group_id,Positive,Negative,Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust,Neutral
0,1,1957,the one at the meadss house where its bigger i...,154.0,1,16,28,8,6,8,6,3,6,3,7,3
1,2,8/11/67,im at a family reunion in a large fine house w...,248.0,1,30,50,12,10,11,14,5,13,1,15,1
2,3,8/1/85,i watch a plane fly past and shortly realize i...,303.0,1,39,54,12,16,11,16,4,15,8,19,8


In [39]:
dreams_groups_emotions.iloc[2,2]

'i watch a plane fly past and shortly realize it was too low it crashes not half a block away rather as though it fell from a height of a few yards than flying straight into the ground and i think there might be survivors and that i should call the police before i get the number dialed i realize i want the fire department instead i get no ring try again and find the line is dead some people   at least one a woman are coming toward the house and i understand they are responsible for the dead phone and i should be suspicious of them i try to leave the house  run into a pregnant girl at the back; she seems to be in some sort of a bad way and has maybe started labor the woman from the plane comes this way calls the others to come get the pregnant girl and i pretend to be asleep or dead they dont buy it i guess and we all go inside â\xa0â\xa0â\xa0â\xa0â\xa0â\xa0 im not a prisoner i dont think so much as being detained for some reason and i go to my brothers old room he moved out of it a cou

In [40]:
for emo in all_emotions:
    exec("dreams_groups_emotions['{}_pct'] = dreams_groups_emotions['{}'] / dreams_groups_emotions['words']".format(emo,emo))

In [41]:
sentiment_pct=dreams_groups_emotions.columns[-3:]
emotions_pct=dreams_groups_emotions.columns[-11:-4]

In [42]:
sentiment_pct

Index(['Positive_pct', 'Negative_pct', 'Neutral_pct'], dtype='object')

In [43]:
emotions_pct

Index(['Joy_pct', 'Trust_pct', 'Anticipation_pct', 'Anger_pct', 'Fear_pct',
       'Disgust_pct', 'Sadness_pct'],
      dtype='object')

In [44]:
dreams_groups_emotions["Dominant_Emotion_pct"] = dreams_groups_emotions[emotions_pct].max(axis=1)
dreams_groups_emotions["Dominant_Sentiment_pct"] = dreams_groups_emotions[sentiment_pct].max(axis=1)
dreams_groups_emotions.iloc[:,[-2,-1]].head(3)

Unnamed: 0,Dominant_Emotion_pct,Dominant_Sentiment_pct
0,0.051948,0.181818
1,0.060484,0.201613
2,0.062706,0.178218


In [45]:
dreams_groups_emotions['Dominant_Emotion'] = dreams_groups_emotions.apply(emotions_f, axis=1)
dreams_groups_emotions['Dominant_Sentment'] = dreams_groups_emotions.apply(sentiment_f, axis=1)

In [46]:
dreams_groups_emotions.head(3)

Unnamed: 0,code,note,description,words,group_id,Positive,Negative,Anger,Anticipation,Disgust,...,Disgust_pct,Sadness_pct,Surprise_pct,Positive_pct,Negative_pct,Neutral_pct,Dominant_Emotion_pct,Dominant_Sentiment_pct,Dominant_Emotion,Dominant_Sentment
0,1,1957,the one at the meadss house where its bigger i...,154.0,1,16,28,8,6,8,...,0.051948,0.038961,0.019481,0.103896,0.181818,0.019481,0.051948,0.181818,Anger,Negative
1,2,8/11/67,im at a family reunion in a large fine house w...,248.0,1,30,50,12,10,11,...,0.044355,0.052419,0.004032,0.120968,0.201613,0.004032,0.060484,0.201613,Trust,Negative
2,3,8/1/85,i watch a plane fly past and shortly realize i...,303.0,1,39,54,12,16,11,...,0.036304,0.049505,0.026403,0.128713,0.178218,0.026403,0.062706,0.178218,Trust,Negative


In [47]:
dreams_groups_emotions.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 43132 entries, 0 to 43225
Data columns (total 31 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   code                    43132 non-null  object 
 1   note                    40997 non-null  object 
 2   description             43132 non-null  object 
 3   words                   43132 non-null  float64
 4   group_id                43132 non-null  int64  
 5   Positive                43132 non-null  int64  
 6   Negative                43132 non-null  int64  
 7   Anger                   43132 non-null  int64  
 8   Anticipation            43132 non-null  int64  
 9   Disgust                 43132 non-null  int64  
 10  Fear                    43132 non-null  int64  
 11  Joy                     43132 non-null  int64  
 12  Sadness                 43132 non-null  int64  
 13  Surprise                43132 non-null  int64  
 14  Trust                   43132 non-null

In [48]:
dreams_groups_emotions.to_csv(f'{data_directory}/sa/out/dreams_groups_emotions.csv', index = False)