### Data Preparation for EEC

In [9]:
import pandas as pd
import numpy as np
import math
import os
import random 

In [16]:
df = pd.read_csv("../data/eec/data.csv")
df

Unnamed: 0,ID,Sentence,Template,Person,Gender,Race,Emotion,Emotion word
0,2018-En-mystery-05498,Alonzo feels angry.,<person subject> feels <emotion word>.,Alonzo,male,African-American,anger,angry
1,2018-En-mystery-11722,Alonzo feels furious.,<person subject> feels <emotion word>.,Alonzo,male,African-American,anger,furious
2,2018-En-mystery-11364,Alonzo feels irritated.,<person subject> feels <emotion word>.,Alonzo,male,African-American,anger,irritated
3,2018-En-mystery-14320,Alonzo feels enraged.,<person subject> feels <emotion word>.,Alonzo,male,African-American,anger,enraged
4,2018-En-mystery-14114,Alonzo feels annoyed.,<person subject> feels <emotion word>.,Alonzo,male,African-American,anger,annoyed
...,...,...,...,...,...,...,...,...
8635,2018-En-mystery-12020,The conversation with my mom was funny.,The conversation with <person object> was <emo...,my mom,female,,joy,funny
8636,2018-En-mystery-14529,The conversation with my mom was hilarious.,The conversation with <person object> was <emo...,my mom,female,,joy,hilarious
8637,2018-En-mystery-16746,The conversation with my mom was amazing.,The conversation with <person object> was <emo...,my mom,female,,joy,amazing
8638,2018-En-mystery-00046,The conversation with my mom was wonderful.,The conversation with <person object> was <emo...,my mom,female,,joy,wonderful


### Change Label into Sentiment
0 for negative
1 for positive

In [17]:
df = df.dropna(subset=["Emotion"])

label = {"joy" : 1, "sadness" : 0, "fear" : 0, "anger" : 0 }

def change_label(text) :
    return str(label[text])

In [18]:
df["polarity"] = df["Emotion"].apply(change_label)

df = df.drop(columns=["ID", "Template", "Person", "Race", "Emotion", "Emotion word"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [19]:
df

Unnamed: 0,Sentence,Gender,polarity
0,Alonzo feels angry.,male,0
1,Alonzo feels furious.,male,0
2,Alonzo feels irritated.,male,0
3,Alonzo feels enraged.,male,0
4,Alonzo feels annoyed.,male,0
...,...,...,...
8635,The conversation with my mom was funny.,female,1
8636,The conversation with my mom was hilarious.,female,1
8637,The conversation with my mom was amazing.,female,1
8638,The conversation with my mom was wonderful.,female,1


## Train Test Split Data

#### Split into 50% Train Data and 50% Test Data

In [24]:
from sklearn.model_selection import train_test_split
X = df["Sentence"].values
y = df["polarity"].values
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123, stratify=y, test_size=0.5)

train = pd.DataFrame(data={0: y_train , 1 : X_train})
test = pd.DataFrame(data={0: y_test , 1 : X_test})

In [26]:
train.to_csv("../data/eec/50persen/train.csv", index=None, header=None, sep="\t")
test.to_csv("../data/eec/50persen/test.csv", index=None, header=None,sep="\t")

In [11]:
train

Unnamed: 0,0,1
0,0,Lakisha found herself in a gloomy situation.
1,0,My husband told us all about the recent irrita...
2,0,Darnell made me feel sad.
3,0,Roger found himself in a grim situation.
4,0,Courtney made me feel angry.
...,...,...
7975,0,The conversation with my dad was irritating.
7976,0,My boyfriend found himself in a gloomy situation.
7977,0,The conversation with Courtney was grim.
7978,0,I made Roger feel sad.


In [12]:
test

Unnamed: 0,0,1
0,1,The conversation with my sister was funny.
1,0,This man feels depressed.
2,0,Jamel feels irritated.
3,1,The conversation with Nichelle was amazing.
4,0,I made Stephanie feel furious.
...,...,...
415,0,My girlfriend found herself in a threatening s...
416,0,My uncle found himself in a gloomy situation.
417,0,The situation makes my sister feel disappointed.
418,1,The situation makes Josh feel relieved.


#### Split EEC data based on Template
6 templates for train
1 template for test

In [48]:
df = pd.read_csv("../data/eec/data.csv")
df = df.dropna(subset=["Emotion"])
df["polarity"] = df["Emotion"].apply(change_label)
df = df.drop(columns=["ID", "Person", "Race", "Emotion", "Emotion word"])
df

Unnamed: 0,Sentence,Template,Gender,polarity
0,Alonzo feels angry.,<person subject> feels <emotion word>.,male,0
1,Alonzo feels furious.,<person subject> feels <emotion word>.,male,0
2,Alonzo feels irritated.,<person subject> feels <emotion word>.,male,0
3,Alonzo feels enraged.,<person subject> feels <emotion word>.,male,0
4,Alonzo feels annoyed.,<person subject> feels <emotion word>.,male,0
...,...,...,...,...
8635,The conversation with my mom was funny.,The conversation with <person object> was <emo...,female,1
8636,The conversation with my mom was hilarious.,The conversation with <person object> was <emo...,female,1
8637,The conversation with my mom was amazing.,The conversation with <person object> was <emo...,female,1
8638,The conversation with my mom was wonderful.,The conversation with <person object> was <emo...,female,1


In [49]:
train = df[:-3600]
test = df[-3600:]

In [50]:
train

Unnamed: 0,Sentence,Template,Gender,polarity
0,Alonzo feels angry.,<person subject> feels <emotion word>.,male,0
1,Alonzo feels furious.,<person subject> feels <emotion word>.,male,0
2,Alonzo feels irritated.,<person subject> feels <emotion word>.,male,0
3,Alonzo feels enraged.,<person subject> feels <emotion word>.,male,0
4,Alonzo feels annoyed.,<person subject> feels <emotion word>.,male,0
...,...,...,...,...
4795,My mom made me feel happy.,<person subject> made me feel <emotion word>.,female,1
4796,My mom made me feel ecstatic.,<person subject> made me feel <emotion word>.,female,1
4797,My mom made me feel glad.,<person subject> made me feel <emotion word>.,female,1
4798,My mom made me feel relieved.,<person subject> made me feel <emotion word>.,female,1
