# Sentiment Analysis Thing

In [2]:
import pandas as pd

# Download the test data
test_data_url = "https://raw.githubusercontent.com/google-research/google-research/master/goemotions/data/test.tsv"
# Download and load the train data:
train_data_url = 'https://raw.githubusercontent.com/google-research/google-research/2adf640a14f11025ae5a9d0ec493b78530d276d3/goemotions/data/train.tsv'
# Download and load the validation data:
dev_data_url = 'https://raw.githubusercontent.com/google-research/google-research/2adf640a14f11025ae5a9d0ec493b78530d276d3/goemotions/data/dev.tsv'

# Load the files into dataframes
train_data = pd.read_csv(train_data_url, sep='\t')
dev_data = pd.read_csv(dev_data_url, sep='\t')
test_data = pd.read_csv(test_data_url, sep='\t')

# Data Understanding

## Checking each dataset

In [3]:
# Checking each dataset
train_data.head(2)

Unnamed: 0,My favourite food is anything I didn't have to cook myself.,27,eebbqej
0,"Now if he does off himself, everyone will thin...",27,ed00q6i
1,WHY THE FUCK IS BAYLESS ISOING,2,eezlygj


In [4]:
dev_data.head(2)

Unnamed: 0,Is this in New Orleans?? I really feel like this is New Orleans.,27,edgurhb
0,"You know the answer man, you are programmed to...",427,ee84bjg
1,I've never been this sad in my life!,25,edcu99z


In [5]:
test_data.head(2)

Unnamed: 0,"I’m really sorry about your situation :( Although I love the names Sapphira, Cirilla, and Scarlett!",25,eecwqtt
0,It's wonderful because it's awful. At not with.,0,ed5f85d
1,"Kings fan here, good luck to you guys! Will be...",13,een27c3


## Describing the Data

In [6]:
# checking shapes
print("train_data", train_data.shape)

train_data (43409, 3)


In [7]:
print("dev_data", dev_data.shape)

dev_data (5425, 3)


In [8]:
print("test_data", test_data.shape)

test_data (5426, 3)


## Adding a header

We add a header to the test data since it does not have one

In [9]:
header = ["comment", "emotion", "id"]
# Add header to test data
test_data.columns = header
# Print the data now
test_data.head(4)

Unnamed: 0,comment,emotion,id
0,It's wonderful because it's awful. At not with.,0,ed5f85d
1,"Kings fan here, good luck to you guys! Will be...",13,een27c3
2,"I didn't know that, thank you for teaching me ...",15,eelgwd1
3,They got bored from haunting earth for thousan...,27,eem5uti


# Understanding Emotions

We have 28 emotions?

In [10]:
# Check emotions in the code. the "emotion" column
test_data.emotion.unique()

array(['0', '13', '15', '27', '24', '25', '3,10', '1,18', '8', '0,7',
       '14', '10', '25,27', '1', '6,27', '0,18', '7', '20', '4,27', '0,1',
       '5', '17,26', '4', '10,27', '14,27', '18', '6,20', '9', '3',
       '5,20', '10,18', '0,22', '2,3', '7,27', '6', '7,17', '22', '26',
       '13,27', '5,18', '3,27', '2', '17,18', '5,18,20', '0,26', '7,8',
       '11', '17,20', '3,25', '12', '15,18', '1,4', '4,15', '5,24',
       '24,25', '22,27', '15,22', '19', '17', '21', '3,10,11', '3,4',
       '0,17', '2,11', '1,3', '8,20', '14,25', '20,26', '7,26', '7,12',
       '3,7', '0,20', '0,4', '2,27', '8,26', '4,18', '25,26', '3,11',
       '5,8,20', '9,25', '1,27', '1,6', '3,6,7', '13,26', '15,26', '4,5',
       '4,10', '13,20', '8,27', '4,8', '5,10', '0,1,4', '5,15', '6,15',
       '5,8', '10,11', '4,17', '15,27', '0,21', '0,10', '4,9', '3,5',
       '15,20', '0,1,17', '3,9', '23', '17,27', '1,15', '0,27', '0,11',
       '19,27', '16', '15,17', '0,13,18', '0,13', '17,23', '0,15', '3,14',


### Getting all emotions into a list

In [11]:
test_data.emotion.str.split(',').head(10)
# append all the emotions in the test data in a list but convert each one into integer
emotions = []
for i in test_data.emotion.str.split(','):
    for j in i:
        emotions.append(int(j))
emotions[:10]

[0, 13, 15, 27, 15, 15, 15, 24, 25, 3]

### All emotions into a set to check all emotions available

In [12]:
# There are 28 emotions in the test data
set(emotions)
# Show only 10
list(set(emotions))[:10]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [13]:
"""
Below are the 28 emotions in the test data
admiration
amusement
anger
annoyance
approval
caring
confusion
curiosity
desire
disappointment
disapproval
disgust
embarrassment
excitement
fear
gratitude
grief
joy
love
nervousness
optimism
pride
realization
relief
remorse
sadness
surprise
neutral
"""
# Making a dictionary of the emotions
emotions_dict = {
    0: "admiration",
    1: "amusement",
    2: "anger",
    3: "annoyance",
    4: "approval",
    5: "caring",
    6: "confusion",
    7: "curiosity",
    8: "desire",
    9: "disappointment",
    10: "disapproval",
    11: "disgust",
    12: "embarrassment",
    13: "excitement",
    14: "fear",
    15: "gratitude",
    16: "grief",
    17: "joy",
    18: "love",
    19: "nervousness",
    20: "optimism",
    21: "pride",
    22: "realization",
    23: "relief",
    24: "remorse",
    25: "sadness",
    26: "surprise",
    27: "neutral"
}