In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import plotly.graph_objects as go
import plotly.express as px

from wordcloud import WordCloud
from IPython.display import Image

# Disable pandas' row/column truncation so displayed DataFrames are shown in full.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# The TSV files have no header row, so the three column names are supplied here.
tsv_columns = ['Text', 'Class', 'ID']
tsv_paths = (
    "../input/goemotions/data/train.tsv",
    "../input/goemotions/data/dev.tsv",
    "../input/goemotions/data/test.tsv",
)
# Load the train / dev / test splits with identical parsing options.
df_train, df_dev, df_test = [
    pd.read_csv(tsv_path, sep='\t', header=None, names=tsv_columns)
    for tsv_path in tsv_paths
]

In [None]:
# Preview the first rows of the raw training split (Text, Class, ID).
df_train.head()

In [None]:
def _split_labels(class_field):
    """Split a comma-separated label string into a list of label-id strings."""
    return class_field.split(',')


# 'Class' holds comma-separated label ids; keep them as a list per text.
df_train['List of classes'] = df_train['Class'].apply(_split_labels)

In [None]:
# Number of labels attached to each text.
df_train['Len of classes'] = df_train['List of classes'].apply(len)

### Maximum Number of labels assigned to a piece of text

In [None]:
# Largest number of labels attached to any single training text.
df_train['Len of classes'].max()

In [None]:
# Show the texts that carry the maximum number of labels. The threshold is
# computed from the data rather than hard-coded, so this cell stays correct
# if the dataset (and therefore the maximum) changes.
max_label_count = df_train['Len of classes'].max()
df_train[df_train['Len of classes'] == max_label_count]

The maximum number of labels assigned to a single text is 5.  

## Let's Check the distribution of the number of labels for each piece of text

In [None]:
# Count how many texts carry 1, 2, 3, ... labels, ordered by number of labels.
temp_df = df_train["Len of classes"].value_counts().sort_index()

# Take the x labels from the counted values themselves instead of a fixed
# '1'..'5' list, so every bar stays paired with its own count even when a
# label count is absent or the maximum differs.
label_count_names = [str(label_count) for label_count in temp_df.index]

trace1 = go.Bar(
                x = label_count_names,
                y = temp_df.tolist(),
                marker = dict(color = 'rgb(250,13,92)',
                              line=dict(color='rgb(0,0,0)',width=1.5)),
                text=temp_df.tolist(), textposition='outside',
                # A single number applies the same width to every bar,
                # whatever the number of bars turns out to be.
                width=0.5)
layout = go.Layout(template= "plotly_dark",title = 'Number of classes' , xaxis = dict(title = 'Class Numbers'), yaxis = dict(title = 'Count'))
fig = go.Figure(data = [trace1], layout = layout)
fig.show()

Most texts are assigned a **single** label.

## Let's Convert our dataframe to the desired format for Multi-Label Classification 

In [None]:
# Display the bundled PNG; presumably it illustrates the target multi-label
# layout named in the heading above (inferred from the file name - confirm).
Image(filename='../input/goemotions/GoEmotionsFormat.PNG')

In [None]:
# Label id (as a string) -> emotion name; populated from emotions.txt.
emotions_map = dict()

In [None]:
# Each line of emotions.txt is one emotion name; its 0-based line number
# (stored as a string) is the label id used in the 'Class' column.
with open('../input/goemotions/data/emotions.txt', 'r') as emotions_file:
    emotions_map.update(
        (str(line_no), line.strip()) for line_no, line in enumerate(emotions_file)
    )

In [None]:
# Inspect the label id -> emotion name mapping.
emotions_map

In [None]:
# Add one 0/1 indicator column per emotion: 1 when the text's label list
# contains that emotion's id, 0 otherwise.
for label_id, emotion_name in emotions_map.items():
    df_train[emotion_name] = df_train['List of classes'].apply(
        lambda labels: int(label_id in labels)
    )

In [None]:
# Remove the raw label columns and the intermediate helper columns in place.
helper_columns = ['Class', 'ID', 'List of classes', 'Len of classes']
df_train.drop(columns=helper_columns, inplace=True)

In [None]:
# Preview the first rows of the training frame in its current form.
df_train.head()

In [None]:
# Write the processed training frame to CSV without the index column.
df_train.to_csv(path_or_buf="processed_train.csv", index=False)

In [None]:
# Emotion names in the insertion order of emotions_map.
emotion_list = [*emotions_map.values()]

In [None]:
# Column sums of every non-text column: how many texts carry each emotion.
temp_list = df_train.drop(columns=['Text']).sum(axis=0).tolist()

bar_marker = {
    'color': 'rgb(127, 16, 238)',
    'line': {'color': 'rgb(0,0,0)', 'width': 1.5},
}
trace1 = go.Bar(
    x=emotion_list,
    y=temp_list,
    marker=bar_marker,
    text=temp_list,
    textposition='outside',
)
layout = go.Layout(
    template="plotly_dark",
    title='NUMBER OF EMOTIONS',
    xaxis={'title': 'Emotion'},
    yaxis={'title': 'Count'},
)
fig = go.Figure(data=[trace1], layout=layout)
fig.show()

Grief has the lowest occurrence, while neutral has the highest occurrence, followed by admiration.

# Now Let's Check the Wordclouds related to each Emotion

In [None]:
# Table of words per emotion; the 'emotion', 'word' and 'odds' columns are read later on.
df_emotions = pd.read_csv(filepath_or_buffer="../input/goemotions/tables/emotion_words.csv")

In [None]:
# Preview the first rows of the emotion-words table.
df_emotions.head()

In [None]:
# emotion -> {word: odds}; filled in from df_emotions.
wc_dict = dict()

In [None]:
# Build {emotion: {word: odds}} in a single pass over the table.
# groupby(sort=False) yields the emotions in order of first appearance and
# hands over each emotion's rows directly, so the full frame is not filtered
# again for every emotion. Rows with a missing emotion are skipped by groupby.
for emotion, emotion_rows in df_emotions.groupby('emotion', sort=False):
    # .tolist() converts to plain Python values before the dict is built.
    wc_dict[emotion] = dict(
        zip(emotion_rows['word'].tolist(), emotion_rows['odds'].tolist())
    )

In [None]:
# Inspect the full emotion -> {word: odds} mapping.
wc_dict

In [None]:
def plot_cloud(wordcloud, figsize=(40, 30)):
    """Display a word cloud on a new matplotlib figure with the axes hidden.

    Parameters
    ----------
    wordcloud
        Image-like object accepted by ``plt.imshow``. In this notebook it is
        the object returned by ``WordCloud.generate_from_frequencies``.
    figsize : tuple of (float, float), optional
        Figure width and height passed to ``plt.figure``. Defaults to
        ``(40, 30)``, so existing single-argument calls are unaffected.

    Returns
    -------
    None
        The figure is drawn as a side effect.
    """
    # Start a new figure for every cloud
    plt.figure(figsize=figsize)
    # Display image
    plt.imshow(wordcloud)
    # No axis details
    plt.axis("off")

In [None]:
# Single WordCloud instance shared by the per-emotion cells.
wordcloud = WordCloud(
    width=1000,
    height=500,
    random_state=1,
    background_color='black',
    colormap='Set2',
    collocations=False,
)

## Admiration

In [None]:
# Render the "admiration" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['admiration']))

## Amusement

In [None]:
# Render the "amusement" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['amusement']))

## Anger

In [None]:
# Render the "anger" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['anger']))

## Annoyance

In [None]:
# Render the "annoyance" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['annoyance']))

## Approval

In [None]:
# Render the "approval" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['approval']))

## Caring

In [None]:
# Render the "caring" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['caring']))

## Confusion

In [None]:
# Render the "confusion" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['confusion']))

## Curiosity

In [None]:
# Render the "curiosity" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['curiosity']))

## Desire

In [None]:
# Render the "desire" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['desire']))

## Disappointment

In [None]:
# Render the "disappointment" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['disappointment']))

## Disapproval

In [None]:
# Render the "disapproval" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['disapproval']))

## Disgust

In [None]:
# Render the "disgust" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['disgust']))

## Embarrassment

In [None]:
# Render the "embarrassment" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['embarrassment']))

## Excitement

In [None]:
# Render the "excitement" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['excitement']))

## Fear

In [None]:
# Render the "fear" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['fear']))

## Gratitude

In [None]:
# Render the "gratitude" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['gratitude']))

## Grief

In [None]:
# Render the "grief" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['grief']))

## Joy

In [None]:
# Render the "joy" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['joy']))

## Love

In [None]:
# Render the "love" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['love']))

## Nervousness

In [None]:
# Render the "nervousness" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['nervousness']))

## Optimism

In [None]:
# Render the "optimism" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['optimism']))

## Pride

In [None]:
# Render the "pride" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['pride']))

## Realization

In [None]:
# Render the "realization" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['realization']))

## Relief

In [None]:
# Render the "relief" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['relief']))

## Remorse

In [None]:
# Render the "remorse" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['remorse']))

## Sadness

In [None]:
# Render the "sadness" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['sadness']))

## Surprise

In [None]:
# Render the "surprise" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['surprise']))

## Neutral

In [None]:
# Render the "neutral" cloud from its word -> odds mapping.
plot_cloud(wordcloud.generate_from_frequencies(frequencies=wc_dict['neutral']))