## 9. sentiment_barplot
Plot the distribution of sentiment of incoming tweets for each airline.

**input**: pickle file in the `obj/` folder (`obj/conversations_with_scores.p`)  
**output**: plot of the sentiment distribution for each airline;  
            pandas DataFrame with the sentiment category ratios for each airline

In [None]:
# Importing the necessary libraries.

import pandas as pd
import matplotlib.pyplot as plt
import pickle
import operator
import seaborn as sns

In [None]:
# Global plot styling used by every figure below:
# doubled font size, white-grid background and a 20 x 15 default figure size.

sns.set(font_scale=2)
sns.set_style(style="whitegrid")
plt.rc("figure", figsize=(20, 15))

In [None]:
# We open our pickle file with useful tweet data.
# The `with` block guarantees the file handle is closed once the data is loaded.
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source.

with open("obj/conversations_with_scores.p", "rb") as pickle_file:
    scores_pickle = pickle.load(pickle_file)

In [None]:
# We create a class MyDict which will allow us to perform more operations on our dictionaries.

class MyDict(dict):
    """A dict that supports key-wise addition with the ``+`` operator.

    ``a + b`` returns a new ``MyDict`` holding every key of ``a`` and ``b``.
    Values of keys present in both are combined with ``+=``; keys present in
    only one operand are copied over unchanged. Neither operand is modified.
    """

    def __add__(self, oth):
        """Return the key-wise sum of ``self`` and the mapping ``oth``.

        Returns ``NotImplemented`` when ``oth`` has no ``items()`` method, so
        that Python can fall back to its normal operator resolution.
        """
        try:
            pairs = oth.items()
        except AttributeError:  # In case oth isn't a dict
            return NotImplemented  # The convention when a case isn't handled

        # dict.copy() would hand back a plain dict and lose the `+` support,
        # which breaks chained sums such as a + b + c; rebuild via our own type.
        r = type(self)(self)

        for key, val in pairs:
            if key in r:
                r[key] += val  # You can custom it here
            else:
                r[key] = val

        return r


In [None]:
# We count the number of tweets in which each airline is tagged or mentioned.
# We also create a list (mention_tweets) of all tweets tagging or mentioning any of the airlines under investigation.

airline_tags = ['@RyanAir', '@EtihadAirways', '@SingaporeAir', '@KLM', '@Qantas', '@VirginAtlantic', '@Lufthansa', '@AirBerlinassist', '@AirFrance', '@AirBerlin', '@British_Airways', '@easyJet', '@AmericanAir']
airline_names = ['Ryanair', 'Etihad Airways', 'Singapore Air', 'KLM', 'Qantas', 'Virgin Atlantic', 'Lufthansa', 'Air Berlin assist', 'Air France', 'Air Berlin', 'British Airways', 'easyJet', 'American Airlines']

total_tweet_count = {name: 0 for name in airline_names}
mention_tweets = []

# Lower-case the search terms once instead of once per tweet.
# NOTE: matching is plain substring search, so '@AirBerlin' also matches
# inside '@AirBerlinassist' (and 'Air Berlin' inside 'Air Berlin assist').
search_terms = [(name, tag.lower(), name.lower()) for tag, name in zip(airline_tags, airline_names)]

for convo in scores_pickle:
    for tweet in convo:
        text = tweet[3].lower()  # tweet[3] is the string searched for tags and names
        mentioned = False
        for name, tag_lc, name_lc in search_terms:
            # Both terms are tested explicitly: `(tag or name) in text` evaluates
            # `tag or name` first, which is always the tag, so the name would never be checked.
            if tag_lc in text or name_lc in text:
                total_tweet_count[name] += 1
                mentioned = True
        # Store each tweet once, even if it mentions several airlines, so that
        # later per-airline counts are not inflated by duplicate entries.
        if mentioned:
            mention_tweets.append(tweet)


In [None]:
# We sort the tweets from mention_tweets into five sentiment buckets, using the
# sentiment score stored at tweet[4]:
#
#   very negative : score < -0.6
#   negative      : -0.6 <= score < -0.2
#   neutral       : -0.2 <= score <= 0.2
#   positive      : 0.2 < score <= 0.6
#   very positive : score > 0.6

maxneg, minneg, neutral, minpos, maxpos = [], [], [], [], []

for tweet in mention_tweets:
    score = tweet[4]
    # Branches are ordered from lowest to highest, so each one only needs its upper bound.
    if score < -0.6:
        maxneg.append(tweet)
    elif score < -0.2:
        minneg.append(tweet)
    elif score <= 0.2:
        neutral.append(tweet)
    elif score <= 0.6:
        minpos.append(tweet)
    elif score > 0.6:  # explicit test (not `else`) so a NaN score stays uncategorised
        maxpos.append(tweet)



In [None]:
# We create a dictionary with the total number of incoming tweets for each airline.
# This will be used when scaling our final plot such that the ratio of a certain sentiment,
# and not the nominal number of tweets, is shown.

total_tweets = {airline_names[i]:0 for i in range(len(airline_names))}

cat_list = [maxneg, minneg, neutral, minpos, maxpos]

for cat in cat_list:
    for tweet in cat:
        for i in range(len(airline_tags)):
            if (airline_tags[i].lower() or airline_names[i].lower()) in tweet[3].lower():
                total_tweets[airline_names[i]] += 1


In [None]:
# Now we are ready to compile our plot with the sentiment distribution for all airlines.

tweet_sum_by_air = {airline_names[i]:0 for i in range(len(airline_names))}

color_list = ['#FF0000', '#FFA500', '#FFD700', '#9ACD32', '#008000']
cat_name_list = ['Very negative', 'Negative', 'Neutral', 'Positive', 'Very positive']

index = 0

ratios_dict = {}

for cat in cat_list:
    cat_container = {airline_names[i]:0 for i in range(len(airline_names))}
    for tweet in cat:
        for i in range(len(airline_tags)):
            if (airline_tags[i].lower() or airline_names[i].lower()) in tweet[3].lower():
                cat_container[airline_names[i]] += 1
    for i in range(len(cat_container)):
        cat_container[airline_names[i]] = cat_container[airline_names[i]] / total_tweets[airline_names[i]]
    plt.bar(range(len(cat_container)), list(cat_container.values()), align='center', bottom=tweet_sum_by_air.values(), color=color_list[cat_list.index(cat)])
    plt.xticks(range(len(cat_container)), list(cat_container.keys()), rotation='vertical')
    tweet_sum_by_air = MyDict(tweet_sum_by_air) + MyDict(cat_container)
    ratios_dict.update({cat_name_list[index]:list(cat_container.values())})
    index += 1

plt.show()

In [None]:
# In order to get the numerical values of the ratios displayed in the plot, we compile a dataframe.

# ratios_dict maps each sentiment category to one ratio per airline, in the key
# order of cat_container. Building the frame with the airline names as index and
# transposing gives one row per category and one column per airline, keeps the
# values numeric, and leaves ratios_dict itself untouched.
ratios_df = pd.DataFrame(ratios_dict, index=list(cat_container)).transpose()

# Reverse the rows so that the last category added to ratios_dict comes first.
ratios_df = ratios_df.iloc[::-1]
ratios_df

In [None]:
# Sentiment ratios for American Airlines: select that airline's column from the ratios table.

ratios_df.loc[:, 'American Airlines']

In [None]:
# Final cell: report that execution reached the end of the notebook.
print("Done")