# **Trump Twitter Insults**

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the Kaggle input directory,
# in the order os.walk() visits them, and print one path per line.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install chart_studio

In [None]:
from matplotlib import pyplot as plt
import seaborn as sns
import chart_studio.plotly as py
import cufflinks as cf
import plotly.graph_objs as go
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from PIL import Image
%matplotlib inline

from plotly.offline import download_plotlyjs, plot, init_notebook_mode, iplot
# Initialise plotly's notebook mode for offline plotting (figures in this
# notebook are displayed with iplot()).
init_notebook_mode(connected=True)
# Put cufflinks into offline mode as well.
cf.go_offline()

**Import Data from csv**

In [None]:
# Location of the insult-tweet CSV, relative to the notebook's working directory.
INSULTS_CSV = "../input/all-trumps-twitter-insults-20152021/trump_insult_tweets_2014_to_2021.csv"

trump_df = pd.read_csv(INSULTS_CSV)

# Preview the first ten rows.
trump_df.head(10)

**Check Data Shape**

In [None]:
# (number of rows, number of columns) of the frame as loaded
trump_df.shape

**Data Cleaning** - Removing missing or null values

In [None]:
# Number of missing values in each column.
trump_df.isna().sum()

We can observe that out of **10360 rows** there are **only 2 null values**, in the `target` column. Drop those rows.

In [None]:
# Discard every row that contains at least one missing value, then show the
# resulting (rows, columns).
trump_df = trump_df.dropna()
trump_df.shape

In [None]:
# Sanity check: after dropping rows, every per-column null count should be zero.
trump_df.isna().sum()

Count the unique targets (topics, people, communities, etc.).

In [None]:
# 1-tuple holding the number of distinct values in the `target` column.
trump_df["target"].unique().shape

We can observe that *out of 10358 targets only 866 are unique*. So the same people, communities, etc. are targeted multiple times in his tweets.

In [None]:
# How many rows name each target (value_counts() orders by frequency, highest first).
unique_targetcounts = trump_df["target"].value_counts()
unique_targetcounts

A bar plot to visualize Top 25 Targets:

In [None]:
# Number of most-frequent targets shown in the bar chart.
TOP_N_TARGETS = 25

# unique_targetcounts is ordered by descending frequency, so its head is the
# top N. x and y are taken from the same slice so they always have equal length.
top_targets = unique_targetcounts.head(TOP_N_TARGETS)

trace = go.Bar(
    x=top_targets.index,
    y=top_targets.values,
    marker=dict(
        opacity=0.8,
        # One colour-scale value per bar; len() keeps this valid even when
        # fewer than TOP_N_TARGETS distinct targets exist.
        color=np.arange(len(top_targets)),
    ),
)

fig = go.Figure(data=[trace])
fig.update_layout(title=f"Top {TOP_N_TARGETS} Targets")
fig.update_xaxes(title="Target")
fig.update_yaxes(title="Frequency")

iplot(fig)

WordCloud to visualize commonly used Targets

In [None]:
# Join every target into one space-separated string and feed it to WordCloud.
target_text = " ".join(trump_df.target)
wordcloud = WordCloud(width=1400, height=700)
wordcloud.generate(text=target_text)

# Render the cloud on a large canvas with the axes hidden.
cloud_fig, cloud_ax = plt.subplots(figsize=(30, 18))
cloud_ax.imshow(wordcloud, interpolation="bilinear")
cloud_ax.axis("off")
plt.show()

WordCloud to visualize commonly used Insults in his Tweets

In [None]:
# Join every insult into one space-separated string and feed it to WordCloud.
insult_text = " ".join(trump_df.insult)
wordcloud = WordCloud(width=1400, height=700)
wordcloud.generate(text=insult_text)

# Render the cloud on a large canvas with the axes hidden.
cloud_fig, cloud_ax = plt.subplots(figsize=(30, 18))
cloud_ax.imshow(wordcloud, interpolation="bilinear")
cloud_ax.axis("off")
plt.show()

We can observe from the above WordCloud that the most commonly used insults were Fake, News, corrupt people, crooked, etc. He tried to label everyone who criticized him, mainly the media, as fake and corrupt.

In [None]:
# set date as index and then convert it from str into DateTime
trump_df.set_index(trump_df.date, inplace=True)
trump_df.index = pd.to_datetime(trump_df.index, format="%Y/%m/%d")
trump_df.index

Count of Tweets made year on year

In [None]:
# value_counts() on the datetime index gives the number of rows per date.
# Summing those per calendar year yields the number of rows in each year;
# .count() would instead return how many distinct dates occur in each year.
datecount_data = trump_df.index.value_counts().resample("Y").sum()
datecount_data

In [None]:
 countsbydate = trump_df.index.value_counts().resample("m").count()
trace1 = go.Scatter(x=datecount_data.index, y=datecount_data)
fig = go.Figure(data=[trace1])

fig.update_xaxes(title="Year")
fig.update_yaxes(title="Tweet Count")

iplot(fig)

As observed above in both the bar plot and the WordCloud, the largest number of tweets were made about **the-media**.

In [None]:
# filtering tweets where target is "the-media"
media_insults = trump_df.target[trump_df.target == "the-media"].count()
other_insults = trump_df.target.count() - media_insults

In [None]:
# visualization of other insults vs The Media insults
explode=[0, 0.2]
plt.style.use('fivethirtyeight')
plt.pie([other_insults, media_insults],labels=["Other Insults", "Media Insults"], explode=explode, 
        shadow=True, wedgeprops={"edgecolor" : "#2d2d2d"},
       radius=1.2, autopct="%1.1f%%")

plt.show()

**Hillary-Clinton** and **Joe-Biden** as Targets in Trump's Tweets

In [None]:
def monthly_target_counts(df, target):
    """Return the number of rows of ``df`` that name ``target``, per calendar month.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``target`` column and a DatetimeIndex (required by resample).
    target : str
        Exact value of the ``target`` column to select.

    Returns
    -------
    pandas.Series
        Monthly totals indexed by the resampled month bins.
    """
    mask = (df["target"] == target).to_numpy()
    # Rows per individual date for this target ...
    per_date = df.index[mask].value_counts()
    # ... summed per month. (.count() would give the number of distinct dates
    # in the month, not the number of times the target was mentioned.)
    return per_date.resample("m").sum()


hillary_clinton_target = monthly_target_counts(trump_df, "hillary-clinton")
joe_biden_target = monthly_target_counts(trump_df, "joe-biden")

In [None]:
# Pixel diameter of the bubble that represents the largest monthly value.
MAX_MARKER_PX = 60


def _bubble_trace(counts, name, sizeref):
    """Build a markers-only scatter trace whose bubble area tracks ``counts``.

    counts  -- pandas Series of monthly values; its index supplies the x axis
    name    -- legend label for the trace
    sizeref -- plotly scale factor shared by all traces so bubbles are comparable
    """
    return go.Scatter(
        x=counts.index,
        y=counts,
        mode="markers",
        name=name,
        marker=dict(size=counts, sizemode="area", sizeref=sizeref),
    )


# Scale both traces against the same peak so the largest bubble is
# MAX_MARKER_PX wide regardless of how large the monthly values are.
# Falls back to 1 when there is no positive value (empty or all-zero series).
peak = pd.concat([hillary_clinton_target, joe_biden_target]).max()
peak = float(peak) if peak > 0 else 1.0
bubble_sizeref = 2.0 * peak / (MAX_MARKER_PX ** 2)

trace1 = _bubble_trace(hillary_clinton_target, "Hillary-Clinton", bubble_sizeref)
trace2 = _bubble_trace(joe_biden_target, "Joe-Biden", bubble_sizeref)

fig = go.Figure(data=[trace1, trace2])

fig.update_layout(title="Hillary-Clinton vs Joe-Biden Tweet Target (2016-2021)")
fig.update_xaxes(title="Year")
fig.update_yaxes(title="Times Targeted")

iplot(fig)

We can **observe a shift** from Hillary-Clinton as target (in 2016 elections) to Joe-Biden as target (in 2020 elections)