<a href="https://imgur.com/shNBFGq"><img src="https://i.imgur.com/shNBFGq.jpg" title="source: imgur.com" /></a>

# Libraries 

In [None]:
import numpy as np 
import pandas as pd 
import os
# Print the full path of every file found below the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from PIL import Image
from wordcloud import WordCloud, STOPWORDS , ImageColorGenerator

In [None]:
# Location of the insult-tweets CSV relative to the notebook's working directory.
url = (
    '../input/all-trumps-twitter-insults-20152021/'
    'trump_insult_tweets_2014_to_2021.csv'
)
df = pd.read_csv(url)
df.head()

### Drop NaN

In [None]:
# Discard every row that has at least one missing value.
df = df.dropna()

# Data Manipulation

## Time Series columns 

In [None]:
from datetime import date  # not used in this cell; import kept as-is

# Parse the raw date strings, then derive one calendar column per component.
df['date'] = pd.to_datetime(df['date'])
_dates = df['date'].dt
# assign() overwrites existing columns, so this cell can be re-run safely
# (DataFrame.join raises on overlapping column names).
df = df.assign(
    year=_dates.year,
    month=_dates.month,
    day=_dates.day,
    dayofweek=_dates.dayofweek,
    dayofyear=_dates.dayofyear,
    # Series.dt.weekofyear was removed in pandas 2.0; isocalendar().week is
    # its replacement and yields the same ISO week number.
    weekofyear=_dates.isocalendar().week.astype('int64'),
    quarter=_dates.quarter,
)
df['year'].value_counts()

### Count Hashtags in Tweets 

In [None]:
# Number of '#' characters in each tweet.
df['hash'] = df['tweet'].map(lambda text: text.count('#'))

### Count Mentions in Tweets 

In [None]:
# Number of '@' characters in each tweet.
df['men'] = df['tweet'].map(lambda text: text.count('@'))

### Tweet Length Characters & Class

In [None]:
# Character count of every tweet.
df['tweet_length_ch'] = df['tweet'].apply(len)

# Keep only tweets of at most 280 characters (presumably Twitter's limit).
# .copy() makes the filtered frame independent, so the column assignment below
# does not trigger SettingWithCopyWarning or write into a temporary slice.
df = df.loc[df['tweet_length_ch'] <= 280].copy()

# Length class: 'short' up to and including 130 characters, otherwise 'long'.
df['tweet_length'] = df['tweet_length_ch'].apply(
    lambda n_chars: 'short' if n_chars <= 130 else 'long'
)

###  Media 

In [None]:
# Number of 'https://t.co/' links found in each tweet.
df['med'] = df['tweet'].map(lambda text: text.count('https://t.co/'))
df['med'].unique()

In [None]:
# Two independent snapshots of the prepared frame for the later sections.
df_copy, df_copy2 = df.copy(), df.copy()

# EDA 

## Check Tweets Length 

In [None]:
# Pass the series by keyword: recent seaborn releases accept only `data`
# positionally, so a bare positional series is deprecated or reinterpreted.
sns.boxplot(x=df['tweet_length_ch'])

## Tweet Length Distribution 

In [None]:
# Kernel density estimate of the tweet length in characters.
plt.figure(figsize=(14, 5))
tweet_lengths = df_copy['tweet_length_ch']
sns.kdeplot(data=tweet_lengths)

## Most Targets in a Tweet 

In [None]:
# Number of insult rows recorded for each distinct tweet text.
insult_tw = (
    df_copy
    .groupby('tweet', as_index=False)
    .agg(insult=('insult', 'count'))
)
insult_tw.describe()

In [None]:
# Select the tweet(s) with the highest insult count. The maximum is computed
# from the data instead of the hard-coded 16 (presumably the max shown by
# describe() when the notebook was written — confirm against that output).
max_insults = insult_tw['insult'].max()
insult_tw_75 = insult_tw.loc[insult_tw['insult'] == max_insults]
print('Most tweet have insulted Targets is : ', insult_tw_75.values)

<a href="https://imgur.com/8kjeKny"><img src="https://i.imgur.com/8kjeKny.png" title="source: imgur.com" /></a>

## The Media

In [None]:
# Insults aimed at the media, and the single most frequent one.
df_media = df_copy[df_copy['target'] == 'the-media']
print('Most insult word with The Media was : ', df_media['insult'].value_counts().head(1))

# Join every insult into one text and render it as a word cloud.
tweet_All = " ".join(df_media['insult'])
wordcloud_ALL = WordCloud(
    max_font_size=50,
    max_words=100,
    colormap="inferno",
    background_color="white",
).generate(tweet_All)

fig, ax = plt.subplots(1, 1, figsize=(12, 10))
ax.imshow(wordcloud_ALL, interpolation='bilinear')
ax.axis('off');

## Joe Biden

<a href="https://imgur.com/mXAQKkV"><img src="https://i.imgur.com/mXAQKkV.jpg" title="source: imgur.com" /></a>

In [None]:
# Insults aimed at Joe Biden, and the single most frequent one.
df_bide = df_copy[df_copy['target'] == 'joe-biden']
print('Most insult word with joe biden was : ', df_bide['insult'].value_counts().head(1))

# Join every insult into one text and render it as a word cloud.
tweet_All = " ".join(df_bide['insult'])
wordcloud_ALL = WordCloud(
    max_font_size=50,
    max_words=100,
    colormap='gray',
    background_color="white",
).generate(tweet_All)

fig, ax = plt.subplots(1, 1, figsize=(12, 10))
ax.imshow(wordcloud_ALL, interpolation='bilinear')
ax.axis('off');

## Hillary-Clinton

<a href="https://imgur.com/NzM9SSq"><img src="https://i.imgur.com/NzM9SSq.png" title="source: imgur.com" /></a>

In [None]:

# Insults aimed at Hillary Clinton, and the single most frequent one.
df_hc = df_copy[df_copy['target'] == 'hillary-clinton']
print('Most insult word with hillary-clinton was : ', df_hc['insult'].value_counts().head(1))

# Join every insult into one text and render it as a word cloud.
tweet_All = " ".join(df_hc['insult'])
wordcloud_ALL = WordCloud(
    max_font_size=50,
    max_words=100,
    colormap="Blues",
    background_color="skyblue",
).generate(tweet_All)

fig, ax = plt.subplots(1, 1, figsize=(12, 10))
ax.imshow(wordcloud_ALL, interpolation='bilinear')
ax.axis('off');

## Russia-Trump

<a href="https://imgur.com/avUKB1f"><img src="https://i.imgur.com/avUKB1f.png" title="source: imgur.com" /></a>

In [None]:

# Insults recorded for the 'trump-russia' target, and the most frequent one.
df_trump_russia = df_copy[df_copy['target'] == 'trump-russia']
print('Most insult word with df_trump_russia was : ', df_trump_russia['insult'].value_counts().head(1))

# Join every insult into one text and render it as a word cloud.
tweet_All = " ".join(df_trump_russia['insult'])
wordcloud_ALL = WordCloud(
    max_font_size=50,
    colormap="Reds",
    max_words=100,
    background_color="white",
).generate(tweet_All)

fig, ax = plt.subplots(1, 1, figsize=(12, 10))
ax.imshow(wordcloud_ALL, interpolation='bilinear')
ax.axis('off');

# Data Visualization (EDA)

## Tweet Length Class - Pie Chart

In [None]:
# Share of long vs. short tweets, counted from the data rather than from
# hard-coded numbers so the chart stays correct when the data changes.
length_counts = df['tweet_length'].value_counts()
labels = 'Long', 'Short'
sizes = [length_counts.get('long', 0), length_counts.get('short', 0)]
explode = (0.1, 0)  # pull the 'Long' slice slightly out of the pie

# A single figure: the original opened two, leaving the first one empty.
plt.figure(figsize=(10, 5))
plt.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90);
plt.axis('equal');

## Mentions in Tweet Length 

In [None]:
# Tweet counts per length class, split by the number of '@' mentions.
plt.figure(figsize=(14, 7))
mention_ax = sns.countplot(x='tweet_length', hue='men', data=df)
mention_ax.set_title('Mention / Tweet Length');

## Media in tweets 

In [None]:
# Tweets without any 'https://t.co/' link vs. tweets with at least one.
# (The original also opened a matplotlib figure here, which only rendered an
# empty canvas because the chart itself is drawn with plotly.)
No_Media = int((df['med'] == 0).sum())
Media = int((df['med'] > 0).sum())
Platform = ['NoMedia', 'Media']
Count = [No_Media, Media]

fig = px.pie(names=Platform,
             values=Count,
             title='Media/No Media',
             color_discrete_sequence=px.colors.sequential.Agsunset)
fig.update_traces(textposition='inside', textinfo='percent+label')

## 3D Length-Hashtag-Mentions

### X = Hashtag                      ,            Y = Mention            ,  Z = Tweet Length

In [None]:
# 3D scatter: x = hashtag count, y = mention count, z = tweet length (chars).
d3 = df_copy[['tweet_length_ch', 'men', 'hash', 'tweet_length']]
hashtag, mention, length = (
    df_copy[column].values for column in ('hash', 'men', 'tweet_length_ch')
)
L = df_copy['tweet_length'].values
marker_style = dict(size=5, color="crimson")
trace = go.Scatter3d(x=hashtag, y=mention, z=length,
                     mode='markers', marker=marker_style)
fig = go.Figure(data=[trace])
fig.show()

## Top 10 Targets 

In [None]:
# Ten most frequent targets and how many insult rows each received.
r_op = df['target'].value_counts().head(10)

sns.set_style("darkgrid")
plt.figure(figsize=(20, 6))
# x and y must be passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally.
r_op_vis = sns.barplot(x=r_op.index, y=r_op.values, alpha=0.8, palette="inferno")
plt.title('Trump Targets', fontsize=15)
plt.ylabel('insults', fontsize=12)
plt.xlabel('Target', fontsize=12)
# Rotate the existing tick labels instead of overwriting them with
# set_xticklabels, which warns when tick positions are not fixed.
plt.xticks(rotation=30, fontsize=15)
plt.show();

## Most Targeted People by Year

In [None]:
from IPython.core.display import HTML

# Markup of the Flourish chart embed, rendered as the cell's output.
flourish_chart = '''<div class="flourish-embed flourish-chart" data-src="visualisation/5060515"><script src="https://public.flourish.studio/resources/embed.js"></script></div>'''
HTML(flourish_chart)

## Most Frequent Insults

In [None]:

# Word cloud built from every insult in the data set.
tweet_All = " ".join(df['insult'])
wordcloud_ALL = WordCloud(
    max_font_size=50,
    max_words=100,
    background_color="white",
).generate(tweet_All)

fig, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.imshow(wordcloud_ALL, interpolation='bilinear')
ax.axis('off')

In [None]:
# Insult rows per year. as_index=False already returns 'year' as a regular
# column, so the extra reset_index() (which only added a junk 'index' column)
# is dropped. Passing the frame to px.line labels the axes 'year' / 'insult'.
dftime = df.groupby('year', as_index=False).agg({'insult': 'count'})
px.line(dftime, x='year', y='insult', title='insult by year')

In [None]:
# Insult rows per day of week. as_index=False already returns 'dayofweek' as
# a regular column, so the extra reset_index() (which only added a junk
# 'index' column) is dropped. Passing the frame to px.line labels the axes.
dftime_dw = df.groupby('dayofweek', as_index=False).agg({'insult': 'count'})
px.line(dftime_dw, x='dayofweek', y='insult', title='insult by Daysofweek')

In [None]:
# Insult rows per quarter. as_index=False already returns 'quarter' as a
# regular column, so the extra reset_index() (which only added a junk 'index'
# column) is dropped. Passing the frame to px.line labels the axes.
dftime_q = df.groupby('quarter', as_index=False).agg({'insult': 'count'})
px.line(dftime_q, x='quarter', y='insult', title='insult by quarter')

In [None]:
# Distinct tweet texts (first occurrence kept, original index labels retained).
tweets = df.drop_duplicates(subset='tweet')['tweet']

In [None]:
# Every distinct tweet as a plain Python list of strings.
all_sentences = list(tweets)

# Flatten the tweets into one list of whitespace-separated tokens.
lines = [token for sentence in all_sentences for token in sentence.split()]

### Removing Punctuation


In [None]:
import re

# Strip every character that is not an ASCII letter or digit from each token.
_non_alnum = re.compile(r'[^A-Za-z0-9]+')
lines = [_non_alnum.sub('', token) for token in lines]

# Tokens that consisted only of punctuation are now empty; leave them out.
lines2 = [token for token in lines if token != '']

### Getting Word Roots (Stemming)

In [None]:
from nltk.stem.snowball import SnowballStemmer

# Reduce every cleaned token to its English Snowball stem.
s_stemmer = SnowballStemmer(language='english')
stem = [s_stemmer.stem(token) for token in lines2]

# Top Mentioned Keywords

In [None]:
# Load the large English spaCy pipeline. Before `nlp` is rebound in the
# organizations section, the only thing read from it is
# `nlp.Defaults.stop_words` in the next cell.
import spacy
nlp = spacy.load('en_core_web_lg')

In [None]:
# Keep only the stems that are not in spaCy's English stop-word set.
stop_words = nlp.Defaults.stop_words
stem2 = [token for token in stem if token not in stop_words]

In [None]:
# Count how often each remaining stem occurs.
# NOTE(review): this rebinds `df` — from here on `df` is a Series of stem
# counts, no longer the tweet DataFrame loaded at the top of the notebook.
df = pd.DataFrame(stem2)
df = df[0].value_counts()

In [None]:
# Keep the 20 most frequent stems and plot them as a horizontal bar chart.
df = df.head(20)
px.bar(df, x=df.values, y=df.index, color=df.index, height=500)

# Top Mentioned Organizations


In [None]:
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()
#====== 
def show_ents(doc):
    """Print each named entity of `doc` as 'text - label - explanation'.

    Nothing is printed when `doc.ents` is empty.
    """
    if not doc.ents:
        return
    for entity in doc.ents:
        explanation = str(spacy.explain(entity.label_))
        print(entity.text + ' - ' + entity.label_ + ' - ' + explanation)
#======
# Small English pipeline used for named-entity recognition.
nlp = spacy.load('en_core_web_sm')

# Rebuild one long text from the cleaned tokens.
str1 = " "
entity_text = str1.join(lines2)

# spaCy refuses texts longer than nlp.max_length; raise the limit just enough
# for this text instead of setting an arbitrary astronomically large value.
nlp.max_length = max(nlp.max_length, len(entity_text) + 1)

stem2 = nlp(entity_text)

# One (entity text, entity label) row per recognised entity.
label = [(ent.text, ent.label_) for ent in stem2.ents]
df6 = pd.DataFrame(label, columns=['Word', 'Entity'])

# Count the entities labelled ORG. A boolean filter is used instead of
# DataFrame.where, which keeps the non-matching rows as all-NaN rows.
df7 = df6.loc[df6['Entity'] == 'ORG', 'Word'].value_counts()

![](https://www.pngitem.com/pimgs/m/41-412092_as-seen-on-abc-cbs-fox-nbc-cnn.png)

In [None]:
# 20 most frequent ORG entities as a horizontal bar chart.
# (The original also opened a matplotlib figure here, which only rendered an
# empty canvas because the chart itself is drawn with plotly.)
df = df7.head(20)
px.bar(df, x=df.values, y=df.index, color=df.index, height=500)

# Top Mentioned People

In [None]:
# `df6` (built in the organizations section from the same text and the same
# 'en_core_web_sm' pipeline) already holds every (Word, Entity) pair, so the
# model is not reloaded and the corpus is not parsed a second time.
# A boolean filter is used instead of DataFrame.where, which keeps the
# non-matching rows as all-NaN rows.
df10 = df6.loc[df6['Entity'] == 'PERSON']

# Frequency of each entity labelled PERSON.
df11 = df10['Word'].value_counts()

![](https://s3-eu-west-1.amazonaws.com/tutor2u-media/subjects/politics/Democrats.png?mtime=20150924080302)

In [None]:
# 20 most frequent PERSON entities as a horizontal bar chart.
# (The original repeated the slice and opened two matplotlib figures, which
# only rendered empty canvases because the chart itself is drawn with plotly.)
df = df11.head(20)
px.bar(df, x=df.values, y=df.index, color=df.index, height=500)

# Sentiment Analysis 

### Removing characters

In [None]:
def _clean_tweet(text):
    """Return `text` normalised for sentiment scoring (lower-cased string)."""
    # Remove http:// and https:// URLs (raw string: '\S' is a regex class,
    # not a Python string escape).
    cleaned = re.sub(r'https?://\S+', '', str(text))

    # Replace every non-word character with a space.
    cleaned = re.sub(r'\W', ' ', cleaned)

    # Remove single letters surrounded by whitespace.
    cleaned = re.sub(r'\s+[a-zA-Z]\s+', ' ', cleaned)

    # Remove a single letter at the start of the text. The original pattern
    # escaped the caret ('\^'), so it searched for a literal '^' and could
    # never match after the previous substitutions.
    cleaned = re.sub(r'^[a-zA-Z]\s+', ' ', cleaned)

    # Collapse runs of whitespace into one space.
    cleaned = re.sub(r'\s+', ' ', cleaned)

    # Remove a leading 'b' followed by whitespace.
    cleaned = re.sub(r'^b\s+', '', cleaned)

    return cleaned.lower()


# One cleaned string per distinct tweet, in the same order as `tweets`.
features = tweets.values
processed_features = [_clean_tweet(tweet) for tweet in features]


### Adding Subjectivity & Polarity

In [None]:
# Frame with one row per cleaned tweet text.
df3 = pd.DataFrame({'Tweets': processed_features})
#=======
from textblob import TextBlob
from wordcloud import WordCloud
def getSubjectivity(text):
    """Return the TextBlob subjectivity score of `text`."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity


def getPolarity(text):
    """Return the TextBlob polarity score of `text`."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity


# Score every cleaned tweet and store the results in two new columns.
cleaned_tweets = df3['Tweets']
df3['Subjectivity'] = cleaned_tweets.map(getSubjectivity)
df3['Polarity'] = cleaned_tweets.map(getPolarity)


# Sentiment Analysis

In [None]:
# Map a polarity score to a sentiment label: 'Negative', 'Neutral' or 'Positive'.
def getAnalysis(score):
    """Return the sentiment label for a polarity score.

    A score below zero is 'Negative', a score equal to zero is 'Neutral',
    and any other score is 'Positive'.
    """
    if score < 0:
        return 'Negative'
    return 'Neutral' if score == 0 else 'Positive'
# Label every tweet from its polarity score, then display the frame.
df3['Analysis'] = df3['Polarity'].map(getAnalysis)
df3

In [None]:
import plotly.graph_objects as go

# Number of tweets in each sentiment class.
analysis = df3['Analysis']
Neutral = int((analysis == 'Neutral').sum())
Negative = int((analysis == 'Negative').sum())
Positive = int((analysis == 'Positive').sum())

labels = ['Negative', 'Positive', 'Neutral']
values = [Negative, Positive, Neutral]
colors = ['red', 'green', 'lightblue']

# Pie chart with percentages inside the slices and a thin grey outline.
pie = go.Pie(labels=labels, values=values)
fig = go.Figure(data=[pie])
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='percent',
    textfont_size=20,
    textposition='inside',
    marker=dict(colors=colors, line=dict(color='grey', width=1)),
)
fig.show()

# Sentiments By Time 

In [None]:
# df3 holds one row per distinct tweet, built from `tweets.values`, so row i of
# df3 belongs to the original row labelled tweets.index[i]. Look the years up
# by those labels instead of relying on index alignment between df3
# (0..n-1) and df_copy (original row labels), which pairs unrelated rows.
# The original also assigned the None returned by
# drop_duplicates(inplace=True) to df_copy['tweet'], wiping that column.
assert len(df3) == len(tweets), "df3 must hold exactly one row per distinct tweet"
df3['year'] = df_copy.loc[tweets.index, 'year'].to_numpy()

# Give every insult row the sentiment of its tweet, matched by tweet text.
df_copy['sentiment'] = df_copy['tweet'].map(dict(zip(tweets, df3['Analysis'])))

# Tweets per year and sentiment class (each distinct tweet counted once).
df_tim_sen = df3[['year', 'Analysis']].rename(columns={'Analysis': 'sentiment'})
sentiment_columns = ['sentiment_Negative', 'sentiment_Neutral', 'sentiment_Positive']
df_time_sen = (
    pd.get_dummies(df_tim_sen, columns=['sentiment'])
    .groupby('year')
    .sum()
    # Guarantee all three columns exist even if a class never occurs.
    .reindex(columns=sentiment_columns, fill_value=0)
    .reset_index()
    .sort_values('year', ascending=True)
)

# 'seaborn-whitegrid' was renamed to 'seaborn-v0_8-whitegrid' in matplotlib 3.6
# and the old name was later removed; use whichever this installation provides.
whitegrid_style = (
    'seaborn-v0_8-whitegrid'
    if 'seaborn-v0_8-whitegrid' in plt.style.available
    else 'seaborn-whitegrid'
)
plt.style.use(whitegrid_style)

plt.figure(figsize=(14, 7))
plt.plot(df_time_sen['year'], df_time_sen['sentiment_Negative'], marker='o', label='Negative')
plt.plot(df_time_sen['year'], df_time_sen['sentiment_Neutral'], color='blue', marker='*', label='Neutral')
plt.plot(df_time_sen['year'], df_time_sen['sentiment_Positive'], color='green', marker='+', label='Positive')

# Point the annotation at the actual peak of the negative series instead of
# hard-coded coordinates.
peak = df_time_sen.loc[df_time_sen['sentiment_Negative'].idxmax()]
plt.annotate('High Negative insult tweets',
             xy=(peak['year'], peak['sentiment_Negative']), xycoords='data',
             xytext=(0.8, 0.95), textcoords='axes fraction',
             arrowprops=dict(facecolor='black', shrink=0.10),
             horizontalalignment='center', verticalalignment='top',
             )
plt.xlabel('year')
plt.ylabel('tweets')
# The lines carry labels, so show them.
plt.legend();

## Tweets Length in Sentiments 

In [None]:
# One metadata row per distinct tweet (first occurrence), renumbered 0..n-1.
# The original discarded the result of drop_duplicates and then assigned the
# columns by index label, which pairs df3 rows with unrelated df_copy2 rows.
# NOTE(review): assumes df_copy2 has the same rows, in the same order, as the
# frame `tweets` was taken from, so position i matches row i of df3 — confirm.
tweet_meta = df_copy2.drop_duplicates(subset=['tweet']).reset_index(drop=True)
assert len(tweet_meta) == len(df3), "expected one metadata row per df3 row"

for column in ['insult', 'target', 'med', 'tweet_length']:
    df3[column] = tweet_meta[column].to_numpy()

plt.figure(figsize=(14, 5))
sns.countplot(x='Analysis', data=df3, hue='tweet_length', palette="inferno")

## Media in Sentiments 

In [None]:
# Tweets per sentiment class, split by the number of media links.
plt.figure(figsize=(14, 5))
sns.countplot(data=df3, x='Analysis', hue='med', palette="Oranges")

# ADVANCED ANIMATED TARGET CARDS 
### Shows the top targets in Trump's insult tweets and how Trump insults them

In [None]:
from IPython.core.display import HTML

# Markup of the Flourish cards embed, rendered as the cell's output.
flourish_cards = '''<div class="flourish-embed flourish-cards" data-src="visualisation/5123150"><script src="https://public.flourish.studio/resources/embed.js"></script></div>'''
HTML(flourish_cards)

<img src="https://media.giphy.com/media/xTiTnHXbRoaZ1B1Mo8/source.gif">

#  Conclusion 



* Most of the insult tweets are about fake news and the Democrats

* Trump uses sarcastic expressions to insult other people or situations (Crooked Hillary - Sleepy Joe - Witch Hunt)

* 25% of Trump's insult tweets contain more than 2 insults

* Tweets contain more insults when Trump talks about the media and newspapers

* Trump uses insults as a style in defence of any person or institution

* 85% of Trump's insult tweets are long

* Only 10% of Trump's insult tweets have no media

* The most targeted people are Hillary Clinton, Joe Biden and Adam Schiff

* More insult tweets were posted from 2017 onwards

* Trump typed more insult tweets on weekends

* Trump typed more insult tweets in the last quarter of the year

* Trump insults newspapers and Democrats the most

* 50% of the tweets are negative

* Trump wrote more negative insult tweets in 2018