# 1. Imports

In [77]:
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from scipy import stats
import seaborn as sns
import pandas as pd
%matplotlib inline

In [78]:
# Two-letter language codes (per the `iso_639` naming) mapped to English display names.
# Insertion order is kept, so the derived list of pairs below has the same order.
LANGUAGES = {
    'ab': 'Abkhaz',
    'aa': 'Afar',
    'af': 'Afrikaans',
    'ak': 'Akan',
    'sq': 'Albanian',
    'am': 'Amharic',
    'ar': 'Arabic',
    'an': 'Aragonese',
    'hy': 'Armenian',
    'as': 'Assamese',
    'av': 'Avaric',
    'ae': 'Avestan',
    'ay': 'Aymara',
    'az': 'Azerbaijani',
    'bm': 'Bambara',
    'ba': 'Bashkir',
    'eu': 'Basque',
    'be': 'Belarusian',
    'bn': 'Bengali',
    'bh': 'Bihari',
    'bi': 'Bislama',
    'bs': 'Bosnian',
    'br': 'Breton',
    'bg': 'Bulgarian',
    'my': 'Burmese',
    'ca': 'Catalan; Valencian',
    'ch': 'Chamorro',
    'ce': 'Chechen',
    'ny': 'Chichewa; Chewa; Nyanja',
    'zh': 'Chinese',
    'cv': 'Chuvash',
    'kw': 'Cornish',
    'co': 'Corsican',
    'cr': 'Cree',
    'hr': 'Croatian',
    'cs': 'Czech',
    'da': 'Danish',
    'dv': 'Divehi; Maldivian;',
    'nl': 'Dutch',
    'dz': 'Dzongkha',
    'en': 'English',
    'eo': 'Esperanto',
    'et': 'Estonian',
    'ee': 'Ewe',
    'fo': 'Faroese',
    'fj': 'Fijian',
    'fi': 'Finnish',
    'fr': 'French',
    'ff': 'Fula',
    'gl': 'Galician',
    'ka': 'Georgian',
    'de': 'German',
    'el': 'Greek, Modern',
    'gn': 'Guaraní',
    'gu': 'Gujarati',
    'ht': 'Haitian',
    'ha': 'Hausa',
    'he': 'Hebrew (modern)',
    'hz': 'Herero',
    'hi': 'Hindi',
    'ho': 'Hiri Motu',
    'hu': 'Hungarian',
    'ia': 'Interlingua',
    'id': 'Indonesian',
    'ie': 'Interlingue',
    'ga': 'Irish',
    'ig': 'Igbo',
    'ik': 'Inupiaq',
    'io': 'Ido',
    'is': 'Icelandic',
    'it': 'Italian',
    'iu': 'Inuktitut',
    'ja': 'Japanese',
    'jv': 'Javanese',
    'kl': 'Kalaallisut',
    'kn': 'Kannada',
    'kr': 'Kanuri',
    'ks': 'Kashmiri',
    'kk': 'Kazakh',
    'km': 'Khmer',
    'ki': 'Kikuyu, Gikuyu',
    'rw': 'Kinyarwanda',
    'ky': 'Kirghiz, Kyrgyz',
    'kv': 'Komi',
    'kg': 'Kongo',
    'ko': 'Korean',
    'ku': 'Kurdish',
    'kj': 'Kwanyama, Kuanyama',
    'la': 'Latin',
    'lb': 'Luxembourgish',
    'lg': 'Luganda',
    'li': 'Limburgish',
    'ln': 'Lingala',
    'lo': 'Lao',
    'lt': 'Lithuanian',
    'lu': 'Luba-Katanga',
    'lv': 'Latvian',
    'gv': 'Manx',
    'mk': 'Macedonian',
    'mg': 'Malagasy',
    'ms': 'Malay',
    'ml': 'Malayalam',
    'mt': 'Maltese',
    'mi': 'Māori',
    'mr': 'Marathi (Marāṭhī)',
    'mh': 'Marshallese',
    'mn': 'Mongolian',
    'na': 'Nauru',
    'nv': 'Navajo, Navaho',
    'nb': 'Norwegian Bokmål',
    'nd': 'North Ndebele',
    'ne': 'Nepali',
    'ng': 'Ndonga',
    'nn': 'Norwegian Nynorsk',
    'no': 'Norwegian',
    'ii': 'Nuosu',
    'nr': 'South Ndebele',
    'oc': 'Occitan',
    'oj': 'Ojibwe, Ojibwa',
    'cu': 'Old Church Slavonic',
    'om': 'Oromo',
    'or': 'Oriya',
    'os': 'Ossetian, Ossetic',
    'pa': 'Panjabi, Punjabi',
    'pi': 'Pāli',
    'fa': 'Persian',
    'pl': 'Polish',
    'ps': 'Pashto, Pushto',
    'pt': 'Portuguese',
    'qu': 'Quechua',
    'rm': 'Romansh',
    'rn': 'Kirundi',
    'ro': 'Romanian, Moldavan',
    'ru': 'Russian',
    'sa': 'Sanskrit (Saṁskṛta)',
    'sc': 'Sardinian',
    'sd': 'Sindhi',
    'se': 'Northern Sami',
    'sm': 'Samoan',
    'sg': 'Sango',
    'sr': 'Serbian',
    'gd': 'Scottish Gaelic',
    'sn': 'Shona',
    'si': 'Sinhala, Sinhalese',
    'sk': 'Slovak',
    'sl': 'Slovene',
    'so': 'Somali',
    'st': 'Southern Sotho',
    'es': 'Spanish; Castilian',
    'su': 'Sundanese',
    'sw': 'Swahili',
    'ss': 'Swati',
    'sv': 'Swedish',
    'ta': 'Tamil',
    'te': 'Telugu',
    'tg': 'Tajik',
    'th': 'Thai',
    'ti': 'Tigrinya',
    'bo': 'Tibetan',
    'tk': 'Turkmen',
    'tl': 'Tagalog',
    'tn': 'Tswana',
    'to': 'Tonga',
    'tr': 'Turkish',
    'ts': 'Tsonga',
    'tt': 'Tatar',
    'tw': 'Twi',
    'ty': 'Tahitian',
    'ug': 'Uighur, Uyghur',
    'uk': 'Ukrainian',
    'ur': 'Urdu',
    'uz': 'Uzbek',
    've': 'Venda',
    'vi': 'Vietnamese',
    'vo': 'Volapük',
    'wa': 'Walloon',
    'cy': 'Welsh',
    'wo': 'Wolof',
    'fy': 'Western Frisian',
    'xh': 'Xhosa',
    'yi': 'Yiddish',
    'yo': 'Yoruba',
    'za': 'Zhuang, Chuang',
    'zu': 'Zulu',
}

# The same data as a list of (code, name) tuples.
iso_639_choices = list(LANGUAGES.items())

# 2. Load data

In [79]:
# Read every CSV the plots below depend on; paths are relative to the working directory.

# Time-of-day activity.
tweets_by_hour_df = pd.read_csv('tweets_by_hour.csv')
mode_hour_df = pd.read_csv('mode_hour.csv')
tweets_by_datetime = pd.read_csv('tweets by datetime.csv')

# Tweet / retweet volumes and interaction counts.
# The two per-day tables are sorted by their 'date' column (ascending) as they are loaded.
tweets_versus_retweets = pd.read_csv('tweets_versus_retweets.csv')
likes_per_day = pd.read_csv('likes_per_day.csv').sort_values('date')
retweets_per_day = pd.read_csv('retweets_per_day.csv').sort_values('date')
corr_matrix = pd.read_csv('correlation matrix.csv')

# Categorical frequency tables.
languages = pd.read_csv('tweets_languages.csv')
value_freq_df = pd.read_csv('value_freq_df.csv')
targeting_freq_df = pd.read_csv('targeting_freq_df.csv')
emojis_tweets_df = pd.read_csv('emojis_tweets.csv')
tweet_counts = pd.read_csv('tweet_counts.csv')

In [80]:
# Sort both tables by their 'date' column (ascending) so the line traces built from them
# are drawn in row order from the earliest to the latest value.
mode_hour_df = mode_hour_df.sort_values('date')
tweets_versus_retweets = tweets_versus_retweets.sort_values('date')

In [81]:
# Cast the like counts to integers and reduce 'date' to plain `datetime.date` values.
likes_per_day = likes_per_day.assign(**{
    'likes per day': lambda frame: frame['likes per day'].astype(int),
    'date': lambda frame: pd.to_datetime(frame['date']).dt.date,
})

In [82]:
# Cast the retweet counts to integers and reduce 'date' to plain `datetime.date` values.
retweets_per_day = retweets_per_day.assign(**{
    'retweets per day': lambda frame: frame['retweets per day'].astype(int),
    'date': lambda frame: pd.to_datetime(frame['date']).dt.date,
})

In [83]:
languages['counts'] = languages['counts'].astype(int)

# Translate language codes to display names.
# `Series.map` with a dict yields NaN for every value that is not a key, so:
#   * codes missing from LANGUAGES would be lost from the x-axis of the language plot, and
#   * re-running this cell would turn the already-translated names into NaN.
# Falling back to the original value keeps unknown codes visible and makes the cell idempotent.
languages['language'] = languages['language'].map(LANGUAGES).fillna(languages['language'])

In [84]:
# Store the per-row tweet counts as integers.
tweets_by_datetime = tweets_by_datetime.astype({'counts': int})

In [85]:
# Cast counts to integers, then keep only the rows whose count is at least 150.
value_freq_df = value_freq_df.astype({'counts': int})
value_freq_df = value_freq_df.loc[value_freq_df['counts'] >= 150]


In [86]:
# Store the targeting-type counts as integers.
targeting_freq_df = targeting_freq_df.astype({'counts': int})

In [87]:
# Give the two columns explicit names, then store the usage counts as integers.
emojis_tweets_df.columns = ['emoji', 'counts']
emojis_tweets_df = emojis_tweets_df.astype({'counts': int})

In [88]:
# Keep only the emojis whose count is at least 50.
emojis_tweets_df = emojis_tweets_df.loc[lambda frame: frame['counts'] >= 50]

In [89]:
# Store the per-type tweet counts as integers.
tweet_counts = tweet_counts.astype({'Count': int})

# 3. Plotting

## 3.1. Tweets By Time

In [108]:
# Bar chart of the 'counts' column against 'hour'.
fig = px.bar(
    tweets_by_hour_df,
    x='hour',
    y='counts',
    title='Tweets By Hour',
    labels=dict(counts='Count', hour='Hours'),
    template='plotly_white',
)

# Place a tick at every hour value present in the data instead of letting plotly thin them out.
hour_ticks = tweets_by_hour_df['hour']
fig.update_xaxes(tickvals=hour_ticks)

fig.show()

In [92]:



# Summary statistics of the 'most active hour' column, drawn below as horizontal reference lines.
most_active_hour = mode_hour_df['most active hour']
mean_active_hour = most_active_hour.mean()
mode_active_hour = stats.mode(most_active_hour, keepdims=True)[0][0]
n_rows = len(mode_hour_df)

# Small vertical jitter so markers that share the same hour do not sit exactly on top of each other.
# The generator is seeded so that re-running the cell reproduces the same figure.
jitter = np.random.default_rng(42).uniform(-0.3, 0.3, n_rows)
scatter = go.Scatter(x=mode_hour_df['date'], y=most_active_hour + jitter,
                     mode='markers', name='Most Active Hour',
                     marker=dict(color='limegreen'))

# The un-jittered values joined in row order.
# NOTE(review): the legend label says "Line of Best Fit", but no fit is computed here;
# the trace simply connects the raw values. Confirm the intended label or add a real fit.
line = go.Scatter(x=mode_hour_df['date'], y=most_active_hour, mode='lines',
                  name='Most Active Hour Line of Best Fit', line=dict(color='navy'))

# Horizontal reference lines, built as constant-valued traces spanning every date.
mean_line = go.Scatter(x=mode_hour_df['date'], y=[mean_active_hour] * n_rows, mode='lines',
                       name='Mean Active Hour', line=dict(color='red', width=2))
mode_line = go.Scatter(x=mode_hour_df['date'], y=[mode_active_hour] * n_rows, mode='lines',
                       name='Mode Active Hour', line=dict(color='orange', width=2))

# Fixed-size canvas with a horizontal legend placed above the plotting area.
layout = go.Layout(title='Mode Hour', xaxis=dict(title='Date'), yaxis=dict(title='Most Active Hour'),
                   autosize=False, width=1000, height=500,
                   legend=dict(orientation='h', x=0, y=1.1))

# go.Figure stores copies of the traces it is given, so trace colours must be final before
# this call; re-colouring `scatter`, `line`, ... afterwards would not change the rendered figure.
fig = go.Figure(data=[scatter, line, mean_line, mode_line], layout=layout)

# Light grey plot background.
fig.update_layout(plot_bgcolor='rgba(240, 240, 240, 0.95)')

fig.show()


In [94]:
# Combine the separate 'date' and 'hour' columns into a single on-the-hour timestamp per row.
hour_text = tweets_by_datetime['hour'].astype(str) + ':00:00'
tweets_by_datetime['datetime'] = pd.to_datetime(tweets_by_datetime['date'] + ' ' + hour_text)

# Bubble chart: position and marker size both encode the tweet count.
fig = px.scatter(
    tweets_by_datetime,
    x='datetime',
    y='counts',
    size='counts',
    size_max=30,
    title='Tweets by Date and Hour',
    labels=dict(counts='Count'),
)

# Axis titles.
fig.update_layout(xaxis_title='Date and Hour', yaxis_title='Count')

fig.show()


## 3.2. Tweets Versus Retweets

In [95]:
# Remove the 'Unnamed: 0' column (presumably the index written out when the CSV was saved).
# `errors='ignore'` keeps the cell idempotent: the original in-place drop raised KeyError
# when the cell was run a second time, because the column was already gone.
tweets_versus_retweets = tweets_versus_retweets.drop(columns=['Unnamed: 0'], errors='ignore')

In [96]:
# Reduce 'date' to plain `datetime.date` values and store both count columns as integers.
tweets_versus_retweets = tweets_versus_retweets.assign(
    date=lambda frame: pd.to_datetime(frame['date']).dt.date,
    tweets=lambda frame: frame['tweets'].astype(int),
    retweets=lambda frame: frame['retweets'].astype(int),
)

In [106]:


def _event_marker(day, y_top, color, label):
    """Return a dashed vertical line trace at `day`, running from 0 up to `y_top`."""
    return go.Scatter(
        x=[day, day],
        y=[0, y_top],
        mode='lines',
        line={'color': color, 'width': 2, 'dash': 'dash'},
        name=label,
    )


dates = tweets_versus_retweets['date']
n_days = len(tweets_versus_retweets)

# Averages of the two series, drawn further down as flat dashed reference lines.
avg_tweets = tweets_versus_retweets['tweets'].mean()
avg_retweets = tweets_versus_retweets['retweets'].mean()

# Tallest value across both series; the event markers reach up to this height.
peak_count = max(tweets_versus_retweets['tweets'].max(), tweets_versus_retweets['retweets'].max())

# The two series themselves.
trace_tweets = go.Scatter(
    x=dates,
    y=tweets_versus_retweets['tweets'],
    mode='lines+markers',
    line={'color': 'navy', 'width': 2},
    name='Tweets',
)
trace_retweets = go.Scatter(
    x=dates,
    y=tweets_versus_retweets['retweets'],
    mode='lines+markers',
    line={'color': 'red', 'width': 2},
    name='Retweets',
)

# Vertical markers for three dated events.
gaza_war_2023 = pd.to_datetime('2023-10-07').date()
gaza_2023_trace = _event_marker(gaza_war_2023, peak_count, 'green',
                                'Start of Israeli Aggression on Gaza')

start_movement = pd.to_datetime('2020-07-01').date()
start_feminist_trace = _event_marker(start_movement, peak_count, 'yellowgreen',
                                     'Start of the 2020 Egyptian MeToo movement online')

second_deval = pd.to_datetime('2022-10-21').date()
second_deval_trace = _event_marker(second_deval, peak_count, 'Salmon',
                                   'Second Devaluation of the Egyptian Pound')

# Flat dashed lines at the two averages, spanning every date.
avg_tweets_trace = go.Scatter(
    x=dates,
    y=[avg_tweets] * n_days,
    mode='lines',
    line={'color': 'orange', 'width': 2, 'dash': 'dash'},
    name='Average Tweets',
)
avg_retweets_trace = go.Scatter(
    x=dates,
    y=[avg_retweets] * n_days,
    mode='lines',
    line={'color': 'purple', 'width': 2, 'dash': 'dash'},
    name='Average Retweets',
)

# Vertical date labels; legend anchored at the right-hand edge of the plot.
layout = go.Layout(
    title='Tweets Versus Retweets',
    xaxis={'title': 'Date', 'tickangle': 90},
    yaxis={'title': 'Count'},
    showlegend=True,
    legend={'x': 1, 'y': 1},
)

all_traces = [
    trace_tweets,
    trace_retweets,
    gaza_2023_trace,
    start_feminist_trace,
    second_deval_trace,
    avg_tweets_trace,
    avg_retweets_trace,
]
fig = go.Figure(data=all_traces, layout=layout)

fig.show()


## 3.3. Interactions Per Day

In [98]:


like_dates = likes_per_day['date']
retweet_dates = retweets_per_day['date']

# Reference levels: the mean of each per-day series.
mean_likes = likes_per_day['likes per day'].mean()
mean_retweets = retweets_per_day['retweets per day'].mean()

# Daily series: one solid line with markers per metric.
trace_likes = go.Scatter(
    x=like_dates,
    y=likes_per_day['likes per day'],
    mode='lines+markers',
    name='Likes per Day',
    line={'color': 'blue', 'width': 2},
    marker={'color': 'blue'},
)
trace_retweets = go.Scatter(
    x=retweet_dates,
    y=retweets_per_day['retweets per day'],
    mode='lines+markers',
    name='Retweets per Day',
    line={'color': 'red', 'width': 2},
    marker={'color': 'red'},
)

# Flat dashed lines at the two means, each spanning the dates of its own table.
mean_likes_trace = go.Scatter(
    x=like_dates,
    y=[mean_likes] * len(likes_per_day),
    mode='lines',
    name='Mean Likes',
    line={'color': 'orange', 'width': 2, 'dash': 'dash'},
)
mean_retweets_trace = go.Scatter(
    x=retweet_dates,
    y=[mean_retweets] * len(retweets_per_day),
    mode='lines',
    name='Mean Retweets',
    line={'color': 'purple', 'width': 2, 'dash': 'dash'},
)

# Vertical date labels; legend anchored at the right-hand edge of the plot.
layout_combined = go.Layout(
    title='Likes and Retweets Per Day with Mean Values',
    xaxis={'title': 'Date', 'tickangle': 90},
    yaxis={'title': 'Count'},
    showlegend=True,
    legend={'x': 1, 'y': 1},
)

combined_traces = [trace_likes, trace_retweets, mean_likes_trace, mean_retweets_trace]
fig_combined = go.Figure(data=combined_traces, layout=layout_combined)

fig_combined.show()


In [99]:

# Heatmap of the loaded correlation matrix; the same two labels annotate both axes.
# NOTE(review): assumes corr_matrix holds exactly two numeric columns and two rows - confirm against the CSV.
metric_labels = ['likes', 'retweets']
fig = px.imshow(
    corr_matrix,
    x=list(metric_labels),
    y=list(metric_labels),
    labels=dict(color='Correlation'),
    color_continuous_scale='Blues',
)

fig.show()


## 3.4. Languages

In [100]:
import plotly.express as px
import pandas as pd

# Bar chart of tweet counts per language.
# `color='counts'` is what makes the continuous colour scale (and its colour bar) take effect:
# without a colour column, px.bar ignores `color_continuous_scale`.
fig = px.bar(languages, x='language', y='counts', color='counts',
             labels={'counts': 'Count'},
             color_continuous_scale='Blues')

# Titles for the figure, both axes and the colour bar.
fig.update_layout(
    title='Tweets by Language',
    xaxis_title='Language',
    yaxis_title='Count',
    coloraxis_colorbar=dict(title='Count'),
)

# Rotate the tick labels so they read vertically.
fig.update_xaxes(tickangle=270)

fig.show()


## 3.5. Advertisement Frequency

In [101]:
# Bar chart of ad counts per targeting value (value_freq_df was filtered to counts >= 150 earlier).
# Title typo fixed: "Taregeting" -> "Targeting".
fig = px.bar(value_freq_df, x='targeting value', y='counts',
             title='Advertisement Targeting Distribution Using Values Used over 150 Times',
             labels={'targeting value': 'Targeting Value', 'counts': 'Number of Ads'})

# Rotate the tick labels so they read vertically.
fig.update_xaxes(tickangle=270)
fig.show()

In [102]:
# Bar chart of ad counts per targeting type.
# Title typo fixed: "Taregeting" -> "Targeting".
fig = px.bar(targeting_freq_df, x='targeting type', y='counts',
             title='Advertisement Targeting Distribution Using Target Type',
             labels={'targeting type': 'Targeting Type', 'counts': 'Number of Ads'})

# Rotate the tick labels so they read vertically.
fig.update_xaxes(tickangle=270)

fig.show()

## 3.6. Emojis

In [103]:
# Bar chart of usage counts per emoji, with horizontal tick labels.
fig = px.bar(
    emojis_tweets_df,
    x='emoji',
    y='counts',
    labels=dict(emoji='Emoji', counts='Number of Times Used'),
    title='Emoji Usage Distribution Among Emojis Used Over 50 Times',
)
fig.update_xaxes(tickangle=0)

fig.show()

## 3.7. Tweet Distribution

In [104]:
# Bar chart of tweet counts per tweet type, with horizontal tick labels.
# Title typo fixed: "Distibution" -> "Distribution".
fig = px.bar(tweet_counts, x='Type', y='Count', title='Tweet Type Distribution',
             labels={'Type': 'Tweet Type', 'Count': 'Count'})

fig.update_xaxes(tickangle=0)

fig.show()