In [None]:
# NOTE(review): the helpers called further down (add_hashtags, add_sentiment_score, ...)
# and the input frames (sample_tweets, tweets, users, ...) are not defined in the visible
# part of this notebook — presumably they come from the module notebooks below. Confirm,
# and re-enable these lines so that Restart Kernel -> Run All works on a fresh kernel.
# %run ./modules/load_data_module.ipynb
# %run ./modules/extract_info_clean_data_module.ipynb
# %run ./modules/update_dataframe_module.ipynb
# %run ./modules/sentiment_module.ipynb
# %run ./modules/semantic_similarity_module.ipynb

In [None]:
import pandas as pd
import spacy
import string
import regex as re
import nltk
import ast
import copy
import glob
import advertools as adv
import plotly.graph_objects as go
import time
from datetime import datetime



# Raise the display width limit so text columns are shown up to 285 characters.
pd.options.display.max_colwidth = 285



from textblob import TextBlob

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


from spacymoji import Emoji
from nltk.corpus import stopwords
from urllib.parse import urlparse
from textblob import Word
from sklearn.metrics.pairwise import cosine_similarity



# Load spaCy's small English pipeline.
nlp = spacy.load("en_core_web_sm")
# NOTE(review): the pipeline component is registered by name on the next line, so this
# separately constructed Emoji instance looks redundant — confirm nothing else reads
# `emoji` before removing it.
emoji = Emoji(nlp)
# Register the "emoji" component as the first step of the pipeline.
nlp.add_pipe("emoji", first=True)


# English Snowball stemmer from NLTK.
stemmer = nltk.SnowballStemmer("english")


# Corpus downloads kept for reference; presumably only needed once per environment.
# nltk.download('wordnet')
# nltk.download('omw-1.4')


# GEN-DATA 

### Add list of hashtags found in tweet/reply/quote

In [None]:
# Store the output of add_hashtags in a 'hashtags' column on every frame.
# A single loop keeps the helper name in one place, so all six frames are
# guaranteed to go through the same function (sample and full data alike).
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['hashtags'] = add_hashtags(frame)

### Add count of hashtags in tweet/reply/quote

In [None]:
# Fill 'hashtag_count' from add_nbr_hashtags, visiting the frames in the
# same order as before: the three samples first, then the full data.
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['hashtag_count'] = add_nbr_hashtags(frame)

### Add number of uppercase characters in tweet/reply/quote

In [None]:
# Fill 'uppercase_count' from add_uppercase_count on each frame
# (samples first, then the full tweets/replies/quotes frames).
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['uppercase_count'] = add_uppercase_count(frame)

### Add percentage of uppercase characters in tweet/reply/quote

In [None]:
# Fill 'uppercase_pct' from add_upper_case_pct on each frame
# (samples first, then the full tweets/replies/quotes frames).
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['uppercase_pct'] = add_upper_case_pct(frame)

### Add number of exclamation marks found in tweet/reply/quote

In [None]:
# Fill 'exclamation_mark_count' from add_exclamation_mark_count on each frame
# (samples first, then the full tweets/replies/quotes frames).
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['exclamation_mark_count'] = add_exclamation_mark_count(frame)

### Add number of question marks found in tweet/reply/quote

In [None]:
# Fill 'question_mark_count' from add_question_mark_count on each frame
# (samples first, then the full tweets/replies/quotes frames).
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['question_mark_count'] = add_question_mark_count(frame)

### Add number of URLs found in tweet/reply/quote

In [None]:
# Fill 'url_count' from add_url_count on each frame
# (samples first, then the full tweets/replies/quotes frames).
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['url_count'] = add_url_count(frame)

### Add number of mentions in tweet/reply/quote

In [None]:
# Fill 'mention_count' from add_mention_count on each frame
# (samples first, then the full tweets/replies/quotes frames).
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['mention_count'] = add_mention_count(frame)

### Add number of emojis found in tweet/reply/quote

In [None]:
# Fill the emoji count from add_emoji_count on each frame. The column key is
# spelled 'emojie_count' on purpose: it is kept exactly as the existing data uses it.
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['emojie_count'] = add_emoji_count(frame)

### Add the number of followers of each tweet's author

In [None]:
# Only the tweet frames get 'followers_count'; add_followers_count receives
# the tweet frame together with the shared `users` frame.
for frame in (sample_tweets, tweets):
    frame['followers_count'] = add_followers_count(frame, users)

### Add engagement score

In [None]:
# Only the tweet frames get 'engagement_score' (sample first, then full data).
for frame in (sample_tweets, tweets):
    frame['engagement_score'] = add_engagement_score(frame)

## sentiment

### Add sentiment score to tweet/reply/quote

In [None]:
# Fill 'sentiment_score' from add_sentiment_score on each frame
# (samples first, then the full tweets/replies/quotes frames).
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['sentiment_score'] = add_sentiment_score(frame)

### Add sentiment category (pos, neg, neu) to tweet/reply/quote

In [None]:
# Fill 'sentiment_category' from add_sentiment_category on each frame
# (samples first, then the full tweets/replies/quotes frames).
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['sentiment_category'] = add_sentiment_category(frame)

### Add average sentiment of replies and of quotes for each tweet

In [None]:
# For the sample and the full data in turn, store the average sentiment of the
# replies and then of the quotes on the matching tweet frame.
for tweet_frame, reply_frame, quote_frame in (
    (sample_tweets, sample_replies, sample_quotes),
    (tweets, replies, quotes),
):
    tweet_frame['avg_replies_sentiment'] = add_average_conversation_sentiment(tweet_frame, reply_frame)
    tweet_frame['avg_quotes_sentiment'] = add_average_conversation_sentiment(tweet_frame, quote_frame)

### Add average sentiment of replies + quotes of each tweet

In [None]:
# The "conversation" is replies followed by quotes, concatenated on the fly;
# the sample data is handled first, then the full data.
for tweet_frame, reply_frame, quote_frame in (
    (sample_tweets, sample_replies, sample_quotes),
    (tweets, replies, quotes),
):
    tweet_frame['avg_conversation_sentiment'] = add_average_conversation_sentiment(
        tweet_frame, pd.concat([reply_frame, quote_frame])
    )

### Add variance of replies/quotes/replies+quotes sentiment

In [None]:
# Per data set (sample, then full): sentiment variance over replies, over quotes,
# and over the concatenation of replies followed by quotes — same order as before.
for tweet_frame, reply_frame, quote_frame in (
    (sample_tweets, sample_replies, sample_quotes),
    (tweets, replies, quotes),
):
    tweet_frame['var_replies_sentiment'] = add_variance_of_conversation_sentiment(tweet_frame, reply_frame)
    tweet_frame['var_quotes_sentiment'] = add_variance_of_conversation_sentiment(tweet_frame, quote_frame)
    tweet_frame['var_conversation_sentiment'] = add_variance_of_conversation_sentiment(
        tweet_frame, pd.concat([reply_frame, quote_frame])
    )

### Add percentage of replies/quotes with similar sentiment to the reference tweet

In [None]:
# Per data set (sample, then full): store the output of add_pct_sentiment_category
# for the replies and then for the quotes on the matching tweet frame.
for tweet_frame, reply_frame, quote_frame in (
    (sample_tweets, sample_replies, sample_quotes),
    (tweets, replies, quotes),
):
    tweet_frame['pct_similar_sentiment_replies'] = add_pct_sentiment_category(tweet_frame, reply_frame)
    tweet_frame['pct_similar_sentiment_quotes'] = add_pct_sentiment_category(tweet_frame, quote_frame)

### text cleaning

In [None]:
# Fill 'clean_text' from add_clean_text on each frame
# (samples first, then the full tweets/replies/quotes frames).
for frame in (sample_tweets, sample_replies, sample_quotes, tweets, replies, quotes):
    frame['clean_text'] = add_clean_text(frame)

### Add tweet to conversation (note: add clean text to the conversation dataframe first!)

In [None]:
# Each conversation frame gets a 'clean_tweet' column built by
# add_tweet_to_conversation from its tweet frame; the pairs run in the original order.
for tweet_frame, conversation_frame in (
    (sample_tweets, sample_replies),
    (sample_tweets, sample_quotes),
    (tweets, replies),
    (tweets, quotes),
):
    conversation_frame['clean_tweet'] = add_tweet_to_conversation(tweet_frame, conversation_frame)

### Add semantic similarity to conversations

In [None]:
# Semantic similarity is computed for the two sample conversation frames only,
# from each frame's 'clean_tweet' and 'clean_text' columns.
for conversation_frame in (sample_replies, sample_quotes):
    conversation_frame['semantic_similarity'] = add_tweet_conv_semantic_similarity(
        conversation_frame['clean_tweet'], conversation_frame['clean_text']
    )

In [None]:
# Do not use this anymore!!!
# replies['semantic_similarity'] = add_tweet_conv_semantic_similarity(replies['clean_tweet'], replies['clean_text'])

In [None]:
# Do not use this anymore!!!
# quotes['semantic_similarity'] = add_tweet_conv_semantic_similarity(quotes['clean_tweet'], quotes['clean_text'])

### Add variance of semantic similarity scores 

In [None]:
# NOTE(review): the var_*_semantic columns in this section are filled by
# add_variance_of_conversation_sentiment, the same helper that fills the
# var_*_sentiment columns. This looks like a copy-paste slip: confirm which helper
# should compute the variance of 'semantic_similarity'. Also note that
# 'semantic_similarity' is only computed for the sample frames; the cells for the
# full replies/quotes frames are commented out.
sample_tweets['var_replies_semantic'] = add_variance_of_conversation_sentiment(sample_tweets, sample_replies)

In [None]:
sample_tweets['var_quotes_semantic'] = add_variance_of_conversation_sentiment(sample_tweets, sample_quotes)

In [None]:
# The combined frame is quotes followed by replies.
sample_tweets['var_conversation_semantic'] = add_variance_of_conversation_sentiment(sample_tweets, pd.concat([sample_quotes, sample_replies]))

In [None]:
tweets['var_replies_semantic'] = add_variance_of_conversation_sentiment(tweets, replies)

In [None]:
tweets['var_quotes_semantic'] = add_variance_of_conversation_sentiment(tweets, quotes)

In [None]:
# The combined frame is quotes followed by replies.
tweets['var_conversation_semantic'] = add_variance_of_conversation_sentiment(tweets, pd.concat([quotes, replies]))

In [None]:
tweets.to_feather('tweets_final_version.feather')

In [None]:
tweets_list = split_dataframe(tweets)