In [None]:
import json
import pickle
import numpy as np
import gzip
import math
import csv
import datetime
import copy
import dateutil.parser
import random
%matplotlib inline
import matplotlib.pyplot as plt
import pylab as pl
import seaborn as sns
from scipy.stats import pearsonr, spearmanr, ttest_ind, mannwhitneyu, kruskal, ks_2samp
from statsmodels.distributions.empirical_distribution import ECDF
import location
from utils import Utils
from tweet import Tweet
from video import Video

## Notebook-wide settings and the shared helper objects used by the cells below.
## Campaign identifier; the options listed by the original author are [abo | gun | blm].
campaign = 'abo'

# NOTE(review): presumably the bin width in days (7 would match the "Weeks" label
# passed to the tweet-volume plot further down) -- confirm against Utils.
bin_size = 7
# NOTE(review): presumably selects which user-connection data is used -- confirm against Utils.
connection_type = 'followers'
# NOTE(review): presumably the year of the data set -- confirm against Utils.
year = 2018

# All four settings are handed to a single Utils instance.
util = Utils(campaign, bin_size, connection_type, year)

# Tweet and Video are both constructed from that same Utils instance.
tweet = Tweet(util)
video = Video(util)


## Create the available early adopters with their tweets, locations, and political leanings.

In [None]:
## Fetch the tweets of the available early adopters and report how many there are.
# NOTE(review): the meaning of the 0.2 argument is not visible in this notebook --
# check Tweet.getAvailableTweets before changing it.
ea_tweets = tweet.getAvailableTweets(0.2)
# The printed figure is the number of keys in ea_tweets.
print('#tweets by available early adopters: {}'.format(len(ea_tweets.keys())))

## Compute the tweet volume distribution and plot it as a line chart.
ea_tweets_counts = tweet.getTweetVolumeDistribution(ea_tweets)
# NOTE(review): the two strings are presumably axis labels; confirm their order
# (x vs. y) against Utils.plotLineChart.
util.plotLineChart(ea_tweets_counts, "# of tweets", "Weeks")

# Sort tweet counts by week
# ea_tweets_counts_sorted_ind, dates_sorted = util.sortVolumeByDate(ea_tweets_counts)


In [None]:
## Methods for tweet-video-user analysis

# Analyze videos by tweet volume within a specific date interval
# tweet.sortVideosByTweetVolumeForSpecificInterval(ea_tweets, '2018-04-15', '2018-04-22')

# Print tweets that refer to a specific video within a specific date interval
# tweet.printTweetsByVideoId(ea_tweets, '2017-01-01', '2018-05-01', 'Rw9E_nGVXxA')

# Analyze tweets of most-tweeted user.
# tweet.analyzeTweetsOfMostTweetedUsers(ea_tweets)

In [None]:
## Find early adopters' locations

# tweet.assignUserLocations(ea_tweets)

In [None]:
## Find political leanings of seed early adopters.

# First, find co-occurrences of hashtags (potential ones for expansion) with the seed political hashtags.
# hashtags, ht_occur_vec, cooccur_mat = tweet.findHashtagCooccurrences()

# Then, manually analyze these hashtags, and decide which hashtags to select for political leaning expansion.
# Create 'left_political_hashtags_extended.txt' and 'right_political_hashtags_extended.txt' manually.

# Based on extended political hashtags, assign political labels to seed early adopters.
# leaning_labels = tweet.assignUserLeaningLabels()


## Analyze how many early adopters there are, along with their tweets (by type) and followers.

In [None]:
# tweet.summarizeUsersTweetsInfo(ea_tweets)
# tweet.summarizeFollowers()

### Analyze Communities

In [None]:
## Community summarization
# tweet.summarizeCommunities()

## (Optional) check political leanings for each community.
# predefined_community_leanings, inferred_community_leanings = tweet.checkLeaningsInCommunities()

### Analyze tweet volume per video given a time interval

In [None]:
# video_ids = tweet.sortVideosByTweetVolumeForSpecificInterval(ea_tweets, '2017-01-01', '2018-05-01')
# video_ids = [video_id[0] for video_id in video_ids]
# print(len(video_ids))


## NW Structural, Engagement, Temporal, Language and Cascade Operations.

In [None]:
## Create sub_followings_dict once to make the cascade computation cost efficient.
# tweet.createSubFollowingsDictForCascade()

### NW Structural Measures

In [None]:
## Calculate structural measures for each video in filtered video set.
# structural_measures = tweet.getAllNetworkStructureMeasures(ea_tweets)

## Analyze structural measures for each video in filtered video set.
# measure_type = 'nw_size'
# video_leanings_probs = tweet.assignVideoLeaningLabels(ea_tweets)
# tweet.analyzeStructuralMeasures(measure_type, video_leanings_probs)

### Engagement Measures

In [None]:
## Calculate engagement measures for each video in filtered video set.
# engagement_measures = tweet.getAllEngagementMeasures(ea_tweets)

## Analyze engagement measures for each video in filtered video set.
# measure_type = 'num_replies'
# video_leanings_probs = tweet.assignVideoLeaningLabels(ea_tweets)
# tweet.analyzeEngagementMeasures(measure_type, video_leanings_probs)

### Temporal Measures

In [None]:
## Calculate temporal measures for each video in filtered video set.
# temporal_measures = tweet.getAllTemporalMeasures(ea_tweets)

## Analyze temporal measures for each video in filtered video set.
# measure_type = 'nw_temporal_diff_between_max_indegree_user'
# video_leanings_probs = tweet.assignVideoLeaningLabels(ea_tweets)
# tweet.analyzeTemporalMeasures(measure_type, video_leanings_probs)

## This is an analysis for a given video.
#nw_temporal_diff_wrt_first_tweet_mean, nw_temporal_diff_wrt_first_tweet_median, nw_temporal_diff_between_pairs_mean, nw_diff_speed_mnw_temporal_diff_between_pairs_median, nw_life_time, nw_temporal_diff_between_first_tweets_of_source_users_mean, nw_temporal_diff_between_first_tweets_of_source_users_median, nw_temporal_diff_between_max_indegree_user = tweet.getTemporalMeasuresByVideoId(ea_tweets, 'Iqc4BrAzDio')
#print(nw_temporal_diff_wrt_first_tweet_mean, nw_temporal_diff_wrt_first_tweet_median, nw_temporal_diff_between_pairs_mean, nw_diff_speed_mnw_temporal_diff_between_pairs_median, nw_life_time, nw_temporal_diff_between_first_tweets_of_source_users_mean, nw_temporal_diff_between_first_tweets_of_source_users_median, nw_temporal_diff_between_max_indegree_user)
#'lpskBk0fp4o'
#'Iqc4BrAzDio'

### Language Measures

In [None]:
## Calculate language measures for each video in filtered video set.
# language_liwc_measures, language_empath_measures = tweet.getAllLanguageMeasures(ea_tweets)

## Analyze language measures for each video in filtered video set.
## dict_type: [liwc | empath]
# dict_type = 'liwc'
# measure_type = 'p2'
# video_leanings_probs = tweet.assignVideoLeaningLabels(ea_tweets)
# tweet.analyzeLanguageMeasures(dict_type, measure_type, video_leanings_probs)

In [None]:
## Analyze language measures for each video in filtered video set.
# leaning = 'right'
# dict_type = 'liwc'
# measure_type = 'negate'
# video_leanings_probs = tweet.assignVideoLeaningLabels(ea_tweets)
# tweet.analyzeTweetsByLanguageCategory(dict_type, measure_type, video_leanings_probs, ea_tweets, leaning)

### Cascade Measures

In [None]:
## Calculate cascade for each video in filtered video set.
# cascades = tweet.getAllTweetCascades(ea_tweets)

## Analyze cascades
# video_leanings_probs = tweet.assignVideoLeaningLabels(ea_tweets)
# tweet.analyzeCascadeMeasures(video_leanings_probs)

## YouTube Operations

### YT Reaction Measures

In [None]:
## Analyze YT Reaction measures.
# video_leanings_probs = tweet.assignVideoLeaningLabels(ea_tweets)
# video_measures = tweet.analyzeDivisiveContentsByScatterPlots(video_leanings_probs)

### YT Raw Measures comparison

In [None]:
## Analyze YT raw measures.
# measure_type = 'duration'
# video_leanings_probs = tweet.assignVideoLeaningLabels(ea_tweets)
# video_measures = tweet.analyzeVideosByProperty(video_leanings_probs, measure_type)

### YT Raw Measures (Normalized) comparison

In [None]:
## Analyze YT raw (normalized) measures.
# measure_type = 'dislikeCount'
# video_leanings_probs = tweet.assignVideoLeaningLabels(ea_tweets)
# video_measures = tweet.analyzeVideosByNormalizedProperty(video_leanings_probs, measure_type)

## Some other helper functions from Utils to create annotation files for Videos

In [None]:
# util.createCandidateVideosFile()
# util.createVideosFileForManualAnnotations()
# util.createVideosFileForManualAnnotationsUsingSubtitles()