In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter, defaultdict

In [2]:
# Load the tweet export into a DataFrame. The path is relative, so the CSV
# must sit in the notebook's working directory.
tweets_df = pd.read_csv("tweets_till_0901.csv")

In [3]:
tweets_df.columns

Index(['id', 'conversation_id', 'created_at', 'date', 'time', 'timezone',
       'user_id', 'username', 'name', 'place', 'tweet', 'mentions', 'urls',
       'photos', 'replies_count', 'retweets_count', 'likes_count', 'hashtags',
       'cashtags', 'link', 'retweet', 'quote_url', 'video', 'near', 'geo',
       'source', 'user_rt_id', 'user_rt', 'retweet_id', 'reply_to',
       'retweet_date', 'translate', 'trans_src', 'trans_dest'],
      dtype='object')

In [4]:
# Restrict the analysis to the first 190000 rows of the loaded frame.
all_tweets = tweets_df[:190000]

# Pull out the columns used by the rest of the notebook.
caa_tweets = all_tweets["tweet"]
hashtags = all_tweets["hashtags"]
date = all_tweets["date"]
retweets_count = all_tweets["retweets_count"]
likes_count = all_tweets["likes_count"]

# One tuple per tweet, laid out as
#   (tweet text, date, hashtags, retweets_count, likes_count)
# Later cells index into these tuples by position (0 = text, 1 = date,
# 3 = retweets, 4 = likes).
useful_tweets = list(
    zip(caa_tweets, date, hashtags, retweets_count, likes_count)
)

#### Function to separate Hindi tweets from the others

In [5]:
def separate_english_hindi_tweets(tweets):
    """Split tweet tuples into those without and those with Devanagari text.

    Parameters
    ----------
    tweets : iterable of sequences
        Each item must carry the tweet text (a string) at index 0. The item
        itself is passed through unchanged.

    Returns
    -------
    (etweets, htweets) : tuple of two lists
        ``htweets`` holds every item whose text contains at least one
        character from the Unicode Devanagari block. ``etweets`` holds all
        remaining items; these are merely "not Hindi" and may still contain
        other non-English scripts. Input order is preserved in both lists.
    """
    # The Devanagari block spans U+0900..U+097F (code points 2304..2431).
    # Using the exact block bounds also catches the danda and the Devanagari
    # digits (U+0964..U+096F), and no longer matches U+08FD..U+08FF, which
    # belong to a different block.
    devanagari_first, devanagari_last = 0x0900, 0x097F

    etweets = []
    htweets = []

    for tweet_tup in tweets:
        text = tweet_tup[0]
        # any() stops at the first Devanagari character.
        has_devanagari = any(
            devanagari_first <= ord(ch) <= devanagari_last for ch in text
        )
        if has_devanagari:
            htweets.append(tweet_tup)
        else:
            etweets.append(tweet_tup)

    return etweets, htweets

In [6]:
# Split the tweet tuples by script. The function returns two freshly built
# lists, so no placeholder assignments are needed before the call.
english_tweets, hindi_tweets = separate_english_hindi_tweets(useful_tweets)

#### Separate English from others

In [7]:
# Second pass over the non-Hindi tweets: keep a tweet in `etweets` only if
# every character is on the allow-list below; otherwise it goes to `otweets`.
#
# NOTE(review): the open-ended "> 9000" range was meant for smileys, but it
# also admits every other code point above 9000 (e.g. CJK ideographs), so
# tweets in those scripts are kept as "English" — confirm this is acceptable.
etweets = []
otweets = []

for etup in english_tweets:
    for ch in etup[0]:
        cp = ord(ch)
        allowed = (
            ch == "\n"                 # line breaks
            or 32 <= cp <= 249         # printable ASCII and most of Latin-1
            or 8201 <= cp <= 8399      # special punctuation
            or cp >= 9001              # smileys
        )
        if not allowed:
            # First disallowed character decides: not an English tweet.
            otweets.append(etup)
            break
    else:
        # Inner loop finished without a break: every character was allowed.
        etweets.append(etup)

In [8]:
etweets[0]

('Yet another reason why India needs #CAA: \n\nHindus Beaten by Pakistani Police for Hoisting Saffron Flag in Their Own Home. Video Published to Cower Other Hindus into Submission!\n\n https://www.youtube.com/watch?v=lTQxDeBmCyI\xa0…\n@MEAIndia @Swamy39 @blsanthosh @davidfrawleyved @MODIfiedVikas @ShefVaidya',
 '2020-01-08',
 "['#caa']",
 0,
 0)

In [9]:
# Rank the English tweets by retweet count (tuple slot 3), highest first.
retweet_sorted_tweets = sorted(
    etweets,
    key=lambda tweet_tup: tweet_tup[3],
    reverse=True,
)

In [10]:
retweet_sorted_tweets[:10]

[('#CAA + #NRC + more this Sunday on @patriotact pic.twitter.com/AIoAub8Fwu',
  '2019-12-20',
  "['#caa', '#nrc']",
  17739,
  42902),
 ('My university 😍\nStudents of #PanjabUniversity in support of #CAA\nChandigarh is not only beautiful, but Nationalist too ❤️  pic.twitter.com/NUgngteXur',
  '2019-12-18',
  "['#panjabuniversity', '#caa']",
  9247,
  28077),
 ('Stop watching Hindi movies of the actors, directors, writers, anyone who has supported the violent protests, looting and arson by Muzlims over #CAA. This is ONLY way to teach them a lesson. Starve them of money.',
  '2019-12-18',
  "['#caa']",
  8570,
  21134),
 ('Delhi with #CAA\nDelhi with @narendramodi \nDelhi with @AmitShah pic.twitter.com/pDgdIoZLvh',
  '2019-12-20',
  "['#caa']",
  8561,
  20395),
 ('#CAA is meant to provide fast track citizenship to non-Muslim families from Afghanistan, Pakistan and Bangladesh that have been lynched, raped and persecuted for generations due to their religious beliefs. \n\nWhatever side yo

In [11]:
# Rank the English tweets by like count (tuple slot 4), highest first.
likes_sorted_tweets = sorted(
    etweets,
    key=lambda tweet_tup: tweet_tup[4],
    reverse=True,
)

In [12]:
likes_sorted_tweets[:10]

[('#CAA + #NRC + more this Sunday on @patriotact pic.twitter.com/AIoAub8Fwu',
  '2019-12-20',
  "['#caa', '#nrc']",
  17739,
  42902),
 ('My university 😍\nStudents of #PanjabUniversity in support of #CAA\nChandigarh is not only beautiful, but Nationalist too ❤️  pic.twitter.com/NUgngteXur',
  '2019-12-18',
  "['#panjabuniversity', '#caa']",
  9247,
  28077),
 ('Stop watching Hindi movies of the actors, directors, writers, anyone who has supported the violent protests, looting and arson by Muzlims over #CAA. This is ONLY way to teach them a lesson. Starve them of money.',
  '2019-12-18',
  "['#caa']",
  8570,
  21134),
 ('India stands strong with Hon @narendramodi ji & Hon @AmitShah ji for solving decades old problem by #CAA & giving new lease of life in Bharat, to our brothers & sisters facing religious persecution in neighbouring countries.\n\n(Siliguri #WestBengal on 24 Dec ‘19)\n#IndiaSupportsCAA pic.twitter.com/EnUURXAWol',
  '2019-12-30',
  "['#caa', '#westbengal', '#indiasupports

In [13]:
# Module-level tally: date string -> number of tweets seen for that date.
dates = defaultdict(int)


def count_date_tweets(tweet_tups):
    """Add one to ``dates[d]`` for every tuple whose index-1 element is ``d``.

    The counts accumulate in the module-level ``dates`` mapping and nothing is
    returned. Calling this again on the same tuples counts them again, so
    re-run the cell that creates ``dates`` first to start from zero.
    """
    for day in (record[1] for record in tweet_tups):
        dates[day] += 1

In [14]:
count_date_tweets(etweets)

In [15]:
# Unzip the per-date tally into two parallel lists that share the mapping's
# insertion order: dates_for_tweets[i] is the date, tweets_per_date[i] its count.
dates_for_tweets = list(dates)
tweets_per_date = [dates[day] for day in dates_for_tweets]

In [16]:
print(len(tweets_per_date))

242


In [17]:
print(len(dates_for_tweets))

242


In [18]:
print(dates_for_tweets)

['2020-01-08', '2020-01-07', '2020-01-06', '2020-01-05', '2020-01-04', '2020-01-03', '2020-01-02', '2020-01-01', '2019-12-31', '2019-12-30', '2019-12-29', '2019-12-28', '2019-12-27', '2019-12-26', '2019-12-25', '2019-12-24', '2019-12-23', '2019-12-22', '2019-12-21', '2019-12-20', '2019-12-19', '2019-12-18', '2019-12-17', '2019-12-16', '2019-12-15', '2019-12-14', '2019-12-13', '2019-12-12', '2019-12-11', '2019-12-10', '2019-12-09', '2019-12-08', '2019-12-07', '2019-12-06', '2019-12-05', '2019-12-04', '2019-12-03', '2019-12-02', '2019-12-01', '2019-11-30', '2019-11-29', '2019-11-28', '2019-11-27', '2019-11-26', '2019-11-25', '2019-11-24', '2019-11-23', '2019-11-22', '2019-11-21', '2019-11-20', '2019-11-19', '2019-11-18', '2019-11-17', '2019-11-16', '2019-11-15', '2019-11-14', '2019-11-13', '2019-11-12', '2019-11-11', '2019-11-10', '2019-11-09', '2019-11-08', '2019-11-07', '2019-11-06', '2019-11-05', '2019-11-04', '2019-11-03', '2019-11-02', '2019-11-01', '2019-10-31', '2019-10-30', '2019

In [19]:
# %matplotlib auto
# plt.barh(list(dates.keys()), list(dates.values()))
# plt.show()

In [20]:
# Position of 2019-06-15 within dates_for_tweets; list.index raises ValueError
# if no tweet was counted for that date.
dates_for_tweets.index('2019-06-15')

207

In [21]:
etweets[107]

 '2020-01-08',
 "['#pune', '#caa', '#istandwithdeepika', '#laxmiagarwal']",
 0,
 0)