In [None]:
from datetime import timedelta
from string import ascii_letters, digits
from os import mkdir, path
import twint
import pandas as pd
import nest_asyncio
nest_asyncio.apply()

def clean_name(dirname):
    """Return ``dirname`` with everything except ASCII letters and digits removed."""
    allowed_chars = frozenset(ascii_letters + digits)
    kept = [ch for ch in dirname if ch in allowed_chars]
    return ''.join(kept)


def twint_search(searchterm, since, until, json_name):
    '''
    Run one Twint search over a date window and store the results as JSON.

    Parameters
    ----------
    searchterm : value assigned to ``Config.Search``.
    since, until : values assigned to ``Config.Since`` and ``Config.Until``.
    json_name : value assigned to ``Config.Output`` (the JSON output path).

    Any ``Exception`` raised by the search is reported on stdout and then
    swallowed, so a caller looping over many windows keeps going.
    ``KeyboardInterrupt`` and ``SystemExit`` are not caught and propagate.
    '''
    c = twint.Config()
    c.Search = searchterm
    c.Since = since
    c.Until = until
    c.Hide_output = True
    c.Store_json = True
    c.Output = json_name

    try:
        twint.run.Search(c)
    except Exception as exc:
        # Best-effort by design: report which window failed (and why) and
        # continue. ``except Exception`` leaves interpreter-exit signals alone.
        print("Problem with %s: %r" % (since, exc))




def twint_loop(searchterm, since, until, tag_name = ''):
    '''
    Run ``twint_search`` once per day between ``since`` and ``until``.

    Results go into a directory named ``clean_name(searchterm)`` (created if
    it does not exist), one file per start day, named
    ``<tag_name><YYYY-MM-DD>.json``.

    Parameters
    ----------
    searchterm : search string, passed through to ``twint_search``.
    since, until : bounds handed to ``pd.date_range``; every date in that
        range is used as the start of one search window.
    tag_name : optional prefix for each JSON file name.

    NOTE(review): each window runs from the start day to start day + 2 days,
    while start days advance one day at a time, so consecutive windows
    overlap. Presumably this is deliberate and duplicates are removed
    downstream -- confirm.
    '''
    dirname = clean_name(searchterm)
    try:
        # Create target Directory
        mkdir(dirname)
        print("Directory" , dirname ,  "Created ")
    except FileExistsError:
        print("Directory" , dirname ,  "already exists")

    for start_date in pd.date_range(since, until):
        # Separate local names so the ``since``/``until`` parameters are not rebound.
        window_start = start_date.strftime("%Y-%m-%d")
        window_end = (start_date + timedelta(days=2)).strftime("%Y-%m-%d")

        json_name = path.join(dirname, tag_name + '%s.json' % window_start)

        print('Getting %s ' % window_start)
        twint_search(searchterm, window_start, window_end, json_name)

In [1]:
# Scrape '#bumble' tweets for the date range below via twint_loop.
# NOTE(review): the two date strings are padded differently ('7-01-2020' vs
# '09-30-2020'); presumably both are month-first -- confirm how pd.date_range
# parses them, or switch to ISO dates.
twint_loop('#bumble', '7-01-2020', '09-30-2020')

In [2]:
## SENTIMENT ANALYSIS

In [3]:
from wordcloud import WordCloud
from glob import glob
from os import path
import pandas as pd
import re
from textblob import TextBlob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
# Select matplotlib's 'fivethirtyeight' style sheet for the figures in this notebook.
plt.style.use('fivethirtyeight')

In [6]:
# Merge the per-day JSON files into a single DataFrame.
from glob import glob

# NOTE(review): this pattern only matches *.json files in the current working
# directory -- confirm the notebook runs from the folder holding the scraped files.
# Sorting makes the row order independent of the filesystem's listing order.
file_names = sorted(glob(path.join('', '*.json')))
if not file_names:
    # pd.concat([]) would otherwise fail with "No objects to concatenate".
    raise FileNotFoundError("No *.json files found in the current directory")

dfs = [pd.read_json(fn, lines = True) for fn in file_names]
bumble_df = pd.concat(dfs)

# Keep only the columns the analysis uses. The explicit copy means later
# column assignments on `tweets` never touch `bumble_df`.
tweets = bumble_df[['tweet', 'date']].copy()

# Preview the merged frame (last expression, so the notebook displays it).
bumble_df.head()

In [None]:
# Derive string columns for the calendar year ('%Y') and month number ('%m')
# from each tweet's date.
date_column = tweets['date']
tweets['Year'] = date_column.map(lambda stamp: stamp.strftime('%Y'))
tweets['month'] = date_column.map(lambda stamp: stamp.strftime('%m'))

In [None]:
# Upper-case the tweet text so the brand filters below are case-insensitive.
tweets['tweet'] = tweets['tweet'].str.upper()

# Keep tweets that contain @BUMBLE or #BUMBLE as a complete tag.
# The negative lookahead (?!\w) ends the tag at any non-word character or at
# the end of the tweet, so tags followed by punctuation, a newline, or nothing
# are kept, while longer tags such as #BUMBLEBEE are still excluded.
# na=False treats missing text as "no match".
bumble_rate = tweets[tweets['tweet'].str.contains(r'@BUMBLE(?!\w)', na=False)]
bumble_hash = tweets[tweets['tweet'].str.contains(r'#BUMBLE(?!\w)', na=False)]

# Clean the text
def cleanTxt(text):
    """Strip Twitter markup from a tweet and return the remaining text.

    Removes, in this order:
      * @mentions (letters, digits and underscores after the ``@``),
      * ``#`` characters (the hashtag word itself is kept),
      * the retweet marker ``RT`` followed by one whitespace character,
      * http/https URLs, in any letter case.

    Parameters
    ----------
    text : str
        Raw tweet text.

    Returns
    -------
    str
        The text with the items above removed. Surrounding whitespace is
        left as is.
    """
    text = re.sub(r'@\w+', '', text)  # \w also covers '_' in handles
    text = re.sub(r'#', '', text)
    # \b restricts the match to the stand-alone token "RT"; without it words
    # that merely end in "RT" (HEART, START, ...) would lose their last letters.
    text = re.sub(r'\bRT\s', '', text)
    text = re.sub(r'https?://\S+', '', text, flags=re.IGNORECASE)
    return text

In [None]:
# Combine the @BUMBLE and #BUMBLE matches. A tweet that matched both filters
# appears twice after the concat, so exact duplicate rows are dropped.
# reset_index gives a unique 0..n-1 index, so a positional-style lookup such as
# tweets['polarity'][i] addresses exactly one row.
tweets = (
    pd.concat([bumble_rate, bumble_hash])
    .drop_duplicates()
    .reset_index(drop=True)
)
tweets['CleanTweet'] = tweets['tweet'].apply(cleanTxt)

# The same matches, de-duplicated on the tweet text alone.
df = pd.concat([bumble_rate, bumble_hash]).drop_duplicates(subset=["tweet"])
df.head()

In [None]:
# Subjectivity score of a text, as computed by TextBlob
def getSubjectivity(text):
    """Return the ``sentiment.subjectivity`` value TextBlob reports for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity

# Polarity score of a text, as computed by TextBlob
def getPolarity(text):
    """Return the ``sentiment.polarity`` value TextBlob reports for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity

# Score every cleaned tweet, one new column per sentiment measure.
for column_name, scorer in (('polarity', getPolarity), ('subjectivity', getSubjectivity)):
    tweets[column_name] = tweets['CleanTweet'].apply(scorer)

In [None]:
# Map a polarity score to a sentiment label
def getAnalysis(score):
    """Classify a polarity score by its sign.

    Returns 'Positive' for scores above zero, 'Negative' for scores below
    zero and 'Neutral' for exactly zero. A score that satisfies none of the
    three comparisons (e.g. NaN) yields None.
    """
    if score > 0:
        return 'Positive'
    if score < 0:
        return 'Negative'
    if score == 0:
        return 'Neutral'
    return None
# Attach a sentiment label to every tweet based on its polarity score.
polarity_scores = tweets['polarity']
tweets['Analysis'] = polarity_scores.apply(getAnalysis)

In [None]:
# Count the non-null entries of every remaining column per
# (month, Year, Analysis) group and preview the first rows.
grouping_keys = ['month', 'Year', 'Analysis']
df_aggregated = tweets.groupby(grouping_keys).count()
df_aggregated.head()

In [None]:
# Plot the polarity and subjectivity
# A single vectorised scatter call: the two columns are passed whole, so the
# plot does not depend on the frame's index labels (a per-row tweets[col][i]
# lookup fails or returns several rows when the index is not exactly 0..n-1)
# and only one artist is created instead of one per tweet.
plt.figure(figsize = (8,6))
plt.scatter(tweets['polarity'], tweets['subjectivity'], color = 'Blue')
plt.title('Polarity vs. Subjectivity')
plt.xlabel('Polarity')
plt.ylabel('Subjectivity')
plt.show()

In [None]:
# Bar chart of the number of tweets per sentiment label.
# (Author's observation on the original data: the counts skew positive.)
# Title and axis labels are applied to the Axes pandas returns, *after*
# plotting: pandas may set an axis label of its own (from the index name)
# while drawing, which would override labels set beforehand.
ax = tweets['Analysis'].value_counts().plot(kind = 'bar')
ax.set_title('Sentiment Analysis')
ax.set_xlabel('Sentiment')
ax.set_ylabel('Counts')
plt.show()