<img style="width: 20%; border: 1px solid black" src="redbird.png">
<h2 style="text-align: center">U MAD? cloh or naw?</h2>
    

In [5]:
# General:
import tweepy           # To consume Twitter's API
import pandas as pd     # To handle data
import numpy as np      # For number computing

from textblob import TextBlob
import re
import requests

# For plotting and visualization:
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# We import our access keys:
from credentials import *    # This will allow us to use the keys as variables

# API's setup:
def twitter_setup():
    """
    Build an authenticated tweepy API client.

    Uses the CONSUMER_KEY / CONSUMER_SECRET and ACCESS_TOKEN /
    ACCESS_SECRET names, which are expected to be in scope via the
    credentials import at the top of this cell.

    Returns:
        The tweepy.API object wrapping the configured OAuth handler.
    """
    # Consumer pair goes to the handler constructor, access pair is set after.
    handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    handler.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
    return tweepy.API(handler)

def clean_tweet(tweet):
    '''
    Utility function to clean the text in a tweet by removing
    mentions, links and special characters using regex.

    Every match is replaced by a space, then runs of whitespace are
    collapsed so the result is single-space separated with no leading
    or trailing blanks.

    Args:
        tweet: Raw tweet text (str).

    Returns:
        The cleaned text containing only ASCII letters, digits and
        single spaces.
    '''
    # Raw string: "\w" and "\S" are regex escapes, not string escapes.
    # The mention alternative includes "_" because screen names may contain
    # it; without it "@foo_bar" would leave "bar" behind in the text.
    # Alternatives are tried left to right at each position:
    #   1. @mentions   2. any char that is not alphanumeric/space/tab   3. URLs
    pattern = r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+://\S+)"
    return ' '.join(re.sub(pattern, " ", tweet).split())

def analize_sentiment(tweet):
    '''
    Map a tweet's TextBlob polarity onto a three-way label.

    The text is run through clean_tweet before it is scored.

    Returns:
        1 when the polarity is above zero, 0 when it is exactly zero,
        and -1 otherwise.
    '''
    polarity = TextBlob(clean_tweet(tweet)).sentiment.polarity
    if polarity > 0:
        return 1
    return 0 if polarity == 0 else -1
    
# We create an extractor object: a module-level API client built once when
# this cell runs and read as a global by analyze_user / analyze_location.
extractor = twitter_setup()

In [None]:
def analyze_user(user):
    """
    Print an engagement and sentiment report for a user's recent tweets.

    Requests up to 200 timeline tweets for ``user`` through the
    module-level ``extractor``, displays the first rows as a dataframe,
    prints the average tweet length and the most liked / most retweeted
    tweets, then prints and plots the share of positive, neutral and
    negative tweets as labelled by ``analize_sentiment``.

    Args:
        user: Screen name, passed as ``screen_name`` to the timeline call.

    Returns:
        None. Everything is printed or displayed.
    """
    tweets = extractor.user_timeline(screen_name=user, count=200)
    print("Number of tweets extracted: {}.\n".format(len(tweets)))

    # With no tweets every statistic below would index an empty frame or
    # divide by zero, so stop with a message instead of a traceback.
    if not tweets:
        print("No tweets found for " + user + ".")
        return

    # One row per tweet; column order is the order shown in the table.
    data = pd.DataFrame({
        'Tweets': [tweet.text for tweet in tweets],
        'Length': [len(tweet.text) for tweet in tweets],
        'Date':   [tweet.created_at for tweet in tweets],
        'Source': [tweet.source for tweet in tweets],
        'Likes':  [tweet.favorite_count for tweet in tweets],
        'RTs':    [tweet.retweet_count for tweet in tweets],
    })

    # Show full tweet text. None is the supported "no limit" value; the old
    # -1 sentinel is deprecated/rejected by current pandas.
    pd.set_option('display.max_colwidth', None)
    display(data.head(10))

    print("Average length of {}'s tweets: {} characters".format(
        user, int(data['Length'].mean())))

    # idxmax returns the first row holding the maximum, matching a
    # "first tweet with the top count" lookup.
    fav = data['Likes'].idxmax()
    rt = data['RTs'].idxmax()

    print("The tweet with the most likes is: \n{}".format(data.loc[fav, 'Tweets']))
    print("It has {} likes.".format(data.loc[fav, 'Likes']))
    print("It has {} characters.\n".format(data.loc[fav, 'Length']))

    print("The tweet with the most retweets is: \n{}".format(data.loc[rt, 'Tweets']))
    print("It has {} retweets.".format(data.loc[rt, 'RTs']))
    print("It has {} characters.\n".format(data.loc[rt, 'Length']))

    # Sentiment label per tweet: 1 positive, 0 neutral, -1 negative.
    data['SA'] = [analize_sentiment(text) for text in data['Tweets']]

    total = len(data)
    pos_count = int((data['SA'] > 0).sum())
    neu_count = int((data['SA'] == 0).sum())
    neg_count = int((data['SA'] < 0).sum())

    # Keep the percentages numeric; they are formatted only when printed.
    # Comparing pre-formatted strings would order them lexicographically
    # ("9.50" > "85.00") and pick the wrong dominant sentiment.
    percent_pos = pos_count * 100 / total
    percent_neu = neu_count * 100 / total
    percent_neg = neg_count * 100 / total

    print("Percentage of positive tweets: {:.2f}%".format(percent_pos))
    print("Percentage of neutral tweets: {:.2f}%".format(percent_neu))
    print("Percentage of negative tweets: {:.2f}%".format(percent_neg))

    # Pie chart of the three shares.
    labels = 'Positive', 'Neutral', 'Negative'
    sentiments = [percent_pos, percent_neu, percent_neg]
    colors = ['gold', 'lightcoral', 'lightskyblue']
    explode = (0.1, 0.1, 0.1)  # offset every slice slightly from the centre

    plt.pie(sentiments, explode=explode, labels=labels, colors=colors,
            autopct='%1.1f%%', shadow=True, startangle=140)
    plt.axis('equal')
    plt.show()

    # Verdict is decided on the integer counts; on a tie positive wins over
    # negative, and negative over neutral (same precedence as the branches).
    top_count = max(pos_count, neu_count, neg_count)
    if top_count == pos_count:
        print(user + " is really positive!")
    elif top_count == neg_count:
        print(user + " is really negative.")
    else:
        print(user + " is neutral.")

In [8]:
def analyze_location(user):
    """
    Print an engagement and sentiment report for tweets from a location.

    Looks up ``user`` as a city-granularity place query through the
    module-level ``extractor``, searches tweets tagged with the first
    matching place id, displays the first rows as a dataframe, prints the
    average tweet length and the most liked / most retweeted tweets, then
    prints and plots the share of positive, neutral and negative tweets as
    labelled by ``analize_sentiment``.

    Args:
        user: Free-text location query (the parameter name is kept for
            backward compatibility; it is a place, not a screen name).

    Returns:
        None. Everything is printed or displayed.
    """
    places = extractor.geo_search(query=user, granularity="city")
    # No match: indexing the first result would raise IndexError.
    if not places:
        print("No place found matching {}.".format(user))
        return
    place_id = places[0].id

    print()

    # NOTE(review): the search endpoint may cap results below the requested
    # 200 per call - confirm against the API in use.
    tweets = extractor.search(q="place:%s" % place_id, count=200)

    print("Number of tweets extracted from {}: {}.\n".format(user, len(tweets)))
    print()

    # With no tweets every statistic below would index an empty frame or
    # divide by zero, so stop with a message instead of a traceback.
    if not tweets:
        print("No tweets found for " + user + ".")
        return

    # One row per tweet; column order is the order shown in the table.
    # A tweet may carry no place object, so fall back to a placeholder
    # instead of dereferencing None.
    data = pd.DataFrame({
        'Tweets':   [tweet.text for tweet in tweets],
        'Length':   [len(tweet.text) for tweet in tweets],
        'Location': [tweet.place.full_name if tweet.place else "Undefined place"
                     for tweet in tweets],
        'Date':     [tweet.created_at for tweet in tweets],
        'Source':   [tweet.source for tweet in tweets],
        'Likes':    [tweet.favorite_count for tweet in tweets],
        'RTs':      [tweet.retweet_count for tweet in tweets],
    })

    # Show full tweet text. None is the supported "no limit" value; the old
    # -1 sentinel is deprecated/rejected by current pandas.
    pd.set_option('display.max_colwidth', None)
    display(data.head(10))

    print("Average length of {}'s tweets: {} characters".format(
        user, int(data['Length'].mean())))

    # idxmax returns the first row holding the maximum, matching a
    # "first tweet with the top count" lookup.
    fav = data['Likes'].idxmax()
    rt = data['RTs'].idxmax()

    print("The tweet with the most likes is: \n{}".format(data.loc[fav, 'Tweets']))
    print("It has {} likes.".format(data.loc[fav, 'Likes']))
    print("It has {} characters.\n".format(data.loc[fav, 'Length']))

    print("The tweet with the most retweets is: \n{}".format(data.loc[rt, 'Tweets']))
    print("It has {} retweets.".format(data.loc[rt, 'RTs']))
    print("It has {} characters.\n".format(data.loc[rt, 'Length']))

    # Sentiment label per tweet: 1 positive, 0 neutral, -1 negative.
    data['SA'] = [analize_sentiment(text) for text in data['Tweets']]

    total = len(data)
    pos_count = int((data['SA'] > 0).sum())
    neu_count = int((data['SA'] == 0).sum())
    neg_count = int((data['SA'] < 0).sum())

    # Keep the percentages numeric; they are formatted only when printed.
    # Comparing pre-formatted strings would order them lexicographically
    # ("9.50" > "85.00") and pick the wrong dominant sentiment.
    percent_pos = pos_count * 100 / total
    percent_neu = neu_count * 100 / total
    percent_neg = neg_count * 100 / total

    print("Percentage of positive tweets: {:.2f}%".format(percent_pos))
    print("Percentage of neutral tweets: {:.2f}%".format(percent_neu))
    print("Percentage of negative tweets: {:.2f}%".format(percent_neg))

    # Pie chart of the three shares.
    labels = 'Positive', 'Neutral', 'Negative'
    sentiments = [percent_pos, percent_neu, percent_neg]
    colors = ['gold', 'lightcoral', 'lightskyblue']
    explode = (0.1, 0.1, 0.1)  # offset every slice slightly from the centre

    plt.pie(sentiments, explode=explode, labels=labels, colors=colors,
            autopct='%1.1f%%', shadow=True, startangle=140)
    plt.axis('equal')
    plt.show()

    # Verdict is decided on the integer counts; on a tie positive wins over
    # negative, and negative over neutral (same precedence as the branches).
    top_count = max(pos_count, neu_count, neg_count)
    if top_count == pos_count:
        print(user + " is really positive!")
    elif top_count == neg_count:
        print(user + " is really negative.")
    else:
        print(user + " is neutral.")

In [None]:
# Ask which report to run. The answer is normalised so that the upper-case
# letters advertised in the prompt (and stray whitespace) are accepted.
option = input("Would you like to search by username (U), or location (L)? ").strip().lower()

print()
if option == "u":
    # Username search -> per-user timeline report.
    user = input("Please enter a username: @ ")
    analyze_user(user)
elif option == "l":
    # Location search -> per-place report.
    user = input("Please enter a location: ")
    analyze_location(user)
else:
    print("Please enter a valid input, it's just one character!")