In [15]:
import psycopg2
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.axes as ax
import seaborn as sn
from IPython.display import display

In [16]:
def connect_to_database():
    """Open a PostgreSQL connection using the 'dbconnectioncredentials' file.

    The file is opened relative to the current working directory and is read
    as one ``KEY=value`` pair per line. The values stored under ``SERVER``,
    ``DATABASE_NAME``, ``USER_NAME``, ``PASSWORD`` and ``PORT`` are passed to
    ``psycopg2.connect``. Lines without an ``=`` (for example blank lines)
    are ignored.

    Returns:
        The connection object returned by ``psycopg2.connect``.

    Raises:
        OSError: if the credentials file cannot be opened.
        KeyError: if one of the five required keys is missing from the file.
    """
    creds_dict = {}
    # The context manager closes the file even if parsing raises.
    with open('dbconnectioncredentials', 'r') as file:
        for line in file:
            # Remove only the line terminator, so a final line that has no
            # trailing newline keeps its last character.
            line = line.rstrip('\r\n')
            # Split on the first '=' only: a value (e.g. a password) may
            # itself contain '='.
            key, separator, value = line.partition('=')
            if not separator:
                # Blank or malformed line: nothing to record.
                continue
            creds_dict[key] = value
    connection = psycopg2.connect(
        host=creds_dict['SERVER'],
        dbname=creds_dict['DATABASE_NAME'],
        user=creds_dict['USER_NAME'],
        password=creds_dict['PASSWORD'],
        port=creds_dict['PORT']
    )
    return connection


In [17]:
# Open one database connection and one cursor at module level; the query
# cells further down execute their SQL through this shared `cursor`.
connection = connect_to_database()
cursor = connection.cursor()


In [18]:
def create_correlation_hashtags_plot(sql_query):
    """Run ``sql_query`` and draw a heatmap of hashtag occurrences per date.

    The query is executed through the module-level ``cursor``. Every returned
    row must have exactly two columns, which are labelled ``Date`` and
    ``Hashtag``. The rows are cross-tabulated (dates as rows, hashtags as
    columns, each cell holding the number of rows for that pair) and the
    resulting table is drawn with seaborn's heatmap.

    Note that, despite the function name, the plotted table holds occurrence
    counts, not correlation coefficients.

    Args:
        sql_query: SQL text passed unchanged to ``cursor.execute``.

    Returns:
        None. The figure is displayed with ``plt.show()``; when the query
        returns no rows a message is printed and nothing is drawn.
    """
    cursor.execute(sql_query)
    rows = cursor.fetchall()
    df = pd.DataFrame(rows, columns=['Date', 'Hashtag'])
    if df.empty:
        # Nothing to cross-tabulate or plot when the query returned no rows.
        print("No rows returned by the query; nothing to plot.")
        return
    # Occurrence count of every (date, hashtag) pair.
    counts = pd.crosstab(df.Date, df.Hashtag)
    plt.figure(figsize=(7, 5))
    sn.heatmap(counts, cmap='gnuplot')
    plt.show()


In [19]:
# One (date, hashtag) row per tweet/hashtag link, restricted to the hashtags
# this notebook labels as "popular".
QUERY_POPULAR_HASHTAGS = (
    "SELECT cast(tweets.createdat as date) AS Date, hashtags.name AS Hashtag FROM TWEETSHASHTAGS "
    "INNER JOIN tweets on tweetid = tweets.id "
    "INNER JOIN hashtags on hashtagid=hashtags.id "
    "WHERE hashtags.name in ('brexit', 'getbrexitdone', 'stopbrexit');"
)


# Draw the per-date heatmap for the hashtags selected by QUERY_POPULAR_HASHTAGS.
create_correlation_hashtags_plot(QUERY_POPULAR_HASHTAGS)


In [20]:
# One (date, hashtag) row per tweet/hashtag link, restricted to the hashtags
# this notebook labels as "less popular".
QUERY_LESS_POPULAR_HASHTAGS = (
    "SELECT cast(tweets.createdat as date) AS Date, hashtags.name AS Hashtag FROM TWEETSHASHTAGS "
    "INNER JOIN tweets on tweetid = tweets.id "
    "INNER JOIN hashtags on hashtagid=hashtags.id "
    "WHERE hashtags.name in ('brexitdeal', 'hardbrexit');"
)

# Draw the per-date heatmap for the hashtags selected by QUERY_LESS_POPULAR_HASHTAGS.
create_correlation_hashtags_plot(QUERY_LESS_POPULAR_HASHTAGS)


In [21]:
# Average of the `sentiment` column per calendar day of `createdat`.
QUERY_SENTIMENT_AVG = "SELECT cast(tweets.createdat as date) AS Date, avg(sentiment) AS sentiment_average FROM TWEETS group by cast(tweets.createdat as date);"
cursor.execute(QUERY_SENTIMENT_AVG)

df = pd.DataFrame(cursor.fetchall(), columns=['date', 'sentiment_average'])

# The query has no ORDER BY, so sort the plotted series by date; otherwise the
# line is drawn in whatever order the database returned the groups.
daily_sentiment = df.set_index('date')['sentiment_average'].sort_index()

# Set the font scale before drawing so the figure is created with it.
sn.set(font_scale=1.4)
# Plot once: a second Series.plot() call would draw the same line again on
# the same axes.
daily_sentiment.plot(figsize=(15, 10), linewidth=2.5)
plt.xlabel("Date", labelpad=15)
# One tick per date; rotation must be a number (or 'vertical'/'horizontal'),
# not the string '90'.
plt.xticks(df.date, rotation=90)
plt.ylabel("Sentiment average", labelpad=15)
plt.show()


