In [1]:
import psycopg2
import pandas as pd
import config
from matplotlib import pylab as plt
import seaborn as sns
import numpy as np
from IPython.display import display, HTML
import re


color_palette = sns.color_palette(palette='muted', n_colors=None, desat=.75)
sns.set(context='notebook', palette=color_palette, style='whitegrid', font='sans-serif', font_scale=1.5, color_codes=False, rc=None)
pd.set_option('display.max_colwidth', -1)
table_styles = [{'selector': 'td',
                 'props': [('min-width', '100px'), ('text-align', 'center')]},
                {'selector': 'tr',
                 'props': [('border-bottom', '1px dotted black')]},
                {'selector': 'th',
                 'props': [('text-align', 'center')]}
               ]

%matplotlib inline

directory = "url_top_lists/"
stream = "comparison"

In [2]:
conn = None
try:
    # read connection parameters
    paramsS17 = config.cfgAzureS17()
    paramsS03 = config.cfgAzureS03()

    paramsF17 = config.cfgAzureF17()
    paramsF03 = config.cfgAzureF03()
    
    # connect to the PostgreSQL server
    print('Connecting to the PostgreSQL database...')
    connS17 = psycopg2.connect(**paramsS17)
    connS03 = psycopg2.connect(**paramsS03)
    
    connF17 = psycopg2.connect(**paramsF17)
    connF03 = psycopg2.connect(**paramsF03)

    # create a cursor
    curS17 = connS17.cursor()
    curS03 = connS03.cursor()
    
    curF17 = connF17.cursor()
    curF03 = connF03.cursor()

    # execute a statement
    print('PostgreSQL database version:')
    
    curS17.execute('SELECT version()')
    curS03.execute('SELECT version()')
    curF17.execute('SELECT version()')
    curF03.execute('SELECT version()')
    
    # display the PostgreSQL database server version
    db_version_curS17 = curS17.fetchone()
    db_version_curS03 = curS03.fetchone()
    db_version_curF17 = curF17.fetchone()
    db_version_curF03 = curF03.fetchone()
    
    print(db_version_curS17)
    print(db_version_curS03)
    print(db_version_curF17)
    print(db_version_curF03)

    # close the communication with the PostgreSQL
    curS17.close()
    curS03.close()
    curF17.close()
    curF03.close()

except (Exception, psycopg2.DatabaseError) as error:
    print(error)

Connecting to the PostgreSQL database...
PostgreSQL database version:
('PostgreSQL 9.6.7, compiled by Visual C++ build 1800, 64-bit',)
('PostgreSQL 9.6.7, compiled by Visual C++ build 1800, 64-bit',)
('PostgreSQL 9.6.7, compiled by Visual C++ build 1800, 64-bit',)
('PostgreSQL 9.6.7, compiled by Visual C++ build 1800, 64-bit',)


## Query User URLs

### Sampled Data - Week 03

In [7]:
userURLsS03 = pd.read_sql_query("SELECT * FROM user_urls", connS03 )
tweetsByUsersS03 = pd.read_sql_query('''SELECT ti.id as tweet_id, tu.id as user_id, ti.text, tu.name, tu.url, tu.created_at as user_creation_date FROM tweets_info as ti
                                        INNER JOIN tweets_users as tu ON ti.user_id = tu.id;''', connS03 )

print("Number of User URLs: %s" %len(userURLsS03))
userURLsS03.head()

print("Number of Tweets by User: %s" %len(tweetsByUsersS03))
tweetsByUsersS03.head()

Number of User URLs: 56768
Number of Tweets by User: 123680


Unnamed: 0,tweet_id,user_id,text,name,url,user_creation_date
0,952666342414577664,499069779,@ProjectVoteLiam Dieciseis #BestSoloBreakout #LiamPayne #iHeartAwards,For You; Belle,,Tue Feb 21 18:55:28 +0000 2012
1,952676816803770368,2992834163,Verkehr - Lotte: Brückenarbeiten an der A1 dauern länger als geplant - Coesfeld/Osnabrück (dpa) - Autofahrer auf der Autobahn 1 an der Landesgrenze zwischen Niedersachsen und Nordrhein-Westfalen müssen sich auch zu Beginn der neuen Woche auf Behinderu... https://t.co/zx3u0gpfDM,Deutschland Germany,,Thu Jan 22 19:45:11 +0000 2015
2,952676816820555776,2992834163,Kriminalität - Dortmund: Vermummte wollen Gaststätte mit Fußballfans stürmen - Dortmund/Wolfsburg (dpa) - Im Vorfeld der Begegnung der Fußball-Bundesligisten Borussia Dortmund und VfL Wolfsburg haben Polizeikräfte einen Übergriff auf Fans aus Niedersa... https://t.co/9rZa1ehjkv,Deutschland Germany,,Thu Jan 22 19:45:11 +0000 2015
3,952676825183944705,2599674960,"15.01.18 00:00 Uhr: #Münster #Spritpreis Diesel 1.282, E5 1.46, E10 1.44 Ø-Preise #Tankerkönig.de",Tankerkönig.de,http://www.tankerkoenig.de,Wed Jul 02 11:36:29 +0000 2014
4,952676760390336512,15391102,Jeff and MacKenzie Bezos donate $33M scholarship grant for DACA students https://t.co/lz34y81ni2 https://t.co/dxJdfFyY9o,WUSA9,http://www.wusa9.com,Fri Jul 11 13:19:21 +0000 2008


In [10]:
top50url = userURLsS03['resolved_url'].value_counts()[:50]

In [16]:
top50url.index[0]
tweetsByUsersS03[tweetsByUsersS03['url'] == top50url.index[0]]

Unnamed: 0,tweet_id,user_id,text,name,url,user_creation_date
