# Data Warehouse with Redshift - Queries
Run this notebook after loading all final tables for analytics.
>
> **Stephanie Anderton**  
> DEND Project \#3  
> May 29, 2019
>

In [1]:
import configparser
import psycopg2
import pandas as pd
import json
import time
import mylib
from mylib import logger
import re
%load_ext sql

## Main()

In [3]:
logger.info('---[ Run Queries Notebook ]---')
mylib.log_timestamp()
print("Logfile:  " + mylib.get_log_file_name())

config = configparser.ConfigParser()
config.read('dwh.cfg')

HOST         = config['CLUSTER']['HOST']
DB_NAME      = config['CLUSTER']['DB_NAME']
DB_USER      = config['CLUSTER']['DB_USER']
DB_PASSWORD  = config['CLUSTER']['DB_PASSWORD']
DB_PORT      = config['CLUSTER']['DB_PORT']

ARN          = config['IAM_ROLE']['ARN']

# Connection to database
conn_string_2 = "postgresql://{}:{}@{}:{}/{}".format(DB_USER, DB_PASSWORD, 
                                                     HOST, DB_PORT, DB_NAME)
logger.info('connected to database')
print(conn_string_2)
%sql $conn_string_2

Logfile:  ./logs/etl-20190529.log


## Display top 10 rows of Tables

In [6]:
%%sql
SELECT * FROM staging_events
LIMIT  10;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
10 rows affected.


event_key,artist,auth,firstname,gender,iteminsession,lastname,length,level,location,method,page,registration,sessionid,song,status,ts,useragent,userid
45,Tamba Trio,Logged In,Kaylee,F,4,Summers,177.18812,free,"Phoenix-Mesa-Scottsdale, AZ",PUT,NextSong,1540344794796.0,139,Quem Quiser Encontrar O Amor,200,1541106496796,"""Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36""",8
109,M.I.A.,Logged In,Ryan,M,2,Smith,233.7171,free,"San Jose-Sunnyvale-Santa Clara, CA",PUT,NextSong,1541016707796.0,169,Mango Pickle Down River (With The Wilcannia Mob),200,1541109325796,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",26
31,Tracy Gang Pussy,Logged In,Stefany,F,2,White,221.33506,free,"Lubbock, TX",PUT,NextSong,1540708070796.0,82,I Have A Wish,200,1541122457796,"""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36""",83
95,The Decemberists,Logged In,Lily,F,1,Koch,242.59873,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796.0,172,Everything I Try to Do_ Nothing Seems to Turn Out Right,200,1541149456796,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
159,Tiziano Ferro,Logged In,Lily,F,7,Koch,251.42812,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796.0,172,Ed Ero Contentissimo,200,1541150809796,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
223,Marques Houston,Logged In,Lily,F,15,Koch,264.98567,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796.0,172,Naked,200,1541152676796,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
287,She & Him,Logged In,Lily,F,23,Koch,167.83628,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796.0,172,Got Me,200,1541154085796,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
351,Avantasia,Logged In,Lily,F,31,Koch,368.97914,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796.0,172,Shelter From The Rain,200,1541155991796,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
415,Calle 13 Featuring CafÃÂ© Tacuba,Logged In,Lily,F,37,Koch,293.32853,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796.0,172,No Hay Nadie Como TÃÂº,200,1541157637796,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
479,Washed Out,Logged In,Tegan,F,3,Levine,168.6722,free,"Portland-South Portland, ME",PUT,NextSong,1540794356796.0,165,New Theory,200,1541158225796,"""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36""",80


In [7]:
%%sql
SELECT * FROM staging_songs
LIMIT  10;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
10 rows affected.


song_id,artist_id,artist_latitude,artist_longitude,artist_location,artist_name,title,duration,year
SOPMHBX12A8AE47391,AROBOIW11F4C84748F,,,,Swingin' Utters/Youth Brigade,'39 (Swingin' Utters),199.23546,0
SOEGODK12AB01892B9,ARBLYCZ11E2835EF67,,,,Three Trapped Tigers,11,374.59546,2010
SONKWKG12A6D4F9E91,AR6XSN41187B99CC29,37.16793,-95.84502,United States,Living Sacrifice,180,295.31383,1997
SOMGZPP12AB01867CE,ARPNKPA1187FB5CE9E,,,,Wang Wen,2005.1.26,296.38485,2007
SOYRACV12A8C13C4D8,AR0S3YD1187B99967F,,,"Jacksonville, FL",The Classics IV,24 Hours Of Loneliness,127.65995,1975
SOLOYXZ12AB017FA7D,ARQQLYQ11E2835E7E9,,,Toronto ON,Bruce Peninsula,2nd 4th World War,303.75138,2009
SOTXGKK12A8C144B8F,ARDN4NG1187FB5755E,35.67048,139.74092,Tokyo,Nobuo Uematsu,A Bit Of Fight,108.48608,0
SOFJOOB12A8C134405,ARQYPI21187FB50F1B,,,,The Homens,A Bolsa de Toquio,213.49832,2007
SOXLRYZ12A6D4F7FDE,ARI84T71187B9ACA53,,,,The Sleepy Jackson,A Cold War,155.37587,0
SOYDFAN12A81C21BA7,ARMGY881187FB35EA3,,,,Dof,A Compass Rose,261.53751,0


In [8]:
%%sql
SELECT * FROM songplays
LIMIT  10;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
10 rows affected.


sp_songplay_id,sp_start_time,sp_user_id,sp_level,sp_song_id,sp_artist_id,sp_session_id,sp_location,sp_user_agent
134,2018-11-04 07:31:31,25,paid,SOHWVJJ12AB0185F6D,ARASYMJ1187B9ACAF2,128,"Marinette, WI-MI","""Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36"""
138,2018-11-04 09:19:03,44,paid,SOCSXKQ12A6D4F95A0,ARRE7IQ1187FB4CF13,196,"Waterloo-Cedar Falls, IA",Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0
162,2018-11-09 17:35:10,80,paid,SOOUXUD12AB0188D97,ARWKFPM1187FB4E712,416,"Portland-South Portland, ME","""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36"""
351,2018-11-10 20:02:59,36,paid,SOFOOFA12A58A7965E,ARQGRMP11E2835E446,439,"Janesville-Beloit, WI","""Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36"""
212,2018-11-11 15:00:37,67,free,SOCHRXB12A8AE48069,ARTDQRC1187FB4EFD4,414,"Nashville-Davidson--Murfreesboro--Franklin, TN","""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"""
114,2018-11-13 10:34:37,44,paid,SOPRQEX12A8C133558,ARRJ3UC1187FB579D7,474,"Waterloo-Cedar Falls, IA",Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0
145,2018-11-15 16:04:40,32,free,SORTSWB12A58A7DCA3,ARBAW9R1187B98FB6B,554,"New York-Newark-Jersey City, NY-NJ-PA","""Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"""
343,2018-11-16 14:21:12,85,paid,SOLRYQR12A670215BF,ARNLO5S1187B9B80CC,436,"Red Bluff, CA","""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36"""
82,2018-11-16 20:13:00,49,paid,SOMVLEG12AB017C1D0,ARJKLZN12086C16C46,648,"San Francisco-Oakland-Hayward, CA",Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0
81,2018-11-17 15:41:21,73,paid,SONQLSC12AB01816E0,ARQFJDL1187B98BF86,518,"Tampa-St. Petersburg-Clearwater, FL","""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.78.2 (KHTML, like Gecko) Version/7.0.6 Safari/537.78.2"""


In [9]:
%%sql
SELECT * FROM users
LIMIT  10;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
10 rows affected.


u_user_id,u_first_name,u_last_name,u_gender,u_level
2,Jizelle,Benjamin,F,free
3,Isaac,Valdez,M,free
4,Alivia,Terrell,F,free
5,Elijah,Davis,M,free
6,Cecilia,Owens,F,free
7,Adelyn,Jordan,F,free
8,Kaylee,Summers,F,free
9,Wyatt,Scott,M,free
10,Sylvie,Cruz,F,free
11,Christian,Porter,F,free


In [10]:
%%sql
SELECT * FROM songs
LIMIT  10;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
10 rows affected.


s_song_id,s_title,s_artist_id,s_year,s_duration
SOAADAD12A8C13D5B0,One Shot (Album Version),ARQTC851187B9B03AF,2005,263.99302
SOAAKBE12A8C139075,Emília,ART1OPW1187FB3C5EF,0,159.7122
SOAAOLZ12A6D4FB403,Somebody Knew,ARRSUF71187FB52F33,1997,188.08118
SOAAUVF12A58A7D58C,Hickory Wind (Remastered LP Version),AR9VCSR1187B9B879E,1979,243.48689
SOAAXUI12A6D4F9936,towards Osiris,ARTHSAE12131B4B70A,0,475.50649
SOABBVH12AF72A5B57,My Love I Love,ARUIM291187FB3911A,2007,132.41424
SOABNBI12A8C13F890,Mon légionaire,AR5O86P1187FB3B8CD,0,216.86812
SOACAKP12AB018638D,Faut que tu fesses fort,ARZORKK11E2835CDA4,1999,137.92608
SOACMYN12AB018321D,Why I Pack My Lunch,ARS371M1187B992F09,0,180.94975
SOACPIZ12AB017E36D,Atone,ARZC56B119B866897E,2010,273.89342


In [11]:
%%sql
SELECT * FROM artists
LIMIT  10;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
10 rows affected.


a_artist_id,a_name,a_location,a_latitude,a_longitude
AR01IP11187B9AF5D2,Call To Preserve,"Rockledge, Florida",28.33268,-80.73486
AR02T3I1187FB4D0E5,Aberfeldy,"Edinburgh, Scotland",,
AR02YGA1187B9B8AC4,Bersuit Vergarabat,Buenos Aires,-34.60852,-58.37354
AR03P141187B9B8CE6,Teo Mammucari,,,
AR03Z7E1187FB44816,The Colourfield Featuring Sinead O'Connor,"Manchester, England",53.4796,-2.24881
AR049S81187B9AE8A5,The Human League,"Sheffield, Yorkshire, England",53.38311,-1.46454
AR04NH21187FB4485B,Demise Of Eros,,,
AR05VW21187FB407B4,Fudge Tunnel,Nottingham,52.94922,-1.14392
AR07HWO1187B9A2189,Lil' Band O' Gold,,,
AR083KA1187FB3EACE,[re:jazz],,,


In [12]:
%%sql
SELECT * FROM time
LIMIT  10;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
10 rows affected.


t_start_time,t_hour,t_day,t_week,t_month,t_year,t_weekday
2018-11-02 01:30:41,1,2,44,11,2018,5
2018-11-02 02:42:48,2,2,44,11,2018,5
2018-11-02 03:05:57,3,2,44,11,2018,5
2018-11-02 09:31:00,9,2,44,11,2018,5
2018-11-02 10:53:11,10,2,44,11,2018,5
2018-11-02 10:59:19,10,2,44,11,2018,5
2018-11-02 11:23:28,11,2,44,11,2018,5
2018-11-02 11:39:40,11,2,44,11,2018,5
2018-11-02 11:51:58,11,2,44,11,2018,5
2018-11-02 11:55:37,11,2,44,11,2018,5


---
## Example Queries

### Top 10 Songs in songplays

In [13]:
%%sql
WITH songplays_ext  AS (
         SELECT *
         FROM   songplays
         JOIN   songs
         ON     sp_song_id = s_song_id
         JOIN   artists
         ON     sp_artist_id = a_artist_id
)

SELECT   s_title AS "song title",
         a_name AS "artist name",
         COUNT(*) count
FROM     songplays_ext
GROUP BY s_title, a_name
ORDER BY count DESC, s_title
LIMIT    10;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
10 rows affected.


song title,artist name,count
You're The One,Dwight Yoakam,37
Catch You Baby (Steve Pitron & Max Sanna Radio Edit),Lonnie Gordon,9
I CAN'T GET STARTED,Ron Carter,9
Nothin' On You [feat. Bruno Mars] (Album Version),B.o.B,8
Hey Daddy (Daddy's Home),Usher featuring Jermaine Dupri,6
Hey Daddy (Daddy's Home),Usher,6
Make Her Say,Kid Cudi / Kanye West / Common,5
Make Her Say,Kid Cudi,5
Up Up & Away,Kid Cudi,5
Up Up & Away,Kid Cudi / Kanye West / Common,5


### Top 10 Users in songplays

In [52]:
%%sql
WITH songplays_ext AS (
         SELECT sp_session_id, u_first_name, u_last_name, u_user_id
         FROM   songplays
         JOIN   users
         ON     sp_user_id = u_user_id  AND
                sp_level   = u_level
    )

SELECT   DISTINCT( u_first_name || ' ' || u_last_name ) AS "user name",
         u_user_id AS "user ID",
         COUNT( sp_session_id ) AS "session count"
FROM     songplays_ext
GROUP BY "user ID", "user name"
ORDER BY "session count" DESC
LIMIT    10;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
10 rows affected.


user name,user id,session count
Chloe Cuevas,49,42
Kate Harrell,97,32
Tegan Levine,80,31
Aleena Kirby,44,21
Jacob Klein,73,18
Mohammad Rodriguez,88,17
Lily Koch,15,15
Layla Griffin,24,13
Matthew Jones,36,13
Jacqueline Lynch,29,13


### ID for user with most songs

In [53]:
%%sql
WITH songplays_ext AS (
         SELECT  sp_session_id, u_user_id
         FROM    songplays
         JOIN    users
         ON      sp_user_id = u_user_id  AND
                 sp_level   = u_level
    ),
    session_counts AS (
        SELECT   u_user_id,
                 COUNT( sp_session_id ) AS count
        FROM     songplays_ext
        GROUP BY u_user_id
    ),
    max_session  AS (
        SELECT   MAX(count) AS max_count
        FROM     session_counts
    )

SELECT  u_user_id AS top_user
FROM    session_counts
WHERE   count = ( 
        SELECT   max_count
        FROM     max_session
);


 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
1 rows affected.


top_user
49


In [21]:
%%sql
SELECT   sp_user_id, sp_level, COUNT(*)
FROM     songplays
WHERE    sp_user_id = 49
GROUP BY sp_user_id, sp_level;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
2 rows affected.


sp_user_id,sp_level,count
49,paid,40
49,free,2


### Top 5 sessions with most songs for Top User (ID = 49)

In [81]:
%%sql
WITH songplays_user AS (
         SELECT  *
         FROM    songplays
         WHERE   sp_user_id  = 49
    ),
    user_sessions AS (
         SELECT  u_first_name, u_last_name, 
                 sp_session_id, sp_start_time, s_title
         FROM    songplays_user
         JOIN    users
         ON      sp_user_id  = u_user_id  AND
                 sp_level    = u_level
         JOIN    songs
         ON      sp_song_id  = s_song_id
    )

SELECT   u_first_name || ' ' || u_last_name AS "user name",
         sp_session_id AS "session ID",
         (DATE_PART('year', 
                    sp_start_time) || '-' || DATE_PART('month', 
                                                       sp_start_time) || '-' || DATE_PART('day', 
                                                                                          sp_start_time))::date,
         COUNT(s_title) AS "song count"
FROM     user_sessions
GROUP BY sp_session_id, date, "user name"
ORDER BY "song count" DESC
LIMIT    5;

 * postgresql://dwhuser:***@dwhcluster.cbsjbxldkge8.us-west-2.redshift.amazonaws.com:5439/sparkify
5 rows affected.


user name,session id,date,song count
Chloe Cuevas,1041,2018-11-29,11
Chloe Cuevas,1079,2018-11-30,5
Chloe Cuevas,816,2018-11-21,3
Chloe Cuevas,987,2018-11-27,2
Chloe Cuevas,1114,2018-11-30,2
