In [36]:
# Load the `sql` IPython extension; it provides the %sql magic used by the cells below.
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


### Connect to Redshift

In [37]:
import configparser

# Parse the local dwh.cfg file that holds the credentials and cluster settings.
config = configparser.ConfigParser()
# A context manager closes the file handle as soon as parsing finishes.
# read_file (unlike read) still raises if dwh.cfg cannot be opened.
with open('dwh.cfg') as cfg_file:
    config.read_file(cfg_file)

# Values from the [AWS] section.
KEY = config.get('AWS', 'key')
SECRET = config.get('AWS', 'secret')

# Values from the [CLUSTER] section.
DWH_HOST = config.get("CLUSTER", "HOST")
DWH_DB = config.get("CLUSTER", "DB_NAME")
DWH_DB_USER = config.get("CLUSTER", "DB_USER")
DWH_DB_PASSWORD = config.get("CLUSTER", "DB_PASSWORD")
DWH_PORT = config.get("CLUSTER", "DB_PORT")

# Same [CLUSTER] HOST value as DWH_HOST, kept under the name the connection cell uses.
DWH_ENDPOINT = config.get("CLUSTER", "HOST")
# Value from the [IAM_ROLE] section.
DWH_ROLE_ARN = config.get("IAM_ROLE", "ARN")

In [38]:
conn_string="postgresql://{}:{}@{}:{}/{}".format(DWH_DB_USER, DWH_DB_PASSWORD, DWH_ENDPOINT, DWH_PORT, DWH_DB)
print(conn_string)
%sql $conn_string

postgresql://dwhuser:***@dwhcluster.c01fuo5drjhi.us-west-2.redshift.amazonaws.com:5439/dwh


'Connected: dwhuser@dwh'

### Get counts from tables

In [10]:
# Row count of the users table.
%sql SELECT COUNT(*) from users;

 * postgresql://dwhuser:***@dwhcluster.c01fuo5drjhi.us-west-2.redshift.amazonaws.com:5439/dwh
1 rows affected.


count
97


In [11]:
# Row count of the song table.
%sql SELECT COUNT(*) from song;

 * postgresql://dwhuser:***@dwhcluster.c01fuo5drjhi.us-west-2.redshift.amazonaws.com:5439/dwh
1 rows affected.


count
14896


In [12]:
# Row count of the artist table.
%sql SELECT COUNT(*) from artist;

 * postgresql://dwhuser:***@dwhcluster.c01fuo5drjhi.us-west-2.redshift.amazonaws.com:5439/dwh
1 rows affected.


count
10025


In [13]:
# Row count of the songplay table.
%sql SELECT COUNT(*) from songplay;

 * postgresql://dwhuser:***@dwhcluster.c01fuo5drjhi.us-west-2.redshift.amazonaws.com:5439/dwh
1 rows affected.


count
119


### Select sample songplay records

In [15]:
# Preview up to five songplay rows; there is no ORDER BY, so which rows come back is not fixed.
%sql SELECT * from songplay LIMIT 5;

 * postgresql://dwhuser:***@dwhcluster.c01fuo5drjhi.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.


songplay_id,start_time,user_id,level,song_id,artist_id,session_id,location,user_agent
353,1541348754796,69,free,SOARUPP12AB01842E0,ARD46C811C8A414F3F,235,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD","""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"""
27,1541416136796,95,paid,SOQYHVZ12A6D4F93CF,ARRZUPG11F43A69EF7,222,"Winston-Salem, NC","""Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D257 Safari/9537.53"""
8,1541603777796,2,free,SOTJEIC12A8C139054,AR0OTEX1187FB3600D,323,"Plymouth, IN","""Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"""
321,1541842182796,44,paid,SOWLLXC12AB0180FFE,AR66PLO1187FB4C8E5,350,"Waterloo-Cedar Falls, IA",Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0
361,1542057721796,12,free,SOARUPP12AB01842E0,ARD46C811C8A414F3F,371,"New York-Newark-Jersey City, NY-NJ-PA",Mozilla/5.0 (Windows NT 6.1; rv:31.0) Gecko/20100101 Firefox/31.0


## Analytic Queries

### Most active weekday
0=Monday; 6=Sunday

In [19]:
# NOTE(review): the %%time cell magic used below does not need this import, and `time`
# is not referenced in the visible cells; confirm before removing.
from time import time

In [20]:
%%time
# Count songplays per weekday by joining songplay to the time table on start_time,
# ordered by weekday.
%sql SELECT t.weekday, count(s.songplay_id) AS cnt \
FROM songplay s \
JOIN time t ON t.start_time=s.start_time \
GROUP BY t.weekday \
ORDER BY weekday;

 * postgresql://dwhuser:***@dwhcluster.c01fuo5drjhi.us-west-2.redshift.amazonaws.com:5439/dwh
7 rows affected.
CPU times: user 4.05 ms, sys: 523 µs, total: 4.57 ms
Wall time: 111 ms


weekday,cnt
0,8
1,25
2,16
3,19
4,26
5,18
6,7


### Songplay activity by Gender

In [22]:
%%time

# Count songplays per user gender (songplay joined to users on user_id),
# largest count first, capped at five rows.
%sql SELECT u.gender, count(s.songplay_id) AS cnt \
FROM songplay s \
JOIN users u ON s.user_id=u.user_id \
GROUP BY u.gender \
ORDER BY cnt DESC LIMIT 5;

 * postgresql://dwhuser:***@dwhcluster.c01fuo5drjhi.us-west-2.redshift.amazonaws.com:5439/dwh
2 rows affected.
CPU times: user 5.92 ms, sys: 0 ns, total: 5.92 ms
Wall time: 12.8 s


gender,cnt
F,84
M,35


### Songplay activity by subscription Level

In [23]:
%%time

# Count songplays per subscription level, using the level stored on the users row,
# largest count first, capped at five rows.
# NOTE(review): songplay also carries its own level column; confirm that u.level
# (rather than s.level) is the intended grouping.
%sql SELECT u.level, count(s.songplay_id) AS cnt \
FROM songplay s \
JOIN users u ON s.user_id=u.user_id \
GROUP BY u.level \
ORDER BY cnt DESC LIMIT 5;

 * postgresql://dwhuser:***@dwhcluster.c01fuo5drjhi.us-west-2.redshift.amazonaws.com:5439/dwh
2 rows affected.


level,cnt
paid,93
free,26


### Most played song

In [42]:
%%time

%sql SELECT s.title, a.name, count(sp.songplay_id) AS cnt \
FROM songplay sp LEFT JOIN song s ON sp.song_id=s.song_id \
LEFT JOIN artist a ON s.artist_id=s.artist_id \
GROUP BY s.title, a.name \
ORDER BY count(sp.songplay_id) DESC \
LIMIT 5;

 * postgresql://dwhuser:***@dwhcluster.c01fuo5drjhi.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.
CPU times: user 6.26 ms, sys: 152 µs, total: 6.42 ms
Wall time: 114 ms


title,name,cnt
You're The One,Alison Krauss / Union Station,111
You're The One,Bill & Gloria Gaither,111
You're The One,E-40,74
You're The One,MoZella,74
You're The One,Nostalgia 77,74


### Analytic Result:
The most active users of the Sparkify music streaming app continue to be female users with paid subscriptions.
