# Tests & Queries

## Init

In [2]:
import configparser
import pandas as pd
%load_ext sql

config = configparser.ConfigParser()
config.read_file(open('dwh.cfg'))
HOST              = config.get('CLUSTER','HOST')
DB_NAME           = config.get('CLUSTER','DB_NAME')
DB_USER           = config.get('CLUSTER','DB_USER')
DB_PASSWORD       = config.get('CLUSTER','DB_PASSWORD')
DB_PORT           = config.get('CLUSTER','DB_PORT')

DB_NAME           = config.get('DATABASE','DB_NAME')

The sql extension is already loaded. To reload it, use:
  %reload_ext sql
postgresql://dwhuser:Passw0rd@dwhcluster.cuvu38ujek21.us-west-2.redshift.amazonaws.com:5439/dwh


'Connected: dwhuser@dwh'

## Connect to Redshift cluster

In [None]:
conn_string="postgresql://{}:{}@{}:{}/{}".format(DB_USER, DB_PASSWORD, HOST, DB_PORT, DB_NAME)
print(conn_string)
%sql $conn_string

## TOP 5 Users with more stream events

In [7]:
%%sql 
    select u.first_name || ' ' || u.last_name as full_name, count(sp.start_time) as stream_count
    from songplays sp
    left join users u on sp.user_id = u.user_id
    group by u.first_name || ' ' || u.last_name
    order by 2 desc
    limit 5;

 * postgresql://dwhuser:***@dwhcluster.cuvu38ujek21.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.


full_name,stream_count
Chloe Cuevas,1378
Tegan Levine,1330
Lily Koch,926
Jacqueline Lynch,692
Kate Harrell,557


## Busiests hours at the platform

In [8]:
%%sql 
select t.hour, count(sp.start_time) as stream_count
from songplays sp
left join time t on sp.start_time = t.start_time
group by t.hour
order by 2 desc
limit 5

 * postgresql://dwhuser:***@dwhcluster.cuvu38ujek21.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.


hour,stream_count
16,542
18,498
17,494
15,477
14,432


## Top 5 busiest states

In [9]:
%%sql
select
SPLIT_PART(sp.location, ',', 2) as state,
count(sp.start_time) as stream_count,
sum(sp.stream_duration) as stream_duration
from songplays sp
group by
SPLIT_PART(sp.location, ',', 2)
order by 2 desc, 3 desc
limit 5

 * postgresql://dwhuser:***@dwhcluster.cuvu38ujek21.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.


state,stream_count,stream_duration
CA,1572,388865
ME,665,165235
MI,636,155840
IL-IN-WI,475,121210
GA,456,110499


## Most played artist by state

In [10]:
%%sql 
with cte_artists_by_state as
    (
        select
            SPLIT_PART(sp.location, ',', 2) as state,
            sp.artist_name,
            row_number()
                over (partition by SPLIT_PART(sp.location, ',', 2)
                    order by
                        count(sp.start_time) desc,
                        sum(sp.stream_duration) desc
                    ) as rank,
        count(sp.start_time) as stream_count,
        sum(sp.stream_duration) as stream_duration
        from songplays sp
        group by SPLIT_PART(sp.location, ',', 2),
        sp.artist_name
    )
select
state, artist_name, stream_count, stream_duration
from cte_artists_by_state
where rank = 1
order by stream_count desc

 * postgresql://dwhuser:***@dwhcluster.cuvu38ujek21.us-west-2.redshift.amazonaws.com:5439/dwh
36 rows affected.


state,artist_name,stream_count,stream_duration
CA,Muse,13,3208
MI,Coldplay,10,2733
GA,Kings Of Leon,9,2004
ME,Taylor Swift,7,1613
AZ,BjÃÂ¶rk,5,1652
IL-IN-WI,Alliance Ethnik,5,1229
NC,Florence + The Machine,5,1122
WI-MI,The Black Keys,5,1130
IA,Kings Of Leon,4,788
AL,Radiohead,4,1111
