# Tests & Queries

## Init

In [2]:
import configparser
from urllib.parse import quote_plus

import pandas as pd
%load_ext sql

# Read the connection settings from the [CLUSTER] section of dwh.cfg.
config = configparser.ConfigParser()
with open('dwh.cfg') as config_file:
    # The context manager closes the file handle once parsing is done.
    config.read_file(config_file)

# Each setting is read exactly once; all values come back as strings.
HOST              = config.get('CLUSTER','HOST')
DB_NAME           = config.get('CLUSTER','DB_NAME')
DB_USER           = config.get('CLUSTER','DB_USER')
DB_PASSWORD       = config.get('CLUSTER','DB_PASSWORD')
DB_PORT           = config.get('CLUSTER','DB_PORT')

## Connect to Redshift cluster

In [3]:
conn_string="postgresql://{}:{}@{}:{}/{}".format(DB_USER, DB_PASSWORD, HOST, DB_PORT, DB_NAME)
%sql $conn_string

'Connected: dwhuser@dwh'

## Testing artist deduplication and best-value picking

In [33]:
%%sql
-- Distinct staging_songs rows for a single artist name, listing every
-- artist_id and location variant recorded under that name.
select distinct
    artist_id,
    artist_name      as name,
    artist_latitude  as latitude,
    artist_longitude as longitude,
    artist_location  as location
from staging_songs
-- Filter on the source column rather than the name output alias, because
-- referencing a select-list alias in WHERE is not standard SQL.
where artist_name = 'Bill & Gloria Gaither'
order by name
    

 * postgresql://dwhuser:***@dwhcluster.cmfoxim90hks.us-west-2.redshift.amazonaws.com:5439/dwh
7 rows affected.


artist_id,name,latitude,longitude,location
ARK6JC91187B9B4B2D,Bill & Gloria Gaither,,,United States
ARQOTHF12454A52B60,Bill & Gloria Gaither,,,
ARDW74M1187B98DA34,Bill & Gloria Gaither,,,
ARKC83D1187B9AB367,Bill & Gloria Gaither,,,
ARMM25Z1187FB5A8C1,Bill & Gloria Gaither,29.42449,-98.49462,"San Antonio, TX"
ARBB58Y1187B9B621B,Bill & Gloria Gaither,,,
ARDFZE61187FB45994,Bill & Gloria Gaither,35.83073,-85.97874,Tennessee


In [36]:
%%sql
-- Look up one artist name in artist_names and return all of its columns.
SELECT *
FROM artist_names
WHERE name = 'Bill & Gloria Gaither'

 * postgresql://dwhuser:***@dwhcluster.cmfoxim90hks.us-west-2.redshift.amazonaws.com:5439/dwh
1 rows affected.


name,artist_id,latitude,longitude,location
Bill & Gloria Gaither,ARQOTHF12454A52B60,35.83073,-85.97874,Tennessee


## Top 5 users with the most stream events

In [37]:
%%sql
-- Five users with the highest number of songplay rows.
-- The grouping key includes user_id so that different users who share the
-- same first and last name are counted separately instead of being merged.
select
    u.first_name || ' ' || u.last_name as full_name,
    count(sp.start_time)               as stream_count
from songplays sp
left join users u on sp.user_id = u.user_id
group by sp.user_id, u.first_name, u.last_name
-- Position 2 is stream_count and position 1 is full_name; the second key
-- breaks ties so the five rows kept by LIMIT are stable between runs.
order by 2 desc, 1
limit 5;

 * postgresql://dwhuser:***@dwhcluster.cmfoxim90hks.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.


full_name,stream_count
Chloe Cuevas,689
Tegan Levine,665
Kate Harrell,557
Lily Koch,463
Aleena Kirby,397


## Busiest hours on the platform

In [38]:
%%sql
-- Five hours of the day with the highest number of songplay rows.
SELECT
    t.hour,
    COUNT(sp.start_time) AS stream_count
FROM songplays sp
LEFT JOIN time t
    ON sp.start_time_key = t.time_key
GROUP BY
    t.hour
ORDER BY
    2 DESC  -- position 2 is stream_count
LIMIT 5

 * postgresql://dwhuser:***@dwhcluster.cmfoxim90hks.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.


hour,stream_count
16,542
18,498
17,494
15,477
14,432


## Top 5 busiest states

In [39]:
%%sql
-- Five busiest states by songplay count, with the total streamed duration.
-- The state label is the text after the first comma of songplays.location;
-- TRIM removes any blank that follows the comma so that equal labels always
-- fall into the same group. Some labels cover several states at once.
select
    trim(split_part(sp.location, ',', 2)) as state,
    count(sp.start_time)                  as stream_count,
    sum(sp.stream_duration)               as stream_duration
from songplays sp
group by
    trim(split_part(sp.location, ',', 2))
-- Position 2 is stream_count and position 3 is stream_duration.
order by 2 desc, 3 desc
limit 5

 * postgresql://dwhuser:***@dwhcluster.cmfoxim90hks.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.


state,stream_count,stream_duration
CA,1572,389678.3915
ME,665,165578.0111
MI,636,156168.1464
IL-IN-WI,475,121454.025
GA,456,110724.7873


## Most played artist by state

In [40]:
%%sql
-- Most played artist for each state label.
-- The CTE aggregates songplays per state and artist, then numbers the artists
-- inside each state by play count and, on equal counts, by total duration.
-- The outer query keeps only the first artist of every state.
with cte_artists_by_state as
    (
        select
            -- TRIM removes any blank that follows the comma in location.
            trim(split_part(sp.location, ',', 2)) as state,
            sp.artist_name,
            row_number()
                over (partition by trim(split_part(sp.location, ',', 2))
                    order by
                        count(sp.start_time) desc,
                        sum(sp.stream_duration) desc
                    ) as artist_rank,  -- named to avoid clashing with the RANK function
            count(sp.start_time) as stream_count,
            sum(sp.stream_duration) as stream_duration
        from songplays sp
        group by trim(split_part(sp.location, ',', 2)),
        sp.artist_name
    )
select
state, artist_name, stream_count, stream_duration
from cte_artists_by_state
where artist_rank = 1
-- The state label breaks ties so the row order is stable between runs.
order by stream_count desc, state

 * postgresql://dwhuser:***@dwhcluster.cmfoxim90hks.us-west-2.redshift.amazonaws.com:5439/dwh
36 rows affected.


state,artist_name,stream_count,stream_duration
CA,Muse,13,3215.8497
MI,Coldplay,10,2737.3141
GA,Kings Of Leon,9,2009.2557
ME,Taylor Swift,7,1615.5134
AZ,BjÃÂ¶rk,5,1654.5675
IL-IN-WI,Alliance Ethnik,5,1231.906
NC,Florence + The Machine,5,1125.0914
WI-MI,The Black Keys,5,1132.8498
IA,Kings Of Leon,4,790.6722
AL,Radiohead,4,1113.7808
