In [19]:
import configparser
import psycopg2
import time

In [2]:
config = configparser.ConfigParser()
config.read('dwh.cfg')

conn = psycopg2.connect("host={} dbname={} user={} password={} port={}".format(*config['CLUSTER'].values()))
cur = conn.cursor()

In [34]:
def run_query(query):
    time_start = time.time()
    cur.execute(query)
    conn.commit()
    rows = cur.fetchall()
    print("Query:", end = " ")
    print(query)
    print("Result:", end = " ")
    for row in rows:
        print(row)
    print("Execution time:", time.time() - time_start, '\n')

## Schema

In [37]:
print("Overview of Tables:")
run_query('select count(*) from staging_events')
run_query('select count(*) from staging_songs')
run_query('select count(*) from songplays')
run_query('select count(*) from songs')
run_query('select count(*) from users')
run_query('select count(*) from artists')
run_query('select count(*) from time')

Overview of Tables:
Query: select count(*) from staging_events
Result: (8056,)
Execution time: 0.8502557277679443 

Query: select count(*) from staging_songs
Result: (14896,)
Execution time: 0.9737410545349121 

Query: select count(*) from songplays
Result: (333,)
Execution time: 0.921616792678833 

Query: select count(*) from songs
Result: (14896,)
Execution time: 0.920811653137207 

Query: select count(*) from users
Result: (104,)
Execution time: 0.8538589477539062 

Query: select count(*) from artists
Result: (10025,)
Execution time: 0.9892339706420898 

Query: select count(*) from time
Result: (8023,)
Execution time: 0.9215090274810791 



### How many songs were started by free vs paid users?

In [38]:
run_query('select count(songplay_id), users.level from songplays inner join users on songplays.user_id = users.user_id group by 2;')

Query: select count(songplay_id), users.level from songplays inner join users on songplays.user_id = users.user_id group by 2;
Result: (276, 'paid')
(201, 'free')
Execution time: 4.83555006980896 



### Which hour of the day are users starting to play the most songs?

In [40]:
run_query('select hour, count(songplay_id) from songplays inner join time on songplays.start_time = time.start_time group by hour order by 1 desc;')

Query: select hour, count(songplay_id) from songplays inner join time on songplays.start_time = time.start_time group by hour order by 1 desc;
Result: (23, 11)
(22, 7)
(21, 12)
(20, 18)
(19, 16)
(18, 26)
(17, 40)
(16, 24)
(15, 25)
(14, 16)
(13, 14)
(12, 12)
(11, 16)
(10, 11)
(9, 9)
(8, 18)
(7, 13)
(6, 9)
(5, 7)
(4, 7)
(3, 2)
(2, 3)
(1, 11)
(0, 6)
Execution time: 1.116760015487671 



### Which 30 users have started listening to the most songs?

In [41]:
run_query('select user_id, count(songplay_id) from songplays group by user_id order by 2 desc limit 30;')

Query: select user_id, count(songplay_id) from songplays group by user_id order by 2 desc limit 30;
Result: (49, 42)
(97, 32)
(80, 31)
(44, 21)
(73, 18)
(88, 17)
(15, 15)
(29, 13)
(24, 13)
(36, 13)
(25, 10)
(58, 9)
(95, 8)
(30, 8)
(85, 7)
(16, 6)
(42, 6)
(50, 5)
(8, 3)
(82, 3)
(12, 2)
(2, 2)
(100, 2)
(67, 2)
(66, 2)
(52, 2)
(86, 2)
(72, 2)
(101, 2)
(53, 2)
Execution time: 4.629016876220703 



### What are the most popular operating systems among users who have played songs?

In [43]:
run_query("""
select 
sum(case when user_agent like '%Windows%' then 1 else 0 end) as windows_sum, 
sum(case when user_agent like '%Linux%' then 1 else 0 end) as linux_sum, 
sum(case when user_agent like '%Mac%' then 1 else 0 end) as mac_sum, 
sum(case when user_agent like '%iPhone%' then 1 else 0 end) as iphone_sum, 
sum(case when user_agent like '%Android%' then 1 else 0 end) as anrdoid_sum 
from songplays;
""")

Query: 
select 
sum(case when user_agent like '%Windows%' then 1 else 0 end) as windows_sum, 
sum(case when user_agent like '%Linux%' then 1 else 0 end) as linux_sum, 
sum(case when user_agent like '%Mac%' then 1 else 0 end) as mac_sum, 
sum(case when user_agent like '%iPhone%' then 1 else 0 end) as iphone_sum, 
sum(case when user_agent like '%Android%' then 1 else 0 end) as anrdoid_sum 
from songplays;

Result: (132, 50, 151, 10, 0)
Execution time: 1.096177101135254 

