# Example analytic queries

## Import libraries

In [1]:
import configparser
import psycopg2
from prettytable import PrettyTable

## Connect to Redshift database

In [2]:
# Load the Redshift connection settings from the local config file.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so fail early with a clear message instead of a
# confusing KeyError on config['CLUSTER'] below.
if not config.read('dwh.cfg'):
    raise FileNotFoundError("Configuration file 'dwh.cfg' not found or unreadable")

# Create the connection to the Redshift database using the config values.
# NOTE: the values are substituted positionally, so the [CLUSTER] section must
# list host, dbname, user, password and port in exactly that order.
conn = psycopg2.connect("host={} dbname={} user={} password={} port={}".format(*config['CLUSTER'].values()))
# Commit every statement immediately instead of opening an explicit transaction.
conn.autocommit = True
cur = conn.cursor()

## Schema
![database schema](../images/project_dw_star_schema.png)

## Initial tests
Let's verify that the data was loaded correctly.

In [3]:
# Ask the pg_table_def catalog which tables exist in the public schema.
tables_query = "SELECT DISTINCT tablename FROM pg_table_def WHERE schemaname = 'public';"
cur.execute(tables_query)
cur.fetchall()

[('artists',),
 ('songplays',),
 ('songs',),
 ('staging_events',),
 ('staging_songs',),
 ('time',),
 ('users',)]

In [4]:
# Staging tables first, then the fact table and its dimensions.
tables = ["staging_events", "staging_songs", "songplays", "users", "songs", "artists", "time"]

# Report the row count of every table as a quick sanity check of the load.
for table_name in tables:
    cur.execute(f"SELECT COUNT(*) FROM {table_name};")
    (row_count,) = cur.fetchone()  # COUNT(*) yields exactly one single-column row
    print(f"\nNumber of rows table {table_name}: {row_count}")


Number of rows table staging_events: 8056

Number of rows table staging_songs: 14896

Number of rows table songplays: 333

Number of rows table users: 104

Number of rows table songs: 14896

Number of rows table artists: 10025

Number of rows table time: 8023


## Analytic queries
### 1. Most played songs

In [5]:
# Five most played songs: songplays joined to songs and artists, counted per
# song title / artist name / song year.
query = """
SELECT s.title AS song, a.name AS artist, s.year, COUNT(sp.song_id) AS num_plays
FROM songplays AS sp, songs AS s, artists as a
WHERE sp.song_id = s.song_id
AND sp.artist_id = a.artist_id
GROUP BY song, artist, s.year
ORDER BY num_plays desc
LIMIT 5;
"""

table = PrettyTable()
table.field_names = ["song", "artist", "year", "num_plays"]

try:
    cur.execute(query)
except psycopg2.Error as e:
    print("Error while executing query")
    print(e)
else:
    # Only read results when the query succeeded: fetching from a cursor whose
    # execute() failed would raise a second, misleading error.
    for row in cur.fetchall():
        table.add_row(list(row))
    print(table)

+------------------------------------------------------+---------------+------+-----------+
|                         song                         |     artist    | year | num_plays |
+------------------------------------------------------+---------------+------+-----------+
|                    You're The One                    | Dwight Yoakam | 1990 |     37    |
|                 I CAN'T GET STARTED                  |   Ron Carter  |  0   |     9     |
| Catch You Baby (Steve Pitron & Max Sanna Radio Edit) | Lonnie Gordon |  0   |     9     |
|  Nothin' On You [feat. Bruno Mars] (Album Version)   |     B.o.B     | 2010 |     8     |
|               Hey Daddy (Daddy's Home)               |     Usher     | 2010 |     6     |
+------------------------------------------------------+---------------+------+-----------+


### 2. Most played artists of the 2000s

In [6]:
# Five most played artist / song-year combinations, restricted to songs whose
# year falls between 2000 and 2009 (inclusive).
query = """
SELECT a.name AS artist, s.year, COUNT(sp.song_id) AS num_plays
FROM songplays AS sp, songs AS s, artists AS a
WHERE sp.song_id = s.song_id
AND sp.artist_id = a.artist_id
AND s.year BETWEEN 2000 AND 2009
GROUP BY artist, s.year
ORDER BY num_plays DESC
LIMIT 5;
"""

table = PrettyTable()
table.field_names = ["artist", "year", "num_plays"]

try:
    cur.execute(query)
except psycopg2.Error as e:
    print("Error while executing query")
    print(e)
else:
    # Only read results when the query succeeded: fetching from a cursor whose
    # execute() failed would raise a second, misleading error.
    for row in cur.fetchall():
        table.add_row(list(row))
    print(table)

+------------------------------------------------+------+-----------+
|                     artist                     | year | num_plays |
+------------------------------------------------+------+-----------+
|                    Kid Cudi                    | 2009 |     10    |
|         Kid Cudi / Kanye West / Common         | 2009 |     10    |
|                 Arctic Monkeys                 | 2004 |     5     |
| Richard Hawley And Death Ramps_ Arctic Monkeys | 2004 |     5     |
|                matchbox twenty                 | 2003 |     4     |
+------------------------------------------------+------+-----------+


### 3. Most active users

In [7]:
# Ten users with the most plays, shown with their full name
# (first and last name joined by a space).
query = """
SELECT sp.user_id, CONCAT(u.first_name, CONCAT(' ', u.last_name)) AS user, COUNT(sp.song_id) AS num_plays
FROM songplays sp, users u
WHERE sp.user_id = u.user_id
GROUP BY sp.user_id, u.first_name, u.last_name
ORDER BY num_plays DESC
LIMIT 10;
"""

table = PrettyTable()
table.field_names = ["user_id", "user", "num_plays"]

try:
    cur.execute(query)
except psycopg2.Error as e:
    print("Error while executing query")
    print(e)
else:
    # Only read results when the query succeeded: fetching from a cursor whose
    # execute() failed would raise a second, misleading error.
    for row in cur.fetchall():
        table.add_row(list(row))
    print(table)

+---------+--------------------+-----------+
| user_id |        user        | num_plays |
+---------+--------------------+-----------+
|    49   |    Chloe Cuevas    |     42    |
|    97   |    Kate Harrell    |     32    |
|    80   |    Tegan Levine    |     31    |
|    44   |    Aleena Kirby    |     21    |
|    73   |    Jacob Klein     |     18    |
|    88   | Mohammad Rodriguez |     17    |
|    15   |     Lily Koch      |     15    |
|    36   |   Matthew Jones    |     13    |
|    24   |   Layla Griffin    |     13    |
|    29   |  Jacqueline Lynch  |     13    |
+---------+--------------------+-----------+


### 4. Activity by day of the week and level

In [8]:
# Plays per weekday and level. The CASE expression maps weekday names to 1..7
# so rows sort Monday -> Sunday within each level instead of alphabetically.
query = """
SELECT t.weekday, sp.level, COUNT(sp.song_id) AS num_plays
FROM songplays sp, time t
WHERE sp.start_time = t.start_time
GROUP BY t.weekday, sp.level
ORDER BY sp.level,
     (CASE
          WHEN t.weekday = 'MONDAY' THEN 1
          WHEN t.weekday = 'TUESDAY' THEN 2
          WHEN t.weekday = 'WEDNESDAY' THEN 3
          WHEN t.weekday = 'THURSDAY' THEN 4
          WHEN t.weekday = 'FRIDAY' THEN 5
          WHEN t.weekday = 'SATURDAY' THEN 6
          WHEN t.weekday = 'SUNDAY' THEN 7
     END);
"""

table = PrettyTable()
table.field_names = ["weekday", "level", "num_plays"]

try:
    cur.execute(query)
except psycopg2.Error as e:
    print("Error while executing query")
    print(e)
else:
    # Only read results when the query succeeded: fetching from a cursor whose
    # execute() failed would raise a second, misleading error.
    for row in cur.fetchall():
        table.add_row(list(row))
    print(table)

+-----------+-------+-----------+
|  weekday  | level | num_plays |
+-----------+-------+-----------+
| MONDAY    |  free |     12    |
| TUESDAY   |  free |     9     |
| WEDNESDAY |  free |     8     |
| THURSDAY  |  free |     10    |
| FRIDAY    |  free |     15    |
| SATURDAY  |  free |     4     |
| SUNDAY    |  free |     4     |
| MONDAY    |  paid |     50    |
| TUESDAY   |  paid |     36    |
| WEDNESDAY |  paid |     53    |
| THURSDAY  |  paid |     55    |
| FRIDAY    |  paid |     38    |
| SATURDAY  |  paid |     27    |
| SUNDAY    |  paid |     12    |
+-----------+-------+-----------+


### 5. Daily evolution of activity

In [9]:
# Earliest and latest start_time recorded in songplays, i.e. the period
# covered by the daily breakdown.
query = """
SELECT MIN(start_time) AS date_min, MAX(start_time) AS date_max
FROM songplays;
"""

table = PrettyTable()
table.field_names = ["date_min", "date_max"]

try:
    cur.execute(query)
except psycopg2.Error as e:
    print("Error while executing query")
    print(e)
else:
    # Only read results when the query succeeded: fetching from a cursor whose
    # execute() failed would raise a second, misleading error.
    for row in cur.fetchall():
        table.add_row(list(row))
    print(table)

+---------------------+---------------------+
|       date_min      |       date_max      |
+---------------------+---------------------+
| 2018-11-01 21:11:13 | 2018-11-30 17:31:24 |
+---------------------+---------------------+


In [10]:
# Number of plays per calendar day; start_time is formatted as YYYY/MM/DD so
# grouping and ordering by the formatted string is chronological.
query = """
SELECT TO_CHAR(start_time, 'YYYY/MM/DD') AS date, COUNT(song_id) AS num_plays
FROM songplays
GROUP BY date
ORDER BY date;
"""

table = PrettyTable()
table.field_names = ["date", "num_plays"]

try:
    cur.execute(query)
except psycopg2.Error as e:
    print("Error while executing query")
    print(e)
else:
    # Only read results when the query succeeded: fetching from a cursor whose
    # execute() failed would raise a second, misleading error.
    for row in cur.fetchall():
        table.add_row(list(row))
    print(table)

+------------+-----------+
|    date    | num_plays |
+------------+-----------+
| 2018/11/01 |     1     |
| 2018/11/02 |     4     |
| 2018/11/03 |     5     |
| 2018/11/04 |     8     |
| 2018/11/05 |     28    |
| 2018/11/06 |     6     |
| 2018/11/07 |     9     |
| 2018/11/08 |     8     |
| 2018/11/09 |     14    |
| 2018/11/10 |     5     |
| 2018/11/11 |     3     |
| 2018/11/12 |     6     |
| 2018/11/13 |     18    |
| 2018/11/14 |     15    |
| 2018/11/15 |     27    |
| 2018/11/16 |     8     |
| 2018/11/17 |     7     |
| 2018/11/18 |     3     |
| 2018/11/19 |     10    |
| 2018/11/20 |     11    |
| 2018/11/21 |     20    |
| 2018/11/22 |     6     |
| 2018/11/23 |     11    |
| 2018/11/24 |     14    |
| 2018/11/25 |     2     |
| 2018/11/26 |     18    |
| 2018/11/27 |     10    |
| 2018/11/28 |     17    |
| 2018/11/29 |     23    |
| 2018/11/30 |     16    |
+------------+-----------+
