In [1]:
%load_ext sql

In [1]:
import configparser

config = configparser.ConfigParser()
config.read_file(open('dwh.cfg'))

# amazon aws
KEY = config.get('AWS', 'key')
SECRET = config.get('AWS', 'secret')

# Redshift
DWH_DB = config.get('DWH', 'DWH_DB')
DWH_DB_USER = config.get('DWH', 'DWH_DB_USER')
DWH_DB_PASSWORD = config.get('DWH', 'DWH_DB_PASSWORD')
DWH_PORT = config.get('DWH', 'DWH_PORT')
DWH_CLUSTER_TYPE = config.get('DWH', 'DWH_CLUSTER_TYPE')
DWH_NUM_NODES = config.get('DWH', 'DWH_NUM_NODES')
DWH_NODE_TYPE = config.get('DWH', 'DWH_NODE_TYPE')
DWH_IAM_ROLE_NAME = config.get('DWH', 'DWH_IAM_ROLE_NAME')
DWH_CLUSTER_IDENTIFIER = config.get('DWH', 'DWH_CLUSTER_IDENTIFIER')
DWH_SCHEMA = config.get('DWH', 'DWH_SCHEMA')
DWH_LOG_STAGING_TABLE = config.get('DWH', 'DWH_LOG_STAGING_TABLE')
DWH_SONG_STAGING_TABLE = config.get('DWH', 'DWH_SONG_STAGING_TABLE')
DWH_REGION = config.get('DWH', 'DWH_REGION')

# s3
LOG_JSON_FORMAT = config.get('S3', 'LOG_JSON_FORMAT')
S3_BUCKET_LOG_JSON_PATH = config.get('S3', 'S3_BUCKET_LOG_JSON_PATH')
S3_BUCKET_SONG_JSON_PATH = config.get('S3', 'S3_BUCKET_SONG_JSON_PATH')

In [4]:
import boto3
from config import *
import json
from botocore.exceptions import ClientError
import utils
from smart_open import open

def create_iam_role():
    iam = boto3.client('iam',
                       aws_access_key_id=KEY,
                       aws_secret_access_key=SECRET,
                       region_name='us-west-2'
                       )
    print("1.1 creating role")
    try:
        iam.create_role(
            Path='/',
            RoleName=DWH_IAM_ROLE_NAME,
            Description="Allows Redshift to call AWS Services.",
            AssumeRolePolicyDocument=json.dumps(
                {'Statement': [{'Action': 'sts:AssumeRole',
                  'Effect': 'Allow',
                  'Principal': {'Service': 'redshift.amazonaws.com'}}],
                 'Version': '2012-10-17'})
            )

    except ClientError as e:
        print(f'ERROR: {e}')

    print("1.2 Attaching Policy")
    try:
        iam.attach_role_policy(
            RoleName=DWH_IAM_ROLE_NAME,
            PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess")\
                        ['ResponseMetadata']['HTTPStatusCode']
    except ClientError as e:
        print(f'ERROR: {e}')

    print("1.3 Get the IAM role ARN")
    roleArn = iam.get_role(RoleName=DWH_IAM_ROLE_NAME)['Role']['Arn']
    return roleArn


def create_redshift_cluster(roleArn):
    print("1.1 Client is created ...")
    redshift = boto3.client('redshift',
                            region_name="us-west-2",
                            aws_access_key_id=KEY,
                            aws_secret_access_key=SECRET
                            )
    try:
        print("1.2 Cluster config is being created ...")
        redshift.create_cluster(
            # HW
            ClusterType=DWH_CLUSTER_TYPE,
            NodeType=DWH_NODE_TYPE,
            NumberOfNodes=int(DWH_NUM_NODES),

            # Identifiers & Credentials
            DBName=DWH_DB,
            ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
            MasterUsername=DWH_DB_USER,
            MasterUserPassword=DWH_DB_PASSWORD,

            # Roles (for s3 access)
            IamRoles=[roleArn])
    except ClientError as e:
        print(f'ERROR: {e}')

    print("1.3 Cluster is being created ...")
    while redshift.describe_clusters(
            ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)\
            ['Clusters'][0]['ClusterStatus'] != 'available':
        utils.animate()

    print("\r1.4 Cluster is created successfully ...")
    return redshift.describe_clusters(
        ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)\
    ['Clusters'][0]['Endpoint']['Address']


def delete_redshift_cluster():
    print("1.1 Client is created ...")
    redshift = boto3.client('redshift',
                            region_name="us-west-2",
                            aws_access_key_id=KEY,
                            aws_secret_access_key=SECRET
                            )
    print("1.2 Cluster is identified ...")
    try:
        redshift.delete_cluster(
            ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
            SkipFinalClusterSnapshot=True)
    except ClientError as e:
        print(f'ERROR: {e}')

    try:
        print("1.3 Cluster is being deleted ...")
        while redshift.describe_clusters(
                ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)\
                ['Clusters'][0]['ClusterStatus'] == 'deleting':
            utils.animate()
    except:
        print("\r1.4 Cluster is deleted successfully ...")
    return None

In [5]:
roleArn = create_iam_role()
create_redshift_cluster(roleArn)

1.1 creating role
1.2 Attaching Policy
1.3 Get the IAM role ARN
1.1 Client is created ...
1.2 Cluster config is being created ...
1.3 Cluster is being created ...
1.4 Cluster is created successfully ...


'dwh-cluster.cgjrwscs7tjx.us-west-2.redshift.amazonaws.com'

In [8]:
from sql_queries import create_table_queries, drop_table_queries
from config import *
import psycopg2
import argparse


def create_database(cur, conn):
    """
    This function drops all the tables in the database
    :param cur:
    :param conn:
    :return:
    """
    cur.execute("CREATE SCHEMA IF NOT EXISTS {}".format(DWH_SCHEMA))
    conn.commit()
    cur.execute("SET search_path to {}".format(DWH_SCHEMA))
    conn.commit()
    return None


def drop_tables(cur, conn):
    """
    This function drops all the tables in the database
    :param cur:
    :param conn:
    :return:
    """
    cur.execute("SET search_path to {}".format(DWH_SCHEMA))
    conn.commit()
    for query in drop_table_queries:
        cur.execute(query)
        conn.commit()
    return None


def create_tables(cur, conn):
    """
    This function creates all the tables in the database
    :param cur:
    :param conn:
    :return:
    """
    cur.execute("SET search_path to {}".format(DWH_SCHEMA))
    conn.commit()
    for query in create_table_queries:
        cur.execute(query)
        conn.commit()
    return None

In [36]:
# DROP TABLES
# ----------------------------------------------------------------------
log_staging_table_drop = "DROP TABLE IF EXISTS log_staging CASCADE"
song_staging_table_drop = "DROP TABLE IF EXISTS song_staging CASCADE"
songplay_table_drop = "DROP TABLE IF EXISTS songplays CASCADE;"
user_table_drop = "DROP TABLE IF EXISTS users;"
song_table_drop = "DROP TABLE IF EXISTS songs;"
artist_table_drop = "DROP TABLE IF EXISTS artists;"
time_table_drop = "DROP TABLE IF EXISTS time;"

# CREATE TABLES
# ----------------------------------------------------------------------
log_staging_table_create = """
 CREATE TABLE IF NOT EXISTS log_staging (
    artist VARCHAR, 
    auth VARCHAR, 
    firstName VARCHAR, 
    gender VARCHAR, 
    iteminSession INTEGER, 
    lastName VARCHAR, 
    length NUMERIC, 
    level VARCHAR, 
    location VARCHAR, 
    method VARCHAR,
    page VARCHAR, 
    registration NUMERIC, 
    sessionId INTEGER, 
    song VARCHAR,
    status INTEGER,
    ts TIMESTAMP,
    userAgent VARCHAR,
    userId INTEGER);    
"""

song_staging_table_create = """
 CREATE TABLE IF NOT EXISTS song_staging (
    num_songs INTEGER, 
    artist_id VARCHAR, 
    artist_latitude NUMERIC, 
    artist_longitude NUMERIC, 
    artist_location VARCHAR, 
    artist_name VARCHAR, 
    song_id NUMERIC, 
    title VARCHAR, 
    duration NUMERIC, 
    year INTEGER);    
"""

# facts ----------------------------------------------------------------
songplay_table_create = """
 CREATE TABLE IF NOT EXISTS songplays (
    songplay_id INTEGER IDENTITY(0,1) PRIMARY KEY, 
    start_time TIMESTAMP NOT NULL REFERENCES time(start_time) sortkey, 
    user_id VARCHAR NOT NULL REFERENCES users(user_id), 
    level VARCHAR NOT NULL, 
    song_id VARCHAR NOT NULL REFERENCES songs(song_id) distkey, 
    artist_id VARCHAR NOT NULL REFERENCES artists(artist_id), 
    session_id INTEGER NOT NULL, 
    location VARCHAR NOT NULL, 
    user_agent VARCHAR NOT NULL);
"""

# dimensions -----------------------------------------------------------
user_table_create = """
 CREATE TABLE IF NOT EXISTS users (
    user_id VARCHAR(50) PRIMARY KEY sortkey, 
    first_name VARCHAR, 
    last_name VARCHAR, 
    gender VARCHAR, 
    level VARCHAR NOT NULL)
    diststyle ALL;
"""

song_table_create = """
 CREATE TABLE IF NOT EXISTS songs (
    song_id VARCHAR PRIMARY KEY distkey, 
    title VARCHAR NOT NULL, 
    artist_id VARCHAR NOT NULL, 
    year INTEGER NOT NULL,
    duration NUMERIC NOT NULL);
"""

artist_table_create = """
 CREATE TABLE IF NOT EXISTS artists (
    artist_id VARCHAR PRIMARY KEY sortkey, 
    name VARCHAR NOT NULL, 
    location VARCHAR NOT NULL, 
    latitude NUMERIC NOT NULL, 
    longitude NUMERIC NOT NULL)
    diststyle ALL;
"""

time_table_create = """
 CREATE TABLE IF NOT EXISTS time (
    start_time TIMESTAMP UNIQUE NOT NULL sortkey, 
    hour INTEGER NOT NULL, 
    day INTEGER NOT NULL, 
    week INTEGER NOT NULL, 
    month INTEGER NOT NULL, 
    year INTEGER NOT NULL, 
    week_day VARCHAR)
    diststyle ALL;
"""

# INSERT RECORDS
# ----------------------------------------------------------------------
songplay_table_insert = ("""
INSERT INTO songplays (start_time, user_id, level, song_id, artist_id,
 session_id, location, user_agent) 
 SELECT DISTINCT lgs.ts, 
                 lsg.userId, 
                 nvl(lgs.level, 'empty'), 
                 ssg.song_id, 
                 lsg.artistId,
                 lsg.sessionId, 
                 nvl(lgs.location, 'empty'), 
                 nvl(lgs.userAgent, 'empty')
 FROM log_staging lgs
 INNER JOIN song_staging ssg ON lgs.song = ssg.title
 WHERE lgs.page = 'NextSong';
""")

user_table_insert = ("""
INSERT INTO users (user_id, first_name, last_name, gender, level) 
  SELECT DISTINCT lgs.userId, 
                  nvl(lgs.firstName, 'empty'), 
                  nvl(lgs.lastName, 'empty'),  
                  nvl(lgs.gender, 'empty'),  
                  nvl(lgs.level, 'empty'), 
  FROM log_staging lgs
  WHERE lgs.userId IS NOT NULL;
""")

song_table_insert = ("""
INSERT INTO songs (song_id, title, artist_id, year, duration) 
 SELECT DISTINCT ssg.song_id, 
                 ssg.title, 
                 ssg.artist_id, 
                 ssg.year, 
                 nvl(ssg.duration, 0.0)
  FROM song_staging ssg
""")

artist_table_insert = ("""
INSERT INTO artists (artist_id, name, location, latitude, longitude) 
 SELECT DISTINCT ssg.artist_id, 
                 ssg.artist_name, 
                 nvl(ssg.artist_location, 'empty'), 
                 nvl(ssg.artist_latitude, 0.0), 
                 nvl(ssg.artist_longitude, 0.0)
 FROM song_staging ssg
 WHERE ssg.artist_id IS NOT NULL;
""")

time_table_insert = ("""
INSERT INTO time (start_time, hour, day, week, month, year, week_day)
 SELECT DISTINCT se.ts, 
                 DATE_PART(hour, se.ts) :: INTEGER, 
                 DATE_PART(day, se.ts) :: INTEGER, 
                 DATE_PART(week, se.ts) :: INTEGER,
                 DATE_PART(month, se.ts) :: INTEGER,
                 DATE_PART(year, se.ts) :: INTEGER,
                 DATE_PART(dow, se.ts) :: INTEGER
 FROM log_staging lsg
 WHERE lsg.page = 'NextSong';
""")

# FIND SONGS
# you'll need to get the song ID and artist ID by querying the songs
# and artists tables to find matches based on song title, artist name,
# and song duration time
song_select = ("""
SELECT s.song_id, s.artist_id FROM songs s
 JOIN artists a ON s.artist_id=a.artist_id
 WHERE s.title = %s AND a.name=%s AND s.duration=%s;
""")

# QUERY LISTS

create_table_queries = [log_staging_table_create,
                        song_staging_table_create,
                        user_table_create,
                        song_table_create,
                        artist_table_create,
                        time_table_create,
                        songplay_table_create]
insert_table_queries = [user_table_insert,
                        song_table_insert,
                        artist_table_insert,
                        time_table_insert,
                        songplay_table_insert]
drop_table_queries = [log_staging_table_drop,
                      song_staging_table_drop,
                      songplay_table_drop,
                      user_table_drop,
                      song_table_drop,
                      artist_table_drop,
                      time_table_drop]



In [50]:

DWH_ENDPOINT = 'dwh-cluster.cgjrwscs7tjx.us-west-2.redshift.amazonaws.com'
# create postgres connection
conn_string = "postgresql://{}:{}@{}:{}/{}".format(
                DWH_DB_USER,
                DWH_DB_PASSWORD,
                DWH_ENDPOINT,
                DWH_PORT,
                DWH_DB
)
conn = psycopg2.connect(conn_string)
cur = conn.cursor()
create_database(cur, conn)
drop_tables(cur, conn)
create_tables(cur, conn)

In [10]:
%load_ext sql

In [38]:
conn_string="postgresql://{}:{}@{}:{}/{}".format(DWH_DB_USER, DWH_DB_PASSWORD, DWH_ENDPOINT, DWH_PORT, DWH_DB)
print(conn_string)
%sql $conn_string

postgresql://dwhuser:Passw0rd@dwh-cluster.cgjrwscs7tjx.us-west-2.redshift.amazonaws.com:5439/dwh


'Connected: dwhuser@dwh'

In [51]:
%%sql
select t.table_name
from information_schema.tables t
where t.table_schema = 'sparkify'
order by t.table_name;

 * postgresql://dwhuser:***@dwh-cluster.cgjrwscs7tjx.us-west-2.redshift.amazonaws.com:5439/dwh
7 rows affected.


table_name
artists
log_staging
song_staging
songplays
songs
time
users


In [14]:
IAM_ROLE = roleArn

In [15]:
roleArn

'arn:aws:iam::764499268961:role/dwh-role'

In [None]:
%%sql 
copy sparkify.log_staging
from 's3://udacity-dend/log_data' 
credentials 'aws_iam_role=arn:aws:iam::764499268961:role/dwh-role'
emptyasnull
blanksasnull
format as json 's3://udacity-dend/log_json_path.json'
region 'us-west-2'
timeformat 'auto';

In [None]:
%%sql 
copy sparkify.log_staging
from 's3://udacity-dend/log_data/2018' 
credentials 'aws_iam_role=arn:aws:iam::764499268961:role/dwh-role'
emptyasnull
blanksasnull
json 'auto'
timeformat 'auto';

In [None]:
DWH_LOG_STAGING_TABLE = config.get('DWH', 'DWH_LOG_STAGING_TABLE')
DWH_SONG_STAGING_TABLE = config.get('DWH', 'DWH_SONG_STAGING_TABLE')
DWH_REGION = config.get('DWH', 'DWH_REGION')

# s3
LOG_JSON_FORMAT = config.get('S3', 'LOG_JSON_FORMAT')
S3_BUCKET_LOG_JSON_PATH = config.get('S3', 'S3_BUCKET_LOG_JSON_PATH')
S3_BUCKET_SONG_JSON_PATH = config.get('S3', 'S3_BUCKET_SONG_JSON_PATH')

In [55]:
cur.execute("""COPY {}.{} FROM '{}' iam_role '{}' region 'us-west-2' FORMAT AS JSON '{}' timeformat 'epochmillisecs'""".format(DWH_SCHEMA, DWH_LOG_STAGING_TABLE, S3_BUCKET_LOG_JSON_PATH, roleArn, LOG_JSON_FORMAT));
conn.commit()

In [58]:
%%sql
SELECT * FROM stl_load_errors ORDER BY starttime DESC

 * postgresql://dwhuser:***@dwh-cluster.cgjrwscs7tjx.us-west-2.redshift.amazonaws.com:5439/dwh
52 rows affected.


userid,slice,tbl,starttime,session,query,filename,line_number,colname,type,col_length,position,raw_line,raw_field_value,err_code,err_reason
100,1,100765,2019-06-08 13:40:39.412139,20558,740,s3://udacity-dend/song_data/A/A/A/TRAAACN128F9355673.json,1,song_id,numeric,"18, 0",0,"{""artist_id"":""AR9Q9YC1187FB5609B"",""artist_latitude"":null,""artist_location"":""New Jersey"",""artist_longitude"":null,""artist_name"":""Quest_ Pup_ Kevo"",""duration"":252.94322,""num_songs"":1,""song_id"":""SOFRDWL12A58A7CEF7"",""title"":""Hit Da Scene"",""year"":0}",,1207,"Invalid exponent, Value 'F', Pos 16, Type: Decimal"
100,6,100765,2019-06-08 13:40:39.412139,20558,740,s3://udacity-dend/song_data/A/A/A/TRAAABD128F429CF47.json,1,song_id,numeric,"18, 0",0,"{""artist_id"":""ARMJAGH1187FB546F3"",""artist_latitude"":35.14968,""artist_location"":""Memphis, TN"",""artist_longitude"":-90.04892,""artist_name"":""The Box Tops"",""duration"":148.03546,""num_songs"":1,""song_id"":""SOCIWDW12A8C13D406"",""title"":""Soul Deep"",""year"":1969}",,1207,"Invalid digit, Value 'S', Pos 0, Type: Decimal"
100,2,100765,2019-06-08 13:40:39.412139,20558,740,s3://udacity-dend/song_data/A/A/A/TRAAAAK128F9318786.json,1,song_id,numeric,"18, 0",0,"{""artist_id"":""ARJNIUY12298900C91"",""artist_latitude"":null,""artist_location"":"""",""artist_longitude"":null,""artist_name"":""Adelitas Way"",""duration"":213.9424,""num_songs"":1,""song_id"":""SOBLFFE12AF72AA5BA"",""title"":""Scream"",""year"":2009}",,1207,"Invalid exponent, Value 'E', Pos 7, Type: Decimal"
100,5,100765,2019-06-08 13:40:39.412139,20558,740,s3://udacity-dend/song_data/A/A/A/TRAAAED128E0783FAB.json,1,song_id,numeric,"18, 0",0,"{""artist_id"":""ARC1IHZ1187FB4E920"",""artist_latitude"":null,""artist_location"":"""",""artist_longitude"":null,""artist_name"":""Jamie Cullum"",""duration"":246.9873,""num_songs"":1,""song_id"":""SOXZYWX12A6310ED0C"",""title"":""It's About Time"",""year"":0}",,1207,"Invalid exponent, Value 'D', Pos 15, Type: Decimal"
100,2,100702,2019-06-08 13:36:58.307061,20382,680,s3://udacity-dend/log_data/2018/11/2018-11-02-events.json,1,useragent,varchar,100,0,"{""artist"":""N.E.R.D. FEATURING MALICE"",""auth"":""Logged In"",""firstName"":""Jayden"",""gender"":""M"",""itemInSession"":0,""lastName"":""Fox"",""length"":288.9922,""level"":""free"",""location"":""New Orleans-Metairie, LA"",""method"":""PUT"",""page"":""NextSong"",""registration"":1541033612796.0,""sessionId"":184,""song"":""Am I High (Feat. Malice)"",""status"":200,""ts"":1541121934796,""userAgent"":""\\""Mozilla\\/5.0 (Windows NT 6.3; WOW64) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/36.0.1985.143 Safari\\/537.36\\"""",""userId"":""101""} {""artist"":null,""auth"":""Logged In"",""firstName"":""Stefany"",""gender"":""F"",""itemInSession"":0,""lastName"":""White"",""length"":null,""level"":""free"",""location"":""Lubbock, TX"",""method"":""GET"",""page"":""Home"",""registration"":1540708070796.0,""sessionId"":82,""song"":null,""status"":200,""ts"":1541122176796,""userAgent"":""\\""Mozilla\\/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/36.0.1985.143 Safari\\/537.36\\"""",""userId"":""83""} {""artist"":""Death Cab for Cutie"",""auth"":""Logged In"",""firstName"":""Stefany"",""gender"":""F",,1204,String length exceeds DDL length
100,4,100702,2019-06-08 13:36:58.307061,20382,680,s3://udacity-dend/log_data/2018/11/2018-11-03-events.json,2,useragent,varchar,100,0,"{""artist"":null,""auth"":""Logged In"",""firstName"":""Celeste"",""gender"":""F"",""itemInSession"":1,""lastName"":""Williams"",""length"":null,""level"":""free"",""location"":""Klamath Falls, OR"",""method"":""GET"",""page"":""Home"",""registration"":1541077528796.0,""sessionId"":52,""song"":null,""status"":200,""ts"":1541207123796,""userAgent"":""\\""Mozilla\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/37.0.2062.103 Safari\\/537.36\\"""",""userId"":""53""} {""artist"":""Mynt"",""auth"":""Logged In"",""firstName"":""Celeste"",""gender"":""F"",""itemInSession"":2,""lastName"":""Williams"",""length"":166.94812,""level"":""free"",""location"":""Klamath Falls, OR"",""method"":""PUT"",""page"":""NextSong"",""registration"":1541077528796.0,""sessionId"":52,""song"":""Playa Haters"",""status"":200,""ts"":1541207150796,""userAgent"":""\\""Mozilla\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/37.0.2062.103 Safari\\/537.36\\"""",""userId"":""53""} {""artist"":""Taylor Swift"",""auth"":""Logged In"",""firstName"":""Celeste"",""gender"":""F"",""itemInSession"":3,""lastName"":""Williams"",""",,1204,String length exceeds DDL length
100,0,100702,2019-06-08 13:36:58.307061,20382,680,s3://udacity-dend/log_data/2018/11/2018-11-01-events.json,1,useragent,varchar,100,0,"{""artist"":null,""auth"":""Logged In"",""firstName"":""Walter"",""gender"":""M"",""itemInSession"":0,""lastName"":""Frye"",""length"":null,""level"":""free"",""location"":""San Francisco-Oakland-Hayward, CA"",""method"":""GET"",""page"":""Home"",""registration"":1540919166796.0,""sessionId"":38,""song"":null,""status"":200,""ts"":1541105830796,""userAgent"":""\\""Mozilla\\/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/36.0.1985.143 Safari\\/537.36\\"""",""userId"":""39""} {""artist"":null,""auth"":""Logged In"",""firstName"":""Kaylee"",""gender"":""F"",""itemInSession"":0,""lastName"":""Summers"",""length"":null,""level"":""free"",""location"":""Phoenix-Mesa-Scottsdale, AZ"",""method"":""GET"",""page"":""Home"",""registration"":1540344794796.0,""sessionId"":139,""song"":null,""status"":200,""ts"":1541106106796,""userAgent"":""\\""Mozilla\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/35.0.1916.153 Safari\\/537.36\\"""",""userId"":""8""} {""artist"":""Des'ree"",""auth"":""Logged In"",""firstName"":""Kaylee"",""gender"":""F"",""itemInSession"":1,""lastName"":""Summers"",",,1204,String length exceeds DDL length
100,6,100702,2019-06-08 13:36:58.307061,20382,680,s3://udacity-dend/log_data/2018/11/2018-11-04-events.json,3,useragent,varchar,100,0,"{""artist"":null,""auth"":""Logged In"",""firstName"":""Jahiem"",""gender"":""M"",""itemInSession"":0,""lastName"":""Miles"",""length"":null,""level"":""free"",""location"":""San Antonio-New Braunfels, TX"",""method"":""GET"",""page"":""Home"",""registration"":1540817347796.0,""sessionId"":42,""song"":null,""status"":200,""ts"":1541299033796,""userAgent"":""\\""Mozilla\\/5.0 (Windows NT 5.1) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/36.0.1985.143 Safari\\/537.36\\"""",""userId"":""43""} {""artist"":""Gary Hobbs"",""auth"":""Logged In"",""firstName"":""Jahiem"",""gender"":""M"",""itemInSession"":1,""lastName"":""Miles"",""length"":245.52444,""level"":""free"",""location"":""San Antonio-New Braunfels, TX"",""method"":""PUT"",""page"":""NextSong"",""registration"":1540817347796.0,""sessionId"":42,""song"":""En Mi Mundo"",""status"":200,""ts"":1541300092796,""userAgent"":""\\""Mozilla\\/5.0 (Windows NT 5.1) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/36.0.1985.143 Safari\\/537.36\\"""",""userId"":""43""} {""artist"":""Lifehouse"",""auth"":""Logged In"",""firstName"":""Jahiem"",""gender"":""M"",""itemInSession"":2,""lastName"":""Miles"",""",,1204,String length exceeds DDL length
100,0,100700,2019-06-08 13:35:46.868465,20338,672,s3://udacity-dend/log_data/2018/11/2018-11-01-events.json,1,useragent,varchar,100,0,"{""artist"":null,""auth"":""Logged In"",""firstName"":""Walter"",""gender"":""M"",""itemInSession"":0,""lastName"":""Frye"",""length"":null,""level"":""free"",""location"":""San Francisco-Oakland-Hayward, CA"",""method"":""GET"",""page"":""Home"",""registration"":1540919166796.0,""sessionId"":38,""song"":null,""status"":200,""ts"":1541105830796,""userAgent"":""\\""Mozilla\\/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/36.0.1985.143 Safari\\/537.36\\"""",""userId"":""39""} {""artist"":null,""auth"":""Logged In"",""firstName"":""Kaylee"",""gender"":""F"",""itemInSession"":0,""lastName"":""Summers"",""length"":null,""level"":""free"",""location"":""Phoenix-Mesa-Scottsdale, AZ"",""method"":""GET"",""page"":""Home"",""registration"":1540344794796.0,""sessionId"":139,""song"":null,""status"":200,""ts"":1541106106796,""userAgent"":""\\""Mozilla\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/35.0.1916.153 Safari\\/537.36\\"""",""userId"":""8""} {""artist"":""Des'ree"",""auth"":""Logged In"",""firstName"":""Kaylee"",""gender"":""F"",""itemInSession"":1,""lastName"":""Summers"",",,1204,String length exceeds DDL length
100,7,100700,2019-06-08 13:35:46.868465,20338,672,s3://udacity-dend/log_data/2018/11/2018-11-08-events.json,1,useragent,varchar,100,0,"{""artist"":null,""auth"":""Logged In"",""firstName"":""Dominick"",""gender"":""M"",""itemInSession"":0,""lastName"":""Norris"",""length"":null,""level"":""free"",""location"":""Los Angeles-Long Beach-Anaheim, CA"",""method"":""GET"",""page"":""Home"",""registration"":1540975502796.0,""sessionId"":44,""song"":null,""status"":200,""ts"":1541635950796,""userAgent"":""\\""Mozilla\\/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit\\/537.51.2 (KHTML, like Gecko) Version\\/7.0 Mobile\\/11D257 Safari\\/9537.53\\"""",""userId"":""45""} {""artist"":""Slipknot"",""auth"":""Logged In"",""firstName"":""Aiden"",""gender"":""M"",""itemInSession"":0,""lastName"":""Ramirez"",""length"":192.57424,""level"":""paid"",""location"":""New York-Newark-Jersey City, NY-NJ-PA"",""method"":""PUT"",""page"":""NextSong"",""registration"":1540283578796.0,""sessionId"":19,""song"":""Opium Of The People (Album Version)"",""status"":200,""ts"":1541639510796,""userAgent"":""\\""Mozilla\\/5.0 (Windows NT 6.1) AppleWebKit\\/537.36 (KHTML, like Gecko) Chrome\\/36.0.1985.143 Safari\\/537.36\\"""",""userId"":""20""} {""artist"":""Cat Stevens"",""auth"":""Logged",,1204,String length exceeds DDL length


In [None]:
%%sql
SELECT s.title, count(*) as count
FROM sparkify.songplays sp
INNER JOIN sparkify.songs s ON s.song_id = sp.song_id
GROUP BY s.title
ORDER BY count DESC, s.title ASC

In [56]:
%%sql
SELECT * FROM sparkify.log_staging LIMIT 20

 * postgresql://dwhuser:***@dwh-cluster.cgjrwscs7tjx.us-west-2.redshift.amazonaws.com:5439/dwh
20 rows affected.


artist,auth,firstname,gender,iteminsession,lastname,length,level,location,method,page,registration,sessionid,song,status,ts,useragent,userid
Dalto,Logged In,Aiden,M,1,Hess,190.0,free,"La Crosse-Onalaska, WI-MN",PUT,NextSong,1540829025796,170,Falta Te Dizer,200,2018-11-02 03:34:34.796000,"""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.94 Safari/537.36""",86
Cat Stevens,Logged In,Kynnedi,F,1,Sanchez,200.0,free,"Cedar Rapids, IA",PUT,NextSong,1541079034796,88,Wild World,200,2018-11-02 09:13:37.796000,"""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36""",89
Mexican Institute of Sound,Logged In,Lily,F,11,Koch,278.0,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796,172,Mirando a Las Muchachas,200,2018-11-02 09:42:23.796000,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
Eyehategod,Logged In,Lily,F,19,Koch,155.0,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796,172,Sisterfucker (part Ii),200,2018-11-02 10:11:24.796000,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
,Logged In,Lily,F,27,Koch,,paid,"Chicago-Naperville-Elgin, IL-IN-WI",GET,Downgrade,1541048010796,172,,200,2018-11-02 10:34:14.796000,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
Local Natives,Logged In,Lily,F,35,Koch,237.0,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796,172,Airplanes,200,2018-11-02 11:13:13.796000,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
Dragonette,Logged In,Lily,F,38,Koch,246.0,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796,172,Fixin to Thrill,200,2018-11-02 11:25:30.796000,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
The Crests,Logged In,Lily,F,41,Koch,182.0,paid,"Chicago-Naperville-Elgin, IL-IN-WI",PUT,NextSong,1541048010796,172,16 Candles,200,2018-11-02 11:36:38.796000,"""Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/36.0.1985.125 Chrome/36.0.1985.125 Safari/537.36""",15
Broadcast 2000,Logged In,Aleena,F,8,Kirby,198.0,paid,"Waterloo-Cedar Falls, IA",PUT,NextSong,1541022995796,157,The View,200,2018-11-02 11:52:19.796000,Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0,44
Daughtry,Logged In,Aleena,F,12,Kirby,249.0,paid,"Waterloo-Cedar Falls, IA",PUT,NextSong,1541022995796,157,No Surprise,200,2018-11-02 12:07:23.796000,Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0,44
