In [1]:
import importlib

In [29]:
import create_tables

In [34]:
importlib.reload(create_tables)

<module 'create_tables' from '/home/workspace/create_tables.py'>

In [35]:
create_tables.main()

QUERY 
    CREATE TABLE IF NOT EXISTS users
    ( user_id int PRIMARY KEY,
    first_name varchar,
    last_name varchar,
    gender char,
    level varchar
    );

QUERY 
    CREATE TABLE IF NOT EXISTS artists
    (
    artist_id varchar PRIMARY KEY,
    name varchar,
    location varchar,
    latitude real,
    longitude real
    );

QUERY 
    CREATE TABLE IF NOT EXISTS songs
    (
    song_id varchar PRIMARY KEY,
    title varchar(200),
    artist_id varchar references artists(artist_id),
    year int,
    duration real
    );

QUERY 
    CREATE TABLE IF NOT EXISTS timestamps
    (
    start_time bigint PRIMARY KEY,
    hour int,
    day int,
    weekofyear int,
    month int,
    year int,
    dayofweek int
    );

QUERY 
    CREATE TABLE IF NOT EXISTS songplays
    (songplay_id BIGINT IDENTITY(0,1),
    start_time bigint references timestamps(start_time),
    user_id int references users(user_id),
    level varchar,
    song_id varchar references songs(song_id),
    artist_id var

In [1]:
%load_ext sql

In [2]:
import configparser
import psycopg2
from sql_queries import create_table_queries, drop_table_queries


In [3]:
config = configparser.ConfigParser()
config.read('dwh.cfg')

['dwh.cfg']

# connect to redshift cluster and DB

In [4]:
DB_USER = config.get("CLUSTER","DB_USER")
DB_PASSWORD = config.get("CLUSTER","DB_PASSWORD")
HOST = config.get("CLUSTER","HOST")
DB_PORT = config.get("CLUSTER","DB_PORT")
DB_NAME = config.get("CLUSTER","DB_NAME")


In [5]:
conn_string = "postgresql://{}:{}@{}:{}/{}" \
                        .format(DB_USER, DB_PASSWORD, HOST, DB_PORT, DB_NAME)



In [6]:
%sql $conn_string

'Connected: awsuser@dev'

In [8]:
%%sql
INSERT INTO users(user_id, first_name, last_name, gender, level)
SELECT DISTINCT    userId AS user_id,
    firstName AS first_name,
    lastName AS last_name,
    gender,
    level
FROM staging_events_table;


 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev


InternalError: (psycopg2.InternalError) Cannot insert a NULL value into column user_id
DETAIL:  
  -----------------------------------------------
  error:  Cannot insert a NULL value into column user_id
  code:      8007
  context:   query execution
  query:     287
  location:  column:1
  process:   query0_127_287 [pid=17396]
  -----------------------------------------------

 [SQL: 'INSERT INTO users(user_id, first_name, last_name, gender, level)\nSELECT DISTINCT    userId AS user_id,\n    firstName AS first_name,\n    lastName AS last_name,\n    gender,\n    level\nFROM staging_events_table;']

In [10]:
%sql SELECT count(*) from users;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


count
0


In [21]:
%%sql 
SELECT *
FROM staging_events_table
LIMIT 2;


 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
2 rows affected.


artist,auth,firstname,gender,iteminsession,lastname,length,level,location,method,page,registration,sessionid,song,status,ts,useragent,userid
,Logged In,Walter,M,0,Frye,,free,"San Francisco-Oakland-Hayward, CA",GET,Home,1540919166796,38,,200,1541105830796,"""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36""",39
,Logged In,Kaylee,F,0,Summers,,free,"Phoenix-Mesa-Scottsdale, AZ",GET,Home,1540344794796,139,,200,1541106106796,"""Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36""",8


In [27]:
%%sql
SELECT * FROM staging_events_table
WHERE userId IS NULL
LIMIT 1;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


artist,auth,firstname,gender,iteminsession,lastname,length,level,location,method,page,registration,sessionid,song,status,ts,useragent,userid
,Logged Out,,,0,,,free,,PUT,Login,,52,,307,1541207073796,,


In [25]:
%%sql
SELECT count(*) 
FROM staging_events_table
;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


count
8056


In [24]:
%%sql
SELECT count(*) 
FROM staging_songs_table
;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


count
14896


In [21]:
%%sql
    COPY staging_songs_table
    FROM 's3://udacity-dend/song_data'
    credentials 'aws_iam_role=arn:aws:iam::349696042462:role/myRedshiftRole'
    json 'auto'
    ;


 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
Done.


[]

In [28]:
%%sql
SELECT * FROM users
LIMIT 5;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
5 rows affected.


user_id,first_name,last_name,gender,level
39,Walter,Frye,M,free
10,Sylvie,Cruz,F,free
101,Jayden,Fox,M,free
69,Anabelle,Simpson,F,free
95,Sara,Johnson,F,paid


In [29]:
%%sql
SELECT * FROM artists
LIMIT 5;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
5 rows affected.


artist_id,name,location,latitude,longitude
ARSZ7L31187FB4E610,Devotchka,"Denver, CO",39.74,-104.992
ARKYKXP11F50C47A6A,The Supersuckers,,,
ARN4G4X1187FB485B4,The Donkeys,Great Britain / UK,54.3139,-2.23218
ARQTC851187B9B03AF,O.A.R.,"Rockville, MD",39.0817,-77.1512
AROJWW21187FB574E6,Sanne Salomonsen,,,


In [32]:
%%sql
SELECT * FROM songs
LIMIT 5;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
5 rows affected.


song_id,title,artist_id,year,duration
SOXZYWX12A6310ED0C,It's About Time,ARC1IHZ1187FB4E920,0,246.987
SOMFRKT12A8C146C67,Without You,ARQVORN11F50C4EFEC,0,165.381
SOWCWAD12AB017FD51,Memories & Rust,ARPLTRF11E8F5C15C5,0,222.824
SOTCOTZ12A8C136BCB,Elevator,AR7WK5411A348EF5EA,2008,248.32
SOBBGQK12AB0183F1E,The Beacon,AR4E4121187FB51F4E,2009,201.874


In [33]:
%%sql
SELECT count(*) FROM songs;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


count
14896


In [9]:
%%sql
SELECT min(year), max(year) FROM timestamps;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


min,max
2018,2018


In [10]:
%%sql
SELECT min(month), max(month) FROM timestamps;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


min,max
11,11


In [11]:
%%sql
SELECT min(day), max(day) FROM timestamps;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


min,max
1,30


In [14]:
%%sql
SELECT min(weekofyear), max(weekofyear) FROM timestamps;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


min,max
44,48


In [8]:
%%sql
DROP TABLE songplays;
DROP TABLE timestamps;

 * postgresql://awsuser:***@redshift-cluster-1.cofp0blphhiz.us-west-2.redshift.amazonaws.com:5439/dev
Done.
Done.


[]