## Load DWH Params from a file

In [31]:
import configparser
import psycopg2
import boto3

In [None]:
# Parse dwh.cfg. The `with` block closes the file handle as soon as parsing is
# done instead of leaving it open for the lifetime of the kernel.
config = configparser.ConfigParser()
with open('dwh.cfg') as cfg_file:
    config.read_file(cfg_file)

# [AWS] section: access key pair passed to the boto3 clients.
KEY    = config.get('AWS', 'KEY')
SECRET = config.get('AWS', 'SECRET')

# [CLUSTER] section: cluster identifier and database connection settings.
DWH_CLUSTER_IDENTIFIER = config.get("CLUSTER", "DWH_CLUSTER_IDENTIFIER")
DWH_DB_NAME            = config.get("CLUSTER", "DB_NAME")
DWH_DB_USER            = config.get("CLUSTER", "DB_USER")
DWH_DB_PASSWORD        = config.get("CLUSTER", "DB_PASSWORD")
DWH_PORT               = config.getint("CLUSTER", "DB_PORT")  # same as int(config.get(...))
DWH_ENDPOINT           = config.get("CLUSTER", "HOST")

# [IAM_ROLE] section: name of the role used by the teardown cell.
DWH_IAM_ROLE_NAME      = config.get("IAM_ROLE", "dwh_iam_role_name")



## Create clients and resources for IAM and Redshift

In [None]:
# Both clients use the same region and credentials; only the service name differs,
# so the shared keyword arguments are collected once.
aws_client_args = dict(
    region_name='us-west-2',
    aws_access_key_id=KEY,
    aws_secret_access_key=SECRET,
)

iam = boto3.client('iam', **aws_client_args)
redshift = boto3.client('redshift', **aws_client_args)

## Connect to the Redshift cluster

In [33]:
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [34]:
# Pass the settings as keyword arguments so psycopg2 assembles and quotes the DSN
# itself. Formatting them into a space-separated string breaks as soon as a value
# (typically the password) contains a space, a quote or a backslash.
# NOTE: despite its name, `conn_string` holds the connection object, not a string;
# the name is kept because the following cells refer to it.
conn_string = psycopg2.connect(
    host=DWH_ENDPOINT,
    dbname=DWH_DB_NAME,
    user=DWH_DB_USER,
    password=DWH_DB_PASSWORD,
    port=DWH_PORT,
)
print(conn_string)


<connection object at 0x7f5f30fffe00; dsn: 'user=dwhuser password=xxx dbname=sparkfly_songs host=dwhclustersparkfly.c1rlryzy0frw.us-west-2.redshift.amazonaws.com port=5439', closed: 0>


## Perform some simple data analysis

In [35]:
# Create a cursor on the open connection. `conn_string` is the object returned by
# psycopg2.connect(...) in the previous cell, i.e. a connection, not a DSN string.
cur = conn_string.cursor()

### 1. Most played song

In [None]:
# Count plays per song title and keep only the title with the highest count.
# Adjacent literals are concatenated by the parser, so the statement sent to the
# server is a single line of SQL.
most_played_sql = (
    " select s.title as song_name,COUNT(sp.song_id) as most_played "
    "FROM songplays sp "
    "JOIN songs s on sp.song_id=s.song_id "
    "GROUP BY(s.title) "
    "ORDER  BY most_played DESC "
    "limit 1;"
)
cur.execute(most_played_sql)

In [None]:
# Pull every row of the pending result set, then echo them one per line.
rows = cur.fetchall()
for record in rows:
    print(record)


## Close connection

In [None]:
# Release the cursor first, then the connection it was created from.
# The connection is bound to `conn_string`; the cells shown never define `conn`,
# so calling `conn.close()` would raise NameError and leave the session open.
cur.close()
conn_string.close()

## Delete a cluster

In [None]:
#### CAREFUL!!
#-- This cell is live: running it deletes the cluster identified by
#-- DWH_CLUSTER_IDENTIFIER, and no final snapshot is taken first.
redshift.delete_cluster(
    ClusterIdentifier=DWH_CLUSTER_IDENTIFIER,
    SkipFinalClusterSnapshot=True,
)
#### CAREFUL!!

In [None]:
# Look the cluster up by identifier and keep the first entry of the 'Clusters' list.
cluster_listing = redshift.describe_clusters(ClusterIdentifier=DWH_CLUSTER_IDENTIFIER)
myClusterProps = cluster_listing['Clusters'][0]

# NOTE(review): prettyRedshiftProps is not defined in the cells shown here;
# confirm it is defined or imported before this cell runs.
prettyRedshiftProps(myClusterProps)

In [None]:
#### CAREFUL!!
#-- This cell is live: running it detaches the S3 read-only policy from the role
#-- named by DWH_IAM_ROLE_NAME and then deletes that role.
s3_read_only_policy_arn = "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
iam.detach_role_policy(
    RoleName=DWH_IAM_ROLE_NAME,
    PolicyArn=s3_read_only_policy_arn,
)
iam.delete_role(RoleName=DWH_IAM_ROLE_NAME)
#### CAREFUL!!