# Step 1. Set up paths

In [1]:
import os
from functions.setup_aws import *

# Switch the working directory to the project root so the relative paths used
# in the cells below ('configs/...', 'samples/...', 'functions/...') resolve.
# NOTE(review): this is a machine-specific absolute path — adjust it when
# running the notebook on another machine.
os.chdir('/home/mic/Desktop/Udacity 2020/data engineering/Projects Repo/data_engineering_udacity/Proj2 AWS DWH/')

# Paths of the two config files handed to the setup helpers below.
iam_config = 'configs/iam.cfg'
aws_config = 'configs/aws.cfg'

# Step 2. Set up AWS

### Step 2a. Set up Redshift cluster

In [None]:
# Create a client for Redshift from the IAM config file.
# The resulting object is reused below to create, describe and delete the cluster.
redshift_admin = create_redshift_admin(iam_config)

In [None]:
# Create a Redshift cluster with the specs defined in aws.cfg.
# `response` is kept because the status cell reads the cluster identifier from it.
response = create_cluster(aws_config, redshift_admin)

#### Check status until cluster is available.

In [None]:
# Look up the cluster that was just requested and display its current status.
# Re-run this cell until the cluster is reported as available.
cluster_id = response['Cluster']['ClusterIdentifier']
description = redshift_admin.describe_clusters(ClusterIdentifier=cluster_id)
cluster_params = description['Clusters'][0]
cluster_params['ClusterStatus']

#### Update aws.cfg with some key cluster parameters

In [None]:
# Persist key parameters of the cluster (VPC id, security group id, endpoint
# host and port) into aws.cfg so later steps can read them from the config file.
# Explicit import so this cell does not rely on the star import exposing it.
import configparser

# get cluster parameters
# NOTE(review): 'Endpoint' is presumably only populated once the cluster is
# available — run the status cell first.
cl_vpc_id = cluster_params['VpcId']
cl_vpc_sg_id = cluster_params['VpcSecurityGroups'][0]['VpcSecurityGroupId']
cl_endpoint_adress = cluster_params['Endpoint']['Address']
cl_endpoint_port = cluster_params['Endpoint']['Port']

# update aws.cfg
config = configparser.ConfigParser()
# Read through a context manager so the file handle is closed deterministically.
with open(aws_config) as configfile:
    config.read_file(configfile)

config['CLUSTER']['cl_vpc_id'] = cl_vpc_id
config['CLUSTER']['cl_vpc_sg_id'] = cl_vpc_sg_id
config['DB']['db_host'] = cl_endpoint_adress
# configparser only stores strings, so the numeric port is converted explicitly.
config['DB']['db_port'] = str(cl_endpoint_port)

# The `with` block closes the file; no explicit close() is needed afterwards.
with open(aws_config, 'w') as configfile:
    config.write(configfile)

### Step 2b. Open endpoint for inbound connections

In [None]:
# Open an incoming TCP port to access the cluster endpoint.
# The helper is given both config files (IAM and AWS/cluster settings).
open_tcp_endpoint(iam_config, aws_config)

#### Test connection

In [None]:
# Test the connection: open it using the settings in aws.cfg, print the
# connection object as a visual check, then close it again straight away.
conn = make_connection(aws_config)
print(conn)
conn.close()

# Step 3. Download sample raw data

In [None]:
# Handle on the S3 bucket that holds the sample raw data.
bucket_name = 'udacity-dend'
s3 = create_s3(iam_config)
sampleDbBucket = s3.Bucket(bucket_name)

In [None]:
# Print the sample song-data objects found under one key prefix.
song_objects = sampleDbBucket.objects.filter(Prefix="song_data/A/A/A/TRAAAA")
for song_obj in song_objects:
    print(song_obj)

In [None]:
# Print every object stored under the log-data prefix.
log_objects = sampleDbBucket.objects.filter(Prefix="log_data")
for log_obj in log_objects:
    print(log_obj)

In [None]:
# Download one sample song file and one sample log file for local inspection.
import os

# download_file writes to the given local path and does not create missing
# parent directories, so make sure the target directory exists first.
os.makedirs('samples', exist_ok=True)

# Resolve the bucket once and reuse it for both downloads.
sample_bucket = s3.Bucket(bucket_name)
sample_bucket.download_file(Key='song_data/A/A/A/TRAAAAK128F9318786.json', Filename='samples/TRAAAAK128F9318786.json')
sample_bucket.download_file(Key='log_data/2018/11/2018-11-01-events.json', Filename='samples/2018-11-01-events.json')

# Step 4. ETL - create and populate tables

In [2]:
# Execute the table-creation script with IPython's %run magic (path is relative to the working directory set in Step 1).
%run 'functions/create_tables.py'

In [None]:
# Execute the ETL script with IPython's %run magic (path is relative to the working directory set in Step 1).
%run 'functions/etl.py'

# Clean-up

In [None]:
# Delete the cluster.
# NOTE(review): the identifier is hard-coded here instead of reusing the value
# taken from the create_cluster response — confirm it matches the identifier
# configured in aws.cfg before running.
cluster_id = 'redshiftCluster1'
delete_cluster(cluster_id, redshift_admin)

In [None]:
# Verify the deletion: print the cluster's status while it can still be
# described; once the lookup fails, print the error raised instead.
try:
    description = redshift_admin.describe_clusters(ClusterIdentifier=cluster_id)
    status = description['Clusters'][0]['ClusterStatus']
    print('Cluster status: {}'.format(status))
except Exception as err:
    print(err)