# 1. Loading Tags to RDS

#### Uncomment the following cell to download the data

In [9]:
import boto3
import warnings
import numpy as np
# SQLLoader / KinesisLoader are the loader classes used throughout this notebook
# (presumably a project-local `Loaders` module shipped next to the notebook — confirm).
from Loaders import SQLLoader, KinesisLoader
# Suppress every warning category for the rest of the kernel session. Note that this also
# hides DeprecationWarning/FutureWarning output from libraries such as NumPy and pandas.
warnings.filterwarnings("ignore")

In [10]:
# Pull the database connection settings from SSM Parameter Store, one decrypted
# parameter per setting, in the order host / user / password / database name.
ssm = boto3.client('ssm')
dbhost, dbuser, dbpass, dbname = (
    ssm.get_parameter(Name=param_path, WithDecryption=True)['Parameter']['Value']
    for param_path in (
        '/moviestream/dbhost',
        '/moviestream/dbuser',
        '/moviestream/dbpass',
        '/moviestream/dbname',
    )
)

In [11]:
import configparser

# Load the Kinesis stream name from the local config file.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open, so a missing config.conf would
# only surface on the next line as a confusing NoSectionError. Opening the file explicitly
# and using read_file() raises FileNotFoundError at the real point of failure instead.
with open("config.conf") as config_file:
    config.read_file(config_file)
stream_name = config.get("kinesis", "stream_name")

# A. Creating Loaders

In [12]:
# Source CSV, database engine and target table for the tags dataset.
file_tags = 'app-data/tags.csv'
dbengine = 'postgresql'
dbtable = 'tags'
# Column dtypes handed to the loader. The builtin `str` is used instead of `np.str`:
# `np.str` was only a deprecated alias of the builtin and was removed in NumPy 1.24,
# where accessing it raises AttributeError.
dtype = {"userId": np.int64, "movieId": np.int64, "tag": str, "timestamp": np.int64}

# drop=True and the dtype mapping are forwarded to SQLLoader unchanged.
tloader = SQLLoader(file_tags, dbengine, dbhost, dbuser, dbpass, dbname, dbtable, drop=True, dtype=dtype)
# Preview the first three rows of the loader's frame together with its status dict.
display(tloader.df.head(3), tloader.status())

Unnamed: 0,userId,movieId,tag,timestamp,_insert,_insert_time,_update,_update_time,_delete,_delete_time
0,3,260,classic,1439472355,0,0,0,0,0,0
1,3,260,sci-fi,1439472256,0,0,0,0,0,0
2,4,1732,dark comedy,1573943598,0,0,0,0,0,0


{'state': 'AVAILABLE',
 'iteration': 0,
 'inserted': 0,
 'updated': 0,
 'deleted': 0,
 'indb': None}

In [14]:
# Source CSV for the ratings dataset that is sent to the Kinesis stream.
file_ratings = 'app-data/ratings.csv'
# `rating` is deliberately read as text. The builtin `str` replaces `np.str`, a deprecated
# alias of the same builtin that was removed in NumPy 1.24 (AttributeError on access).
# Author's disabled option, kept for reference: date_fields = ["timestamp"]
dtype = {"userId": np.int64, "movieId": np.int64, "rating": str}
kloader = KinesisLoader(file_ratings, stream_name, dtype=dtype)
# Preview the first three rows of the loader's frame.
display(kloader.df.head(3))

Unnamed: 0,userId,movieId,rating,timestamp,_sent,_sent_time
0,1,296,5.0,1147880044,0,0
1,1,306,3.5,1147868817,0,0
2,1,307,5.0,1147868828,0,0


--------

# B. SQL Loading
### B1. Full-Load

In [None]:
# The initial IUD load must contain only inserts (updates=0, deletes=0): per the original
# author's note, this is what lets the table be created, so that the statement on the
# second line can then alter it to allow replication of deletes (REPLICA IDENTITY FULL).
# The trailing ';' suppresses the cell's output.
# NOTE(review): `tloader.engine` looks like a SQLAlchemy engine — if so, Engine.execute()
# was removed in SQLAlchemy 2.0; confirm the installed version.
tloader.iud(inserts=100, updates=0 , deletes=0, max_registers=2000)
tloader.engine.execute(f"ALTER TABLE {dbtable} REPLICA IDENTITY FULL");

### B2. CDC (Inserts, Updates, Deletes)

In [None]:
# Generate change-data-capture traffic against the tags table with inserts=100,
# updates=10 and deletes=1.
# NOTE(review): presumably iudx() repeats the iud() step with these values until
# interrupted — confirm against the Loaders module.
tloader.iudx(inserts=100, updates=10, deletes=1)

# C. Kinesis Loading

In [None]:
# Start the Kinesis load using the loader built from the ratings CSV.
# NOTE(review): presumably loadx() pushes the rows of kloader.df to `stream_name` and
# tracks progress in the `_sent` / `_sent_time` columns — confirm against the Loaders module.
kloader.loadx()