# 2. Loading Ratings to Kinesis Data Stream

In [6]:
import time
import json
import datetime
import random
import math
import configparser

import boto3
import pandas as pd
import numpy as np
from tqdm import tqdm

In [None]:
# Location of the ratings CSV that will be replayed into the stream.
data_folder = "app-data"
file = "ratings.csv"

# The destination stream name lives in the [kinesis] section of config.conf.
config = configparser.ConfigParser()
config.read("config.conf")
stream_name = config.get("kinesis", "stream_name")

In [8]:
class MovieLensKinesisLoader:
    """Replay a CSV file into a Kinesis data stream in fixed-size row windows.

    The CSV is read into memory once, at construction time. Each call to
    ``load`` sends the next unsent windows, one Kinesis record per window,
    and remembers the last window sent so a later call resumes from there.
    """

    def __init__(self, data_folder, file, stream_name, window=1, dtype=None, date_fields=None):
        """Create the Kinesis client and read the source CSV.

        Args:
            data_folder: Folder (path prefix) that contains ``file``.
            file: Name of the CSV file inside ``data_folder``.
            stream_name: Name of the Kinesis data stream that receives records.
            window: Number of CSV rows packed into each Kinesis record.
            dtype: Optional column -> dtype mapping forwarded to
                ``pandas.read_csv``. Defaults to an empty mapping.
            date_fields: Optional list of column names forwarded to
                ``pandas.read_csv`` as ``parse_dates``. Defaults to an
                empty list.

        Raises:
            ValueError: If ``window`` is smaller than 1.
        """
        # Reject a non-positive window up front; otherwise ``load`` would
        # fail later with a ZeroDivisionError or silently send nothing.
        if window < 1:
            raise ValueError(f"window must be >= 1, got {window!r}")

        # Fresh containers per instance instead of shared mutable defaults.
        dtype = {} if dtype is None else dtype
        date_fields = [] if date_fields is None else date_fields

        self.data_folder = data_folder
        self.file = file
        self.kinesis_client = boto3.client('kinesis')
        self.stream_name = stream_name
        self.window = window
        # Index of the last window sent; -1 means nothing has been sent yet.
        self.current_window = -1
        # Running total of rows sent (not reset by ``load(restart=True)``).
        self.num_registers = 0
        self.df = pd.read_csv(
            f"{self.data_folder}/{self.file}",
            dtype=dtype,
            parse_dates=date_fields,
        )

    def load(self, max_iters=None, delay=0, restart=False):
        """Send the next unsent windows of the CSV to the stream.

        Each window is serialized as a JSON array of row records and sent
        with a single ``put_record`` call.

        Args:
            max_iters: Maximum number of windows to send during this call.
                ``None`` sends every remaining window.
            delay: Seconds to sleep after each record is sent.
            restart: When true, start again from the first window instead of
                resuming after the last window sent.
        """
        if restart:
            # -1 (not 0) so that window 0 is sent again after a restart.
            self.current_window = -1

        total_windows = math.ceil(len(self.df) / self.window)
        first = self.current_window + 1
        # The budget only covers windows that are actually sent, so repeated
        # calls with the same ``max_iters`` keep making progress.
        if max_iters is None:
            stop = total_windows
        else:
            stop = min(total_windows, first + max_iters)

        # NOTE(review): the original carried a "FIX - JUST PER REGISTER"
        # marker at this loop; its intent is unclear, so it is kept as a note.
        for i in tqdm(range(first, stop)):
            batch = self.df.iloc[i * self.window:(i + 1) * self.window]
            self.kinesis_client.put_record(
                StreamName=self.stream_name,
                Data=json.dumps(batch.to_dict(orient='records')),
                # Constant partition key, as in the original.
                PartitionKey="partitionkey",
            )
            self.num_registers += len(batch)
            self.current_window = i
            time.sleep(delay)

    def status(self):
        """Return the loader's progress counters as a dict.

        Returns:
            A dict with the index of the last window sent (``CurrentWindow``),
            the running count of rows sent (``RegistersInSQL``) and the
            configured window size (``Window``).
        """
        return {
            "CurrentWindow": self.current_window,
            "RegistersInSQL": self.num_registers,
            "Window": self.window,
        }

## 1. Read data from S3

In [None]:
# Build the loader with one CSV row per Kinesis record (window=1).
mloader = MovieLensKinesisLoader(
    data_folder,
    file,
    stream_name,
    window=1,
    dtype={
        "userId": np.int64,
        "movieId": np.int64,
        # Builtin `str` replaces the `np.str` alias, which was deprecated in
        # NumPy 1.20 and removed in 1.24 (it raised AttributeError there).
        "rating": str,
    },
    # date_fields=["timestamp"]  # disabled: type Timestamp is not JSON serializable
)
# Show the parsed column dtypes and a short preview of the data.
display(mloader.df.dtypes)
mloader.df.head(3)

## 2. Send Data to the Data Stream

In [None]:
# Process at most 100 windows in this run.
mloader.load(max_iters=100)

In [None]:
# Show the last window sent, the running count of rows sent, and the window size.
mloader.status()