# Daily Model Deployment

1. Load the model from S3
2. Create a DataFrame from a Kinesis structured stream in 10-minute intervals
3. Make predictions
4. Log the predictions to the database

In [38]:
########################### Initialize ####################################

# Basic
import subprocess
import os
import numpy as np
import pandas as pd
import time
from datetime import date, datetime
import boto3
import boto3.s3
import os.path
import sys
import io
import warnings

# Pipeline
from pyspark.ml import Pipeline, PipelineModel

# Feature Engineering
from pyspark.ml.feature import (VectorAssembler,VectorIndexer,
                                Tokenizer,StopWordsRemover, CountVectorizer,IDF,StringIndexer, HashingTF)
from pyspark.sql.functions import length
from pyspark.sql.functions import col, udf
from pyspark.sql.types import StringType
import preprocessor as p
from pyspark.sql.functions import dayofyear, concat_ws, collect_list, countDistinct
from pyspark.sql.types import *
# Models
from pyspark.ml.classification import LogisticRegression

# Streaming
from pyspark.streaming.kinesis import KinesisUtils, InitialPositionInStream
from pyspark.sql import SparkSession
# Obtain the SparkSession for this notebook under the application name 'nlp';
# getOrCreate() hands back an already-active session when one exists.
spark = (
    SparkSession.builder
    .appName('nlp')
    .getOrCreate()
)


# Database Setup
import mysql.connector
from sqlalchemy import create_engine
from sqlalchemy import inspect
from sqlalchemy import MetaData
from sqlalchemy import Table
from sqlalchemy import Column
from sqlalchemy import Integer, String, DateTime, Float

In [30]:
# Download the Model
# Recursively copy the model directory from S3 into ./Models using the AWS CLI.
# check=True makes a non-zero exit status raise subprocess.CalledProcessError,
# so a failed download stops the notebook here instead of being silently
# ignored. On success the CompletedProcess is still the cell's displayed value.
subprocess.run(
    [
        'aws', 's3', 'cp',
        's3://brandyn-twitter-sentiment-analysis/Models/Daily_Stock_Prediction_latest/',
        './Models/Daily_Stock_Prediction_latest',
        '--recursive',
    ],
    check=True,
)

CompletedProcess(args=['aws', 's3', 'cp', 's3://brandyn-twitter-sentiment-analysis/Models/Daily_Stock_Prediction_latest/', './Models/Daily_Stock_Prediction_latest', '--recursive'], returncode=0)

In [37]:
# Deserialize Model
# Rebuild the fitted pipeline from the local model directory.
model = PipelineModel.load(
    './Models/Daily_Stock_Prediction_latest'
)

In [None]:
############################# Bring in Data ###############################

#### Twitter ####
# Create Schema
# Explicit field list for the Twitter data; every field is nullable
# (the StructField default), matching what StructType.add() produces.
twitterSchema = StructType([
    StructField("created_at", TimestampType()),
    StructField("text", StringType()),
    StructField("user_followers_count", LongType()),
    StructField("user_name", StringType()),
    StructField("Company", StringType()),
])

# Create Dataframe
