# Daily Model Deployment

1. Load Model from S3
2. Query past 10 mintues of data from MongoDB
3. Build Features
4. Make Predictions
5. Log predictions to database

In [4]:
############### Initialize ###################

# Basics
from pymongo import MongoClient
import os
import numpy as np
import pandas as pd
import time
import boto3
import io
import warnings
warnings.filterwarnings('ignore')
import time
from datetime import date, datetime
import subprocess


# NLP
import nltk
import spacy
spacy.load('en')
from nltk.corpus import stopwords
import preprocessor as p

# Model Infrastructure
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD, PCA
from sklearn.pipeline import make_pipeline, Pipeline, FeatureUnion
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn import metrics
import dill as pickle

# Models
from sklearn.linear_model import LogisticRegression

# Database Setup
import mysql.connector
from sqlalchemy import create_engine
from sqlalchemy import inspect
from sqlalchemy import MetaData
from sqlalchemy import Table
from sqlalchemy import Column
from sqlalchemy import Integer, String, DateTime, Float

In [3]:
########## Database Setup #################
User = os.environ['DB_USER']
password = os.environ['DB_PWD']
dbname = os.environ['DB_NAME']
IP = os.environ['IP']

engine = create_engine('mysql+mysqlconnector://{}:{}@{}:3306/{}'.format(User,
                                                                        password, IP, dbname), echo=False)
conn = engine.connect()

# Check to see if the tables are created and if not, create them
meta = MetaData(engine)

# Create prediction table
if not engine.dialect.has_table(engine, 'model_predictions'):
    print('Model_predictions Table does not exist')
    print('Model_predictions Table being created....')
    # Time, Source, Current Count, Count Diff
    t1 = Table('model_predictions', meta,
               Column('run_time', DateTime, default=datetime.utcnow),
               Column('model_name', String(30)),
               Column('model_version_number', Integer),
               Column('Company', String(30)),
               Column('Prediction', Integer))
    t1.create()
else:
    print('Model_predictions Table Exists')

# Create table object
meta = MetaData(engine, reflect=True)
model_predictions_table = meta.tables['model_predictions']    

# Write Function
def database_log(name, version_number, company, prediction):
    #Need to log these items to a database.
        
    ins = model_predictions_table.insert().values(
            run_time = datetime.now(),
            model_name = name,
            model_version_number = version_number,
            Company = company,
            Prediction = prediction
               )
    conn.execute(ins)

Model_predictions Table does not exist
Model_predictions Table being created....


In [5]:
# Download the Model
subprocess.run(['aws', 's3','cp','s3://brandyn-twitter-sentiment-analysis/Models/Daily_Stock_Prediction_latest.pk','./Models'])

CompletedProcess(args=['aws', 's3', 'cp', 's3://brandyn-twitter-sentiment-analysis/Models/Daily_Stock_Prediction_latest.pk', './Models'], returncode=1)

In [6]:
###################### Bring In Data #######################
#Setup Mongo and create the database and collection
User = os.environ['MONGODB_USER']
password = os.environ['MONGODB_PASS']
IP = os.environ['IP']

client = MongoClient(IP, username=User, password=password)
db = client['stock_tweets']

#Grab references
twitter_coll_reference = db.twitter
iex_coll_reference = db.iex

In [8]:
# Create Data Frame
twitter_data = pd.DataFrame(list(twitter_coll_reference.find()))

In [10]:
twitter_data.head()

Unnamed: 0,Company,_id,created_at,favorite_count,id_str,lang,quote_count,reply_count,retweet_count,retweeted,text,user_followers_count,user_name,user_screen_name,user_statuses_count
0,[TSLA],5aa6c1ccb60e700001ed139d,Mon Mar 12 18:07:08 +0000 2018,0,973259109133778948,en,0,0,0,False,$TSLA so nice so obvious...,1703,TradeTherapAnalytics,TradeTexasBig,67528
1,[AAPL],5aa6c1d7b60e700001ed139e,Mon Mar 12 18:07:19 +0000 2018,0,973259155925434370,en,0,0,0,False,@JoKiddo But how proprietary is that? Does it ...,2901,Gilmo Report,gilmoreport,18524
2,"[AAPL, GOOG, GOOGL]",5aa6c1dcb60e700001ed139f,Mon Mar 12 18:07:23 +0000 2018,0,973259173524918272,en,0,0,0,False,RT @StockTwits: This is the race to $1 trillio...,5256,Mark Hill,MarketsHill,13523
3,[AAPL],5aa6c1ddb60e700001ed13a0,Mon Mar 12 18:07:25 +0000 2018,0,973259179040362496,en,0,0,0,False,$AAPL may be working on a MacBook Air Pro. htt...,486,William White,WilliamWhiteIP,7850
4,[TWTR],5aa6c1e1b60e700001ed13a1,Mon Mar 12 18:07:29 +0000 2018,0,973259197189107713,en,0,0,0,False,In 300 shares of $TWTR at $35.42,979,Ross,VeeDub78,40293


In [11]:
# Get Window time for 10 minutes
#window_time = time.time() - (60*10)

datetime.now() - 

datetime.datetime(2018, 5, 19, 5, 21, 52, 13578)

In [12]:
datetime.now() - (60*10)

TypeError: unsupported operand type(s) for -: 'datetime.datetime' and 'int'