In [1]:
#importing packages
import pandas as pd
import numpy as np
import datetime as dt
from datetime import timedelta
import pandas_datareader as pdr
import seaborn as sns
import matplotlib.pyplot as plt
import bs4 as bs
import requests
from IPython.display import clear_output
from scipy.stats import mstats
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import RandomizedSearchCV, validation_curve, TimeSeriesSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pickle
import os
from sklearn.model_selection import GridSearchCV
import yfinance as yf
import json
import time
# Apply seaborn's default theme globally so every matplotlib figure drawn later in the notebook uses it.
sns.set()

In [2]:
def _wilder_smooth(values, periods):
    """Apply Wilder's smoothing to a 1-D numeric sequence.

    The first smoothed value is placed ``periods - 1`` positions after the
    first non-NaN input and equals the mean of that ``periods``-long window
    (NaNs inside the window are skipped). Every later value is
    ``(previous * (periods - 1) + current) / periods``.

    Args:
        values: 1-D sequence of numbers (list, ndarray or pandas Series).
        periods: Smoothing window length.

    Returns:
        A float ndarray of the same length as ``values``. It is entirely NaN
        when the input has no valid value or is too short to seed the first
        average.
    """
    # Work on a plain array: integer indexing on a date-indexed Series is
    # label-based in recent pandas and only worked positionally by fallback.
    values = np.asarray(values, dtype=float)
    smoothed = np.full(len(values), np.nan)

    valid_positions = np.flatnonzero(~np.isnan(values))
    if valid_positions.size == 0:
        return smoothed

    start = valid_positions[0]
    seed = start + periods - 1
    if seed >= len(values):
        # Not enough history to compute the initial simple average.
        return smoothed

    smoothed[seed] = np.nanmean(values[start:start + periods])
    for i in range(seed + 1, len(values)):
        smoothed[i] = (smoothed[i - 1] * (periods - 1) + values[i]) / periods
    return smoothed


def _add_technical_indicators(prices):
    """Return a copy of a single-ticker OHLCV frame with indicator columns added.

    Args:
        prices: DataFrame with 'High', 'Low', 'Close' and 'Volume' columns,
            one row per trading day in chronological order.

    Returns:
        A new DataFrame containing the input columns plus the 8- and 20-period
        SMA, volume SMA, ATR, ADX, stochastic, RSI, MACD and Bollinger columns.
    """
    data = prices.copy()
    close, high, low, volume = data['Close'], data['High'], data['Low'], data['Volume']

    # Simple moving averages of the close over 8 and 20 periods
    data['SMA_8'] = close.rolling(window=8).mean()
    data['SMA_20'] = close.rolling(window=20).mean()
    data['SMA_ratio'] = data['SMA_20'] / data['SMA_8']

    # SMA Volume Ratio
    data['SMA8_Volume'] = volume.rolling(window=8).mean()
    data['SMA20_Volume'] = volume.rolling(window=20).mean()
    data['SMA_Volume_Ratio'] = data['SMA8_Volume'] / data['SMA20_Volume']

    # ATR: Wilder-smoothed true range
    prev_close = close.shift(1)
    data['TR'] = np.maximum(high - low,
                            np.maximum((high - prev_close).abs(),
                                       (prev_close - low).abs()))
    data['ATR_8'] = _wilder_smooth(data['TR'], 8)
    data['ATR_20'] = _wilder_smooth(data['TR'], 20)
    data['ATR_Ratio'] = data['ATR_8'] / data['ATR_20']

    # ADX: directional movement, smoothed and normalised by ATR
    prev_high = high.shift(1)
    prev_low = low.shift(1)
    up_move = high - prev_high
    down_move = prev_low - low
    data['+DM'] = np.where(prev_high.notna(),
                           np.where((high > prev_high) & (up_move > down_move), up_move, 0),
                           np.nan)
    data['-DM'] = np.where(prev_low.notna(),
                           np.where((prev_low > low) & (down_move > up_move), down_move, 0),
                           np.nan)
    for n in (8, 20):
        data[f'+DM_{n}'] = _wilder_smooth(data['+DM'], n)
        data[f'-DM_{n}'] = _wilder_smooth(data['-DM'], n)
        data[f'+DI_{n}'] = (data[f'+DM_{n}'] / data[f'ATR_{n}']) * 100
        data[f'-DI_{n}'] = (data[f'-DM_{n}'] / data[f'ATR_{n}']) * 100
        data[f'DX_{n}'] = np.round((data[f'+DI_{n}'] - data[f'-DI_{n}']).abs()
                                   / (data[f'+DI_{n}'] + data[f'-DI_{n}']) * 100)
        data[f'ADX_{n}'] = _wilder_smooth(data[f'DX_{n}'], n)

    # Stochastic Oscillators
    for n in (8, 20):
        lowest = low.rolling(window=n).min()
        highest = high.rolling(window=n).max()
        data[f'Lowest_{n}D'] = lowest
        data[f'High_{n}D'] = highest
        data[f'Stochastic_{n}'] = ((close - lowest) / (highest - lowest)) * 100
    data['Stochastic_%D_8'] = data['Stochastic_8'].rolling(window=8).mean()
    # NOTE(review): the 20-period %D smooths Stochastic_8, not Stochastic_20.
    # Left unchanged because the pickled models were presumably trained on this
    # exact feature -- confirm against the training pipeline before changing.
    data['Stochastic_%D_20'] = data['Stochastic_8'].rolling(window=20).mean()
    data['Stochastic_Ratio'] = data['Stochastic_%D_8'] / data['Stochastic_%D_20']

    # RSI: gains and losses are split, losses stored as positive magnitudes
    diff = close.diff()
    data['Diff'] = diff
    data['Up'] = diff.clip(lower=0)
    data['Down'] = diff.clip(upper=0).abs()
    for n in (8, 20):
        data[f'avg_{n}up'] = data['Up'].rolling(window=n).mean()
        data[f'avg_{n}down'] = data['Down'].rolling(window=n).mean()
        data[f'RS_{n}'] = data[f'avg_{n}up'] / data[f'avg_{n}down']
        data[f'RSI_{n}'] = 100 - (100 / (1 + data[f'RS_{n}']))
    data['RSI_ratio'] = data['RSI_8'] / data['RSI_20']

    # MACD (computed here as the 20-span EMA minus the 8-span EMA)
    data['8Ewm'] = close.ewm(span=8, adjust=False).mean()
    data['20Ewm'] = close.ewm(span=20, adjust=False).mean()
    data['MACD'] = data['20Ewm'] - data['8Ewm']

    # Bollinger Bands
    data['20MA'] = close.rolling(window=20).mean()
    data['SD'] = close.rolling(window=20).std()
    data['upperband'] = data['20MA'] + 2 * data['SD']
    data['lowerband'] = data['20MA'] - 2 * data['SD']
    data['Middleband'] = (data['upperband'] + data['lowerband']) / 2

    return data


def generate_predictions(user_stocks):
    """Score each ticker with the pickled per-sector random-forest model.

    For every ticker the function downloads roughly the last 60 calendar days
    of daily prices, derives the technical indicators, winsorizes the model
    features, and feeds the most recent row to the model stored at
    ``<cwd>/RFC_2/Sector_<id>``. The reported value is the probability of the
    model's last class, which the printed summary labels as the probability of
    a price increase in 14 days.

    Args:
        user_stocks: Iterable of ticker symbols (strings).

    Returns:
        dict mapping ticker -> probability (float) when every ticker was
        processed, or the integer 0 as soon as price data cannot be obtained
        for a ticker (tickers after it are not processed).

    Side effects:
        Prints progress and the final summary, clears the notebook cell output
        between tickers, and reads model pickles from disk.
    """
    target_variables = ['SMA_ratio', 'ATR_8', 'ATR_20', 'ATR_Ratio', 'SMA_Volume_Ratio',
                        'ADX_8', 'ADX_20', 'Stochastic_8', 'Stochastic_20', 'Stochastic_Ratio',
                        'RSI_8', 'RSI_20', 'RSI_ratio', 'MACD', 'Middleband']

    sector_dict = {'Technology': 0,
                   'Healthcare': 1,
                   'Financial Services': 2,
                   'Real Estate': 3,
                   'Consumer Cyclical': 4,
                   'Industrials': 5,
                   'Communication Services': 6,
                   'Consumer Defensive': 7,
                   'Energy': 8,
                   'Basic Materials': 9,
                   'Utilities': 10,
                   'Misc': 11}

    today = dt.date.today()
    start_date = today - timedelta(days=60)
    end_date = today - timedelta(days=1)
    model_dir = os.path.join(os.getcwd(), 'RFC_2')  # portable, not hard-coded '\\'
    loaded_models = {}  # sector id -> unpickled search object, loaded once per call
    output_dict = {}

    for ticker in user_stocks:
        # Extract data from Yahoo Finance
        # NOTE(review): confirm pandas_datareader's Yahoo reader still works in
        # the target environment; any failure here is reported as "no data".
        try:
            print(ticker)
            prices = pdr.get_data_yahoo(ticker, start=start_date, end=end_date)
        except Exception:
            prices = None

        if prices is None or prices.empty:
            print("No data obtained for provided ticker")
            return 0

        clear_output(wait=True)

        features = _add_technical_indicators(prices)

        # Winsorize data
        # NOTE(review): the leading NaNs produced by the rolling windows are
        # passed to winsorize as-is; confirm this matches the training pipeline.
        for variable in target_variables:
            features.loc[:, variable] = mstats.winsorize(features.loc[:, variable], limits=[0.1, 0.1])

        # Only the most recent row is scored; indicators that could not be
        # computed are replaced by 0.
        latest = features.tail(1).fillna(0)

        try:
            sector = yf.Ticker(ticker).info['sector']
        except Exception:
            sector = 'Misc'
        # Sectors missing from the table fall back to the 'Misc' model.
        sector_id = sector_dict.get(sector, sector_dict['Misc'])

        if sector_id not in loaded_models:
            model_path = os.path.join(model_dir, f'Sector_{sector_id}')
            # SECURITY: pickle.load executes arbitrary code from the file;
            # only load model files from a trusted location.
            with open(model_path, 'rb') as model_file:
                loaded_models[sector_id] = pickle.load(model_file)

        best_rf = loaded_models[sector_id].best_estimator_
        X_test = latest[target_variables]
        pred = best_rf.predict_proba(X_test)
        output_dict[ticker] = pred[0][-1]

    print("Probability of stock price increase in 14 days from " + end_date.strftime('%Y-%m-%d') + ":")
    for ticker, prob in output_dict.items():
        print("\t" + ticker + " - " + str(prob))
    return output_dict
        
        
        