In [127]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor, RandomForestClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, explained_variance_score,mean_absolute_error
import math
import yfinance as yf

Get SP500 Data

In [128]:
def get_sp500_list():
    data = pd.read_csv('data/constituents_csv.csv')
    return data

sp_500 = get_sp500_list()

In [129]:
def get_nyse_list():
    data = pd.read_csv('data/nyse.csv', sep = '|')
    return data
    
nyse = get_nyse_list()

Get Stock Prices

In [94]:
start_date = '2021-04-16'
end_date = '2021-10-16'
ticker = 'AAPL'

a = yf.download(ticker, start_date, end_date)
a.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-16,134.300003,134.669998,133.279999,134.160004,133.537262,84922400
2021-04-19,133.509995,135.470001,133.339996,134.839996,134.214096,94264200
2021-04-20,135.020004,135.529999,131.809998,133.110001,132.492126,94812300
2021-04-21,132.360001,133.75,131.300003,133.5,132.88031,68847100
2021-04-22,133.039993,134.149994,131.410004,131.940002,131.32756,84566500


Calculate Market Capitalization of JPMC

In [130]:
ticker = 'JPM'
start_date = '2021-04-16'
end_date = '2021-10-16'

def calc_market_capitalization(ticker_df):
    ticker_df['market_cap'] = ticker_df['Close'] * ticker_df['Volume']

jpm_data = yf.download(ticker, start_date, end_date)
calc_market_capitalization(jpm_data)

jpm_data.iloc[-1].market_cap

[*********************100%***********************]  1 of 1 completed


2376041879.7042847

ML Model

In [101]:
start_date = '2021-04-16'
end_date = '2021-10-16'
ticker = 'AAPL'

aapl_data = yf.download(ticker, start_date, end_date)

def make_x_and_y(close_prices, period = 6):
    x = []
    y = []
    for i in range(len(close_prices) - period):
        x.append(close_prices[i:i+period])
        y.append(close_prices[i+period])
    return x, y

x,y = make_x_and_y(list(aapl_data.Close))
print(x[0:2])

[*********************100%***********************]  1 of 1 completed
[[134.16000366210938, 134.83999633789062, 133.11000061035156, 133.5, 131.94000244140625, 134.32000732421875], [134.83999633789062, 133.11000061035156, 133.5, 131.94000244140625, 134.32000732421875, 134.72000122070312]]


In [131]:
def return_model(x,y):
    clf = RandomForestRegressor()
    clf.fit(x,y)
    return clf
period = 6
latest_feat = np.array(list(aapl_data.Close)[-period:]).reshape((1,-1))
classifier = return_model(x,y)
tomorrow_prediction = classifier.predict(latest_feat)

print("Tomorrow's Predicted Value is : ", tomorrow_prediction[0])
print("Today's Value is: ", aapl_data.Close[-1])

Tomorrow's Predicted Value is :  144.20079818725586
Today's Value is:  144.83999633789062


In [142]:
regressor_list = [LinearRegression(), DecisionTreeRegressor(), ExtraTreesRegressor(), RandomForestRegressor()]
regressor_list2 = [LinearRegression(), DecisionTreeRegressor(), ExtraTreesRegressor(), RandomForestRegressor(n_estimators = 200)]


def make_x_and_y(close_prices, period = 6):
    x = []
    y = []
    for i in range(len(close_prices) - period):
        x.append(close_prices[i:i+period])
        y.append(close_prices[i+period])
    return x, y

def dynamic_return_model(model, x, y):
    clf = model
    clf.fit(x,y)
    return clf

def train_model_return_accuracy(stock_data, model):
    x,y = make_x_and_y(list(stock_data.Close))
    X_train, X_test, y_train, y_test = train_test_split(x, y, random_state = 42)
    classifier = dynamic_return_model(model, X_train, y_train)
    y_pred = []
    for i in X_test:
        feat = np.array(i).reshape((1,-1))
        y_pred_latest = classifier.predict(feat)
        y_pred.append(y_pred_latest[0])
    print("\n")
    print("Model - " + str(model))
    print("Explained variance: ", explained_variance_score(y_test,y_pred))
    print("Mean absolute error: ", mean_absolute_error(y_test, y_pred))
    print("R2 Score: ", r2_score(y_test, y_pred))


print("\n\n\n" + "="*25)
print("Machine Learning Models Training Accuracy")
for i in regressor_list:
    train_model_return_accuracy(aapl_data, i)

print("\n\n\n" + "="*25)
print("Machine Learning Models Training Accuracy - Estimators = 200")
for i in regressor_list2:
    train_model_return_accuracy(aapl_data,i)




Machine Learning Models Training Accuracy


Model - LinearRegression()
Explained variance:  0.9671295198249269
Mean absolute error:  1.3718327853180785
R2 Score:  0.9659366459884328


Model - DecisionTreeRegressor()
Explained variance:  0.9497813954103168
Mean absolute error:  1.8429023988785282
R2 Score:  0.9407521696875253


Model - ExtraTreesRegressor()
Explained variance:  0.948132256827182
Mean absolute error:  1.7661990577943862
R2 Score:  0.9474764598151771


Model - RandomForestRegressor()
Explained variance:  0.95613626065873
Mean absolute error:  1.6095508969214658
R2 Score:  0.9546531886522663



Machine Learning Models Training Accuracy - Estimators = 200


Model - LinearRegression()
Explained variance:  0.9671295198249269
Mean absolute error:  1.3718327853180785
R2 Score:  0.9659366459884328


Model - DecisionTreeRegressor()
Explained variance:  0.9319605638678403
Mean absolute error:  2.0341937157415573
R2 Score:  0.9289000570611551


Model - ExtraTreesRegressor()
Expl

Calculate Volatilities of Stocks

In [140]:
%%capture

start_date = '2021-04-16'
end_date = '2021-10-16'

aapl_data = yf.download('AAPL', start_date, end_date)
amzn_data = yf.download('AMZN', start_date, end_date)
msft_data = yf.download('MSFT', start_date, end_date)
googl_data = yf.download('GOOGL', start_date, end_date)

In [141]:
print("Volatilities of different stocks are given below: ")
print("Apple: ", aapl_data.Close.std())
print("Amazon: ", amzn_data.Close.std())
print("Microsoft: ", msft_data.Close.std())
print("Google: ", googl_data.Close.std())

Volatilities of different stocks are given below: 
Apple:  9.233734657334155
Amazon:  141.97649240808988
Microsoft:  20.06743208743851
Google:  210.33354967113846


In [134]:
sp_500_1 = sp_500.head()

In [135]:
sp_500_1

Unnamed: 0,Symbol,Name,Sector
0,MMM,3M,Industrials
1,AOS,A. O. Smith,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie,Health Care
4,ABMD,Abiomed,Health Care


In [137]:
%%capture
sp_500_1['Volatility_Close'] = sp_500_1.apply(lambda x: yf.download(x['Symbol'], start_date, end_date).Close.std(), axis = 1)
sp_500_1['Volatility_Open'] = sp_500_1.apply(lambda x: yf.download(x['Symbol'], start_date, end_date).Open.std(), axis = 1)



In [138]:
sp_500_1

Unnamed: 0,Symbol,Name,Sector,Volatility_Close,Volatility_Open
0,MMM,3M,Industrials,8.610289,8.545207
1,AOS,A. O. Smith,Industrials,2.854853,2.817999
2,ABT,Abbott Laboratories,Health Care,5.347175,5.399847
3,ABBV,AbbVie,Health Care,3.81594,3.86301
4,ABMD,Abiomed,Health Care,26.571635,26.549412


In [120]:
%%capture
sp_500['Volatility_Close'] = sp_500.apply(lambda x: get_stock_data(x['Symbol'], start_date, end_date).Close.std(), axis = 1)
sp_500['Volatility_Open'] = sp_500.apply(lambda x: get_stock_data(x['Symbol'], start_date, end_date).Open.std(), axis = 1)


In [125]:
sp_500.sort_values(['Volatility_Close'], ascending = False).head()

Unnamed: 0,Symbol,Name,Sector,Volatility_Close,Volatility_Open
105,CMG,Chipotle Mexican Grill,Consumer Discretionary,225.519841,225.774068
23,GOOGL,Alphabet (Class A),Communication Services,210.33355,210.322541
24,GOOG,Alphabet (Class C),Communication Services,196.995962,197.645311
345,NVR,NVR,Consumer Discretionary,172.642158,172.513568
26,AMZN,Amazon,Consumer Discretionary,141.976492,140.946229


In [126]:
sp_500.sort_values(['Volatility_Open'], ascending = False).head()

Unnamed: 0,Symbol,Name,Sector,Volatility_Close,Volatility_Open
105,CMG,Chipotle Mexican Grill,Consumer Discretionary,225.519841,225.774068
23,GOOGL,Alphabet (Class A),Communication Services,210.33355,210.322541
24,GOOG,Alphabet (Class C),Communication Services,196.995962,197.645311
345,NVR,NVR,Consumer Discretionary,172.642158,172.513568
26,AMZN,Amazon,Consumer Discretionary,141.976492,140.946229


Putting it all together
===

In [216]:
how_many_stocks = input("How many stocks do you want to analyse? Enter at least 10 and max 500\n")
correct = False
while not correct:
    try:
        how_many_stocks = int(how_many_stocks)
        if how_many_stocks > 500 or how_many_stocks < 10:
            raise ValueError
        correct = True
    except:
        print("Please enter a correct number. ")
        how_many_stocks = input("How many stocks do you want to analyse? Enter at least 10 and max 500\n")

which_dataset = input("Which stock list would you like to analyse? \n Enter 1 for S&P500 \n Enter 2 for NYSE \n")
selected = False
while not selected:
    if which_dataset == "1":
        data = get_sp500_list()
        selected = True
    elif which_dataset == "2":
        data = get_nyse_list()
        data.columns = ['Symbol', 'Security Name', 'Exchange', 'CQS Symbol', 'ETF',
       'Round Lot Size', 'Test Issue', 'NASDAQ Symbol']
        selected = True
    else:
        print("Please enter a correct number. ")
        which_dataset = input("Which stock list would you like to analyse? \n Enter 1 for S&P500 \n Enter 2 for NYSE \n")

listOfSymbols = []
for symbol in data.Symbol:
    listOfSymbols.append(symbol)

symbols_not_found = ["BRK.B", "OGN", "BF.B"]

today = '2021-10-17'

def make_x_and_y(close_prices,period = 6):
    x = []
    y = []
    for i in range(len(close_prices)-period):
        x.append(close_prices[i:i+period])
        y.append(close_prices[i+period])
    return x,y

def return_model(x,y):
    clf = LinearRegression()
    clf.fit(x,y)
    return clf

period = 6
predicted_prices_for_tomorrow = []
today_prices = []
volatility_for_stocks = []
new_symbols_for_df = []
for i in range(0,how_many_stocks):
    current_symbol = listOfSymbols[i]
    #getting the past 6 months of data
    if current_symbol not in symbols_not_found:
        print("Currently, analysing " + current_symbol + ".........")
        try:
            current_data = yf.download(current_symbol,start = '2021-04-16', end = '2021-10-16')
            close_prices = current_data.Close
            volatility = close_prices.std()
            features,dependent_variable = make_x_and_y(list(close_prices))
            features = np.array(features)
            dependent_variable = np.array(dependent_variable)
            clf = return_model(features,dependent_variable)
            latest_feat = np.array(list(close_prices)[-period:]).reshape((1,-1))
            tomorrow_prediction =clf.predict(latest_feat)
            
            if len(current_data) > 0:
                new_symbols_for_df.append(current_symbol)
                volatility_for_stocks.append(volatility)
                predicted_prices_for_tomorrow.append(tomorrow_prediction[0])
                today_prices.append(close_prices[-1])
            
        except:
            print("Ticker symbol cannot be found .... ")

full_data = pd.DataFrame({'symbol':new_symbols_for_df, 'volatility':volatility_for_stocks, 'today':today_prices, 'tomorrow':predicted_prices_for_tomorrow})

strategy = input('Which strategy do you want to use? \n Enter 1 for Conservative \n Enter 2 for Aggressive\n')
has_given_response = False
while not has_given_response:
    if strategy == "1":
        has_given_response = True
        new = full_data.sort_values(by = 'volatility', ascending = True)
        new = new[new.tomorrow > new.today]
        print("Top conservative stocks are ...")
        print(new.head())
    elif strategy == "2":
        has_given_response = True
        new = full_data.sort_values(by = 'volatility', ascending = False)
        new = new[new.tomorrow > new.today]
        print("Top aggressive stocks are ...")
        print(new.head())
    else: 
        print("Please enter a correct number. ")
        strategy = input('Which strategy do you want to use? \n Enter 1 for Conservative \n Enter 2 for Aggressive\n')

    

How many stocks do you want to analyse? Enter at least 10 and max 500
10
Which stock list would you like to analyse? 
 Enter 1 for S&P500 
 Enter 2 for NYSE 
1
Currently, analysing MMM.........
[*********************100%***********************]  1 of 1 completed
Currently, analysing AOS.........
[*********************100%***********************]  1 of 1 completed
Currently, analysing ABT.........
[*********************100%***********************]  1 of 1 completed
Currently, analysing ABBV.........
[*********************100%***********************]  1 of 1 completed
Currently, analysing ABMD.........
[*********************100%***********************]  1 of 1 completed
Currently, analysing ACN.........
[*********************100%***********************]  1 of 1 completed
Currently, analysing ATVI.........
[*********************100%***********************]  1 of 1 completed
Currently, analysing ADM.........
[*********************100%***********************]  1 of 1 completed
Currently, an

In [214]:
len(current_data)

0

In [200]:
new_symbols_for_df

['A', 'AA', 'AAA', 'AAAU', 'AAC', 'AAC.U', 'AAC.W', 'AAIC', 'AAIC$B', 'AAIC$C']

In [207]:
volatility_for_stocks

[14.496020322379472,
 5.345666905904348,
 0.013108721907684919,
 0.4394251917012928,
 0.043459823123088596,
 nan,
 nan,
 0.14610419064853059,
 nan,
 nan]

In [210]:
today_prices

[153.27000427246094,
 56.0,
 25.0,
 17.579999923706055,
 9.739999771118164,
 3.930000066757202]

In [211]:
predicted_prices_for_tomorrow

[152.98880279541015,
 49.689500427246095,
 25.000616718928022,
 17.60329990386963,
 9.738299741744996,
 3.898599967956543]

PART 2
===================

In [73]:
def calculate_label(parameter):
    changes = []
    for j in range(len(parameter) - 1):
        changes.append(parameter[j+1] - parameter[j])
    for i in changes:
        if i > 0:
            return 1
        elif i < 0:
            return -1
        elif i == 0:
            return 0

def make_x_and_y(close_prices, period = 6, in_future = 7):
    x = []
    y = []
    for i in range(len(close_prices) - period - in_future):
        x.append(close_prices[i:i+period])
        l = list(close_prices[i + period : i + period + in_future])
        y.append(calculate_label(l))
    return x, y

def get_user_selected_stocks():
    u = input("Please enter the tickers of stocks seperated by space..")
    return u.split(" ")

def get_user_stock_weights():
    u = input("Please enter the % of your income (format as 0.###) that you wish to allocate funds to your selected stocks separated by space..")
    return u.split(" ")

def user_selected_analysis_market_cap(selected, income):
    market_cap = []
    closing_price = []
    for symbol in selected:
        current_data = yf.download(symbol, start='2021-09-16', end = '2021-10-16')
        closing_price.append(current_data.Close[-1])
        market_cap.append(current_data.Close[-1]*current_data.Volume[-1])
    s = sum(market_cap)
    stocks_to_purchase = []
    for i,cap in enumerate(market_cap):
        portion = (income * cap)/s
        stocks_to_purchase.append(math.floor(portion / closing_price[i]))
    df = pd.DataFrame({"Ticker" : selected, "shares_to_buy" : stocks_to_purchase})
    return df


def user_selected_analysis_equal_weight(selected, income):
    closing_price = []
    for symbol in selected:
        current_data = yf.download(symbol, start='2021-09-16', end = '2021-10-16')
        closing_price.append(current_data.Close[-1])
    s = len(selected)
    stocks_to_purchase = []
    for i, closing in enumerate(closing_price):
        buying_power = (income)/s
        stocks_to_purchase.append(math.floor(buying_power / closing_price[i]))
    df = pd.DataFrame({"Ticker" : selected, "shares_to_buy" : stocks_to_purchase, "closing_price" : closing_price})
    return df

def user_selected_analysis_user_weight(selected, income):
    price_ratio = []
    closing_price = []
    for symbol in selected:
        current_data = yf.download(symbol, start='2021-09-16', end = '2021-10-16')
        closing_price.append(current_data.Close[-1])
        price_ratio.append(current_data.Close[-1]/closing_price[0])
    s = sum(price_ratio)
    stocks_to_purchase = []
    for i,ratio in enumerate(price_ratio):
        portion = (income * ratio)/s
        stocks_to_purchase.append(math.floor(portion / closing_price[i]))
    df = pd.DataFrame({"Ticker" : selected, "shares_to_buy" : stocks_to_purchase, "closing_price" : closing_price})
    return df

def user_selected_analysis_user_weight(selected, weights, income):
    
    if len(selected) != len(weights):
        print("List of stocks selected and the weights provided not the same length")
        return
    
    closing_price = []
    for symbol in selected:
        current_data = yf.download(symbol, start='2021-09-16', end = '2021-10-16')
        closing_price.append(current_data.Close[-1])
    
    stocks_to_purchase = []
    for i, weight in enumerate(weights):
        buying_power = income*float(weight)
        stocks_to_purchase.append(math.floor(buying_power / closing_price[i]))
    
    df = pd.DataFrame({"Ticker" : selected, "shares_to_buy" : stocks_to_purchase, "closing_price" : closing_price})
    return df


        

In [33]:
a = user_selected_analysis_equal_weight(get_user_selected_stocks(), 43214321)
a['buying_amount'] = a.shares_to_buy * a.closing_price
a

Please enter the tickers of stocks seperated by space..AAPL MSFT GOOG
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Ticker,shares_to_buy,closing_price,buying_amount
0,AAPL,99453,144.839996,14404770.0
1,MSFT,47351,304.209991,14404650.0
2,GOOG,5083,2833.5,14402680.0


In [77]:
a = user_selected_analysis_user_weight(get_user_selected_stocks(), get_user_stock_weights(), 10000)
a['buying_amount'] = a.shares_to_buy * a.closing_price
a

Please enter the tickers of stocks seperated by space..AAPL MSFT GOOG
Please enter the % of your income (format as 0.###) that you wish to allocate funds to your selected stocks separated by space...4 .2 .4
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Ticker,shares_to_buy,closing_price,buying_amount
0,AAPL,27,144.839996,3910.679901
1,MSFT,6,304.209991,1825.259949
2,GOOG,1,2833.5,2833.5


APP LOOP

In [84]:
def get_correct_input_from_user(text_to_use):
    i = input(text_to_use)
    got_response = False
    while not got_response:
        try:
            a = int(i)
            got_response = True
        except ValueError:
            print("Please enter a correct number...")
            i = input(text_to_use)
    return i

def get_num_of_stocks():
    how_many_stocks = input("How many stocks you want to analyse? Enter at least 10 and max 500\n")
    correct = False
    while not correct:
        try:
            how_many_stocks = int(how_many_stocks)
            if how_many_stocks > 500 or how_many_stocks < 10:
                raise ValueError
            correct = True
        except:
            print("Please enter a correct number. ")
            how_many_stocks = input("How many stocks you want to analyse? Enter at least 10 and max 500\n")
    return how_many_stocks

def return_classifier_model(x,y):
    clf = RandomForestClassifier()
    clf.fit(x,y)
    return_clf

period = 6
predicted_prices_for_tomorrow = []
volatility_for_stocks = []
new_symbols_for_df = []
end_day_price = []
volumes = []

def stock_data_loop():
    print("Wait for some time, I am analyzing " + str(how_many_stocks) + " stocks")
    for current_symbol in listOfSymbols[0:how_many_stocks]:
        # Getting the past 6 months of data
        if current_symbol not in symbols_not_found:
            print("Currently, analysing " + current_symbol + "........")
            new_symbols_for_df.append(current_symbol)
            current_data = yf.download(current_symbol, start = '2021-04-16', end = '2021-10-16')
            close_prices = current_data.Close
            volumes.append(current_data.Volume[-1])
            volatility = close_prices.std()
            volatility_for_stocks.append(volatility)
            features, dependent_variable = make_x_and_y(list(close_prices))
            features = np.array(features)
            dependent_variable = np.array(dependent_variable)
            clf = return_classifier_model(features,dependent_variable)
            latest_feat = np.array(list(close_prices)[-period:]).reshape((1,-1))
            tomorrow_prediction = clf.predict(latest_feat)
            end_day_price.append(close_prices[-1])
            if tomorrow_prediction[0] == 1:
                predicted_prices_for_tomorrow.append("BUY NOW")
            elif tomorrow_prediction[0] == -1:
                predicted_prices_for_tomorrow.append("SELL NOW")
    full_data = pd.DataFrame({'symbol':new_symbols_for_df, 'volatility':volatility_for_stocks, 'tomorrow':predicted_prices_for_tomorrow, 'price':end_day_price, 'Volume' : volumes})
    return full_data

In [85]:
income = get_correct_input_from_user(text_to_use = 'How much money do you ahve to invest?')
time.sleep(2)
print('Do you wish to add your own stocks or want ML to re')

NameError: name 'how_many_stocks' is not defined