# Import all required libraries 

In [1]:
from __future__ import division
import pandas as pd
import numpy as np
import datetime
import time
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

import yfinance as yf
import plotly.graph_objs as go
import pandas_datareader as pdr



# Define model for one company

In [2]:
#here just copy past out all code from model.ipynb
#this function will return predicted price on 30th day 

def My_model(company_name):
    
    end_date=datetime.date.today()
    year=end_date.year
    month=end_date.month
    day = end_date.day
    if month <= 6 :
        year = year-1
        month = 12-6+month
    else :
        month = month-6
    start_date=datetime.date(year,month,day)
    
    df = pdr.get_data_yahoo(company_name, start=start_date, end=end_date)
    df.drop("Adj Close",axis=1,inplace=True)
    df.to_csv("data.csv")
    df1=df.reset_index()['Close']
    scaler=MinMaxScaler(feature_range=(0,1))
    df1=scaler.fit_transform(np.array(df1).reshape(-1,1))
    
    training_size=int(len(df1)*0.9)
    test_size=len(df1)-training_size
    train_data,test_data=df1[0:training_size,:],df1[training_size:len(df1),:1]
    
    def create_dataset(dataset, time_step=1):
        dataX, dataY = [], []
        for i in range(len(dataset)-time_step-1):
            a = dataset[i:(i+time_step), 0]   ###i=0, 0,1,2,3-----99   100 
            dataX.append(a)
            dataY.append(dataset[i + time_step, 0])
        return np.array(dataX), np.array(dataY)
    time_step =10
    X_train, y_train = create_dataset(train_data, time_step)
    X_test, ytest = create_dataset(test_data, time_step)
    
    X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1)
    X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)
    
    model=Sequential()
    model.add(LSTM(50,return_sequences=True,input_shape=(10,1)))
    model.add(LSTM(50,return_sequences=True))
    model.add(LSTM(50))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error',optimizer='adam')
    model.fit(X_train,y_train,validation_data=(X_test,ytest),epochs=100,batch_size=60,verbose=0)
    
    train_predict=model.predict(X_train)
    test_predict=model.predict(X_test)
    
    train_predict=scaler.inverse_transform(train_predict)
    test_predict=scaler.inverse_transform(test_predict)
    
    import math
    from sklearn.metrics import mean_squared_error
    MSE_train = math.sqrt(mean_squared_error(y_train,train_predict))
    MSE_test= math.sqrt(mean_squared_error(ytest,test_predict))
    
    x_input=test_data[len(test_data)-10:].reshape(1,-1)
    
    temp_input=list(x_input)
    temp_input=temp_input[0].tolist()
    
    from numpy import array

    lst_output=[]
    n_steps=10
    i=0
    while(i<30):
    
        if(len(temp_input)>10):
            #print(temp_input)
            x_input=np.array(temp_input[1:])
            #print("{} day input {}".format(i,x_input))
            x_input=x_input.reshape(1,-1)
            x_input = x_input.reshape((1, n_steps, 1))
            #print(x_input)
            yhat = model.predict(x_input, verbose=0)
            #print("{} day output {}".format(i,yhat))
            temp_input.extend(yhat[0].tolist())
            temp_input=temp_input[1:]
            #print(temp_input)
            lst_output.extend(yhat.tolist())
            i=i+1
        else:
            x_input = x_input.reshape((1, n_steps,1))
            yhat = model.predict(x_input, verbose=0)
            #print(yhat[0])
            temp_input.extend(yhat[0].tolist())
            #print(len(temp_input))
            lst_output.extend(yhat.tolist())
            i=i+1
    price_at_30th_day = scaler.inverse_transform([lst_output[-1]]).item(0)
    day_new=np.arange(1,101)
    day_pred=np.arange(101,101+30)
    df3=df1.tolist()
    df3.extend(lst_output)
    df3=scaler.inverse_transform(df3).tolist()
    
    def next_30day_plot():
        plt.plot(df3[len(df3)-30:])
        plt.xlabel("Days")
        plt.ylabel("Price")
    return price_at_30th_day
    

In [3]:
#test our funtion 
price_at_30th_day=My_model("CIPLA.NS")
price_at_30th_day

943.2598641216754

# Define a funtion to get tickers of S&P 500

In [4]:
#we will get all tickers from wikipedia by web scraping

def save_sp500_tickers():
    import requests
    from bs4 import BeautifulSoup
    import bs4 as bs
    import pickle
    
    
    resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    tickers = []
    for row in table.findAll('tr')[1:]:
        ticker = row.findAll('td')[0].text
        tickers.append(ticker)
    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    return tickers

In [5]:
tickers = save_sp500_tickers()
tickers

['MMM\n',
 'ABT\n',
 'ABBV\n',
 'ABMD\n',
 'ACN\n',
 'ATVI\n',
 'ADBE\n',
 'AMD\n',
 'AAP\n',
 'AES\n',
 'AFL\n',
 'A\n',
 'APD\n',
 'AKAM\n',
 'ALK\n',
 'ALB\n',
 'ARE\n',
 'ALGN\n',
 'ALLE\n',
 'LNT\n',
 'ALL\n',
 'GOOGL\n',
 'GOOG\n',
 'MO\n',
 'AMZN\n',
 'AMCR\n',
 'AEE\n',
 'AAL\n',
 'AEP\n',
 'AXP\n',
 'AIG\n',
 'AMT\n',
 'AWK\n',
 'AMP\n',
 'ABC\n',
 'AME\n',
 'AMGN\n',
 'APH\n',
 'ADI\n',
 'ANSS\n',
 'ANTM\n',
 'AON\n',
 'AOS\n',
 'APA\n',
 'AAPL\n',
 'AMAT\n',
 'APTV\n',
 'ADM\n',
 'ANET\n',
 'AJG\n',
 'AIZ\n',
 'T\n',
 'ATO\n',
 'ADSK\n',
 'ADP\n',
 'AZO\n',
 'AVB\n',
 'AVY\n',
 'BKR\n',
 'BLL\n',
 'BAC\n',
 'BBWI\n',
 'BAX\n',
 'BDX\n',
 'BRK.B\n',
 'BBY\n',
 'BIO\n',
 'TECH\n',
 'BIIB\n',
 'BLK\n',
 'BK\n',
 'BA\n',
 'BKNG\n',
 'BWA\n',
 'BXP\n',
 'BSX\n',
 'BMY\n',
 'AVGO\n',
 'BR\n',
 'BRO\n',
 'BF.B\n',
 'CHRW\n',
 'CDNS\n',
 'CZR\n',
 'CPB\n',
 'COF\n',
 'CAH\n',
 'KMX\n',
 'CCL\n',
 'CARR\n',
 'CTLT\n',
 'CAT\n',
 'CBOE\n',
 'CBRE\n',
 'CDW\n',
 'CE\n',
 'CNC\n',
 'CNP

In [6]:
a = []

In [7]:
for i in tickers:
    a.append(i[:-1])

In [8]:
# a is a list of all tikers 
a

['MMM',
 'ABT',
 'ABBV',
 'ABMD',
 'ACN',
 'ATVI',
 'ADBE',
 'AMD',
 'AAP',
 'AES',
 'AFL',
 'A',
 'APD',
 'AKAM',
 'ALK',
 'ALB',
 'ARE',
 'ALGN',
 'ALLE',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AEE',
 'AAL',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'ABC',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'ANSS',
 'ANTM',
 'AON',
 'AOS',
 'APA',
 'AAPL',
 'AMAT',
 'APTV',
 'ADM',
 'ANET',
 'AJG',
 'AIZ',
 'T',
 'ATO',
 'ADSK',
 'ADP',
 'AZO',
 'AVB',
 'AVY',
 'BKR',
 'BLL',
 'BAC',
 'BBWI',
 'BAX',
 'BDX',
 'BRK.B',
 'BBY',
 'BIO',
 'TECH',
 'BIIB',
 'BLK',
 'BK',
 'BA',
 'BKNG',
 'BWA',
 'BXP',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BRO',
 'BF.B',
 'CHRW',
 'CDNS',
 'CZR',
 'CPB',
 'COF',
 'CAH',
 'KMX',
 'CCL',
 'CARR',
 'CTLT',
 'CAT',
 'CBOE',
 'CBRE',
 'CDW',
 'CE',
 'CNC',
 'CNP',
 'CDAY',
 'CERN',
 'CF',
 'CRL',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CTXS',
 'CLX',
 'CME',
 'CMS',
 'KO',
 'CTSH',
 'CL',
 'CMC

In [9]:
a.remove('BRK.B')

In [10]:
a.remove('BF.B')
# in these two company's we get date error so we remove them from our list of tickers

In [11]:
a

['MMM',
 'ABT',
 'ABBV',
 'ABMD',
 'ACN',
 'ATVI',
 'ADBE',
 'AMD',
 'AAP',
 'AES',
 'AFL',
 'A',
 'APD',
 'AKAM',
 'ALK',
 'ALB',
 'ARE',
 'ALGN',
 'ALLE',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AEE',
 'AAL',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'ABC',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'ANSS',
 'ANTM',
 'AON',
 'AOS',
 'APA',
 'AAPL',
 'AMAT',
 'APTV',
 'ADM',
 'ANET',
 'AJG',
 'AIZ',
 'T',
 'ATO',
 'ADSK',
 'ADP',
 'AZO',
 'AVB',
 'AVY',
 'BKR',
 'BLL',
 'BAC',
 'BBWI',
 'BAX',
 'BDX',
 'BBY',
 'BIO',
 'TECH',
 'BIIB',
 'BLK',
 'BK',
 'BA',
 'BKNG',
 'BWA',
 'BXP',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BRO',
 'CHRW',
 'CDNS',
 'CZR',
 'CPB',
 'COF',
 'CAH',
 'KMX',
 'CCL',
 'CARR',
 'CTLT',
 'CAT',
 'CBOE',
 'CBRE',
 'CDW',
 'CE',
 'CNC',
 'CNP',
 'CDAY',
 'CERN',
 'CF',
 'CRL',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CTXS',
 'CLX',
 'CME',
 'CMS',
 'KO',
 'CTSH',
 'CL',
 'CMCSA',
 'CMA',
 'CAG'

In [12]:
#total numbers of tickers
len(a)

503

In [13]:
final_dataframe = pd.DataFrame(a,columns=["company_name"])

In [14]:
final_dataframe

Unnamed: 0,company_name
0,MMM
1,ABT
2,ABBV
3,ABMD
4,ACN
...,...
498,YUM
499,ZBRA
500,ZBH
501,ZION


In [20]:
#now we will collect closing price for each company 
previous_close = []

In [21]:
for name in a:
    
    start_date=datetime.datetime(2021,12,1)
    end_date=datetime.date.today()
    print(name)
    df = pdr.get_data_yahoo(name, start=start_date, end=end_date)
    df=df.drop(['High', 'Low','Open','Volume','Volume','Adj Close'], axis = 1)
    x = df["Close"].iloc[-1]
    previous_close.append(x)
    
     
    
    

MMM
ABT
ABBV
ABMD
ACN
ATVI
ADBE
AMD
AAP
AES
AFL
A
APD
AKAM
ALK
ALB
ARE
ALGN
ALLE
LNT
ALL
GOOGL
GOOG
MO
AMZN
AMCR
AEE
AAL
AEP
AXP
AIG
AMT
AWK
AMP
ABC
AME
AMGN
APH
ADI
ANSS
ANTM
AON
AOS
APA
AAPL
AMAT
APTV
ADM
ANET
AJG
AIZ
T
ATO
ADSK
ADP
AZO
AVB
AVY
BKR
BLL
BAC
BBWI
BAX
BDX
BBY
BIO
TECH
BIIB
BLK
BK
BA
BKNG
BWA
BXP
BSX
BMY
AVGO
BR
BRO
CHRW
CDNS
CZR
CPB
COF
CAH
KMX
CCL
CARR
CTLT
CAT
CBOE
CBRE
CDW
CE
CNC
CNP
CDAY
CERN
CF
CRL
SCHW
CHTR
CVX
CMG
CB
CHD
CI
CINF
CTAS
CSCO
C
CFG
CTXS
CLX
CME
CMS
KO
CTSH
CL
CMCSA
CMA
CAG
COP
ED
STZ
COO
CPRT
GLW
CTVA
COST
CTRA
CCI
CSX
CMI
CVS
DHI
DHR
DRI
DVA
DE
DAL
XRAY
DVN
DXCM
FANG
DLR
DFS
DISCA
DISCK
DISH
DG
DLTR
D
DPZ
DOV
DOW
DTE
DUK
DRE
DD
DXC
EMN
ETN
EBAY
ECL
EIX
EW
EA
EMR
ENPH
ETR
EOG
EFX
EQIX
EQR
ESS
EL
ETSY
EVRG
ES
RE
EXC
EXPE
EXPD
EXR
XOM
FFIV
FAST
FRT
FDX
FIS
FITB
FE
FRC
FISV
FLT
FMC
F
FTNT
FTV
FBHS
FOXA
FOX
BEN
FCX
GPS
GRMN
IT
GNRC
GD
GE
GIS
GM
GPC
GILD
GL
GPN
GS
GWW
HAL
HBI
HIG
HAS
HCA
PEAK
HSIC
HSY
HES
HPE
HLT
HOLX
HD
HON
HRL
HST
HWM
HPQ
HUM
HBAN
HII
I

In [22]:
len(previous_close)

503

In [23]:
final_dataframe.insert(1, "previous_close", previous_close, True)

In [24]:
final_dataframe

Unnamed: 0,company_name,previous_close
0,MMM,172.589996
1,ABT,130.270004
2,ABBV,118.849998
3,ABMD,300.630005
4,ACN,361.420013
...,...,...
498,YUM,124.830002
499,ZBRA,588.289978
500,ZBH,121.220001
501,ZION,61.910000


In [25]:
future_price = []
for cmp_name in a:
    print(cmp_name)
    price_at_30th_day=My_model(cmp_name)
    future_price.append(price_at_30th_day)

MMM
ABT
ABBV
ABMD
ACN
ATVI
ADBE
AMD
AAP
AES
AFL
A
APD
AKAM
ALK
ALB
ARE
ALGN
ALLE
LNT
ALL
GOOGL
GOOG
MO
AMZN
AMCR
AEE
AAL
AEP
AXP
AIG
AMT
AWK
AMP
ABC
AME
AMGN
APH
ADI
ANSS
ANTM
AON
AOS
APA
AAPL
AMAT
APTV
ADM
ANET
AJG
AIZ
T
ATO
ADSK
ADP
AZO
AVB
AVY
BKR
BLL
BAC
BBWI
BAX
BDX
BBY
BIO
TECH
BIIB
BLK
BK
BA
BKNG
BWA
BXP
BSX
BMY
AVGO
BR
BRO
CHRW
CDNS
CZR
CPB
COF
CAH
KMX
CCL
CARR
CTLT
CAT
CBOE
CBRE
CDW
CE
CNC
CNP
CDAY
CERN
CF
CRL
SCHW
CHTR
CVX
CMG
CB
CHD
CI
CINF
CTAS
CSCO
C
CFG
CTXS
CLX
CME
CMS
KO
CTSH
CL
CMCSA
CMA
CAG
COP
ED
STZ
COO
CPRT
GLW
CTVA
COST
CTRA
CCI
CSX
CMI
CVS
DHI
DHR
DRI
DVA
DE
DAL
XRAY
DVN
DXCM
FANG
DLR
DFS
DISCA
DISCK
DISH
DG
DLTR
D
DPZ
DOV
DOW
DTE
DUK
DRE
DD
DXC
EMN
ETN
EBAY
ECL
EIX
EW
EA
EMR
ENPH
ETR
EOG
EFX
EQIX
EQR
ESS
EL
ETSY
EVRG
ES
RE
EXC
EXPE
EXPD
EXR
XOM
FFIV
FAST
FRT
FDX
FIS
FITB
FE
FRC
FISV
FLT
FMC
F
FTNT
FTV
FBHS
FOXA
FOX
BEN
FCX
GPS
GRMN
IT
GNRC
GD
GE
GIS
GM
GPC
GILD
GL
GPN
GS
GWW
HAL
HBI
HIG
HAS
HCA
PEAK
HSIC
HSY
HES
HPE
HLT
HOLX
HD
HON
HRL
HST
HWM
HPQ
HUM
HBAN
HII
I

In [26]:
final_dataframe.insert(2, "predicted_price_of_30thday", future_price , True)

In [27]:
final_dataframe

Unnamed: 0,company_name,previous_close,predicted_price_of_30thday
0,MMM,172.589996,177.113277
1,ABT,130.270004,124.707807
2,ABBV,118.849998,116.300503
3,ABMD,300.630005,341.293900
4,ACN,361.420013,384.059697
...,...,...,...
498,YUM,124.830002,128.295968
499,ZBRA,588.289978,543.186030
500,ZBH,121.220001,137.792921
501,ZION,61.910000,67.358576


In [28]:
final_dataframe["%change in price in next 30 days"]=(final_dataframe["predicted_price_of_30thday"]-final_dataframe["previous_close"])/final_dataframe["previous_close"]*100

In [29]:
final_dataframe

Unnamed: 0,company_name,previous_close,predicted_price_of_30thday,%change in price in next 30 days
0,MMM,172.589996,177.113277,2.620824
1,ABT,130.270004,124.707807,-4.269745
2,ABBV,118.849998,116.300503,-2.145137
3,ABMD,300.630005,341.293900,13.526226
4,ACN,361.420013,384.059697,6.264092
...,...,...,...,...
498,YUM,124.830002,128.295968,2.776549
499,ZBRA,588.289978,543.186030,-7.666958
500,ZBH,121.220001,137.792921,13.671770
501,ZION,61.910000,67.358576,8.800801


here (-)ve sign mean that stock price will decrease in next 30 day so we have to sale stock of that company
and (+)ve sign mean that stock price will increase in next 30 day so we have to buy that sale 


In [30]:
final_dataframe= final_dataframe.sort_values('%change in price in next 30 days',ascending=False)

In [36]:
buy_stock= final_dataframe.head(20)
buy_stock

Unnamed: 0,company_name,previous_close,predicted_price_of_30thday,%change in price in next 30 days
7,AMD,144.009995,522.391558,262.746738
177,ETSY,231.330002,751.824455,225.000843
151,DLTR,137.039993,399.623286,191.610701
375,POOL,544.840027,1394.184368,155.888756
288,LYV,100.93,247.051774,144.775362
496,XLNX,217.059998,517.597109,138.458083
411,STX,105.410004,245.766915,133.153312
158,DRE,59.68,121.156632,103.010442
165,EIX,66.610001,127.516873,91.438031
236,HPQ,37.549999,68.954004,83.632504


In [45]:
m = []
for i in range(20):
    m.append(i)
    
buy_stock=buy_stock.set_axis(m, axis=0)
buy_stock

Unnamed: 0,company_name,previous_close,predicted_price_of_30thday,%change in price in next 30 days
0,AMD,144.009995,522.391558,262.746738
1,ETSY,231.330002,751.824455,225.000843
2,DLTR,137.039993,399.623286,191.610701
3,POOL,544.840027,1394.184368,155.888756
4,LYV,100.93,247.051774,144.775362
5,XLNX,217.059998,517.597109,138.458083
6,STX,105.410004,245.766915,133.153312
7,DRE,59.68,121.156632,103.010442
8,EIX,66.610001,127.516873,91.438031
9,HPQ,37.549999,68.954004,83.632504


So this is the data of all company that will return a profit according to our prediction
now we will define a new columns in our dataframe which will tell us that how many stocks we have to buy.


now we assume that our portfolio is 20000 USD  

In [46]:
portfolio_size = 20000 

In [47]:
#price for each stock 
each_price = 20000/20

In [51]:
position_size = float(portfolio_size) /20
for i in range(0, len(buy_stock['company_name'])):
    buy_stock.loc[i, 'Number Of Shares to Buy'] = math.floor(position_size / buy_stock['previous_close'][i])





In [52]:
buy_stock 

Unnamed: 0,company_name,previous_close,predicted_price_of_30thday,%change in price in next 30 days,Number Of Shares to Buy
0,AMD,144.009995,522.391558,262.746738,6.0
1,ETSY,231.330002,751.824455,225.000843,4.0
2,DLTR,137.039993,399.623286,191.610701,7.0
3,POOL,544.840027,1394.184368,155.888756,1.0
4,LYV,100.93,247.051774,144.775362,9.0
5,XLNX,217.059998,517.597109,138.458083,4.0
6,STX,105.410004,245.766915,133.153312,9.0
7,DRE,59.68,121.156632,103.010442,16.0
8,EIX,66.610001,127.516873,91.438031,15.0
9,HPQ,37.549999,68.954004,83.632504,26.0


so this is the fianl output by LSTM  model 
here we can also define the dataframe or company's those predicted profit is very high in (-)ve so it return tha dataframe of 
company for number of sales to buy 