Imports

In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates

import time

import yfinance as yf

import os

import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()

from plotly.subplots import make_subplots

import os
from os import listdir
from os.path import isfile, join

import warnings
warnings.simplefilter("ignore")

Constants

In [6]:
# Folder that holds one CSV per ticker. File names are built by plain string
# concatenation (e.g. PATH + ticker + '.csv'), so the trailing "/" is required.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
PATH = "/Users/oppoudel/dev/Python4Finance/Final/Machine_Learning/Stocks/"

# Default start / end dates, kept both as strings and as parsed pandas datetimes
S_DATE = "2017-02-01"
E_DATE = "2022-12-06"
S_DATE_DT = pd.to_datetime(S_DATE)
E_DATE_DT = pd.to_datetime(E_DATE)

In [3]:
# Tickers that save_to_csv_from_yahoo could not download (it appends to this list)
stocks_not_found = []
# NOTE(review): not referenced by any cell visible here — confirm it is still needed
missing_stocks = []

Save data to CSV from Yahoo Finance

In [3]:
# Downloads price history for one ticker from Yahoo Finance and saves it as a CSV file
def save_to_csv_from_yahoo(folder, ticker, period):
    """Download Open/Close/High/Low/Volume history for ``ticker`` and save it as a CSV.

    Parameters
    ----------
    folder : str
        Destination directory. The file name is appended by string
        concatenation, so ``folder`` must end with a path separator.
    ticker : str
        Symbol to download. Periods in the symbol are replaced with
        underscores in the file name (e.g. "BRK.B" -> "BRK_B.csv").
    period : str
        Look-back window handed to ``history()``, like "1y", "5y" or "max".

    Returns
    -------
    None
        Nothing is returned. When the download raises, or comes back with no
        rows, no file is written and ``ticker`` is appended to the
        module-level ``stocks_not_found`` list.
    """
    stock = yf.Ticker(ticker)

    try:
        print("Get Data for : ", ticker)
        # Get historical price data
        df = stock.history(period=period)[['Open','Close','High','Low','Volume']]

        # Wait 2 seconds between requests (presumably to stay under Yahoo's rate limits)
        time.sleep(2)

        # An empty frame means there is nothing to save; treat it as a failed
        # download instead of writing a header-only CSV.
        if df.empty:
            raise ValueError("no rows returned")

        # Remove the period from the ticker so it is safe to use as a file name
        the_file = folder + ticker.replace(".", "_") + '.csv'
        df.to_csv(the_file)
        # Report success only after the file has actually been written
        print(the_file, " Saved")
    except Exception as ex:
        stocks_not_found.append(ticker)
        print("Couldn't Get Data for :", ticker, "-", ex)

Function that returns a stock dataframe from CSV

In [56]:
# Reads a stock's CSV file into a dataframe indexed by the file's first column
def get_stock_df_from_csv(ticker, folder=None):
    """Load the CSV stored for ``ticker`` and return it as a DataFrame.

    Parameters
    ----------
    ticker : str
        Symbol whose file should be read. The literal name is tried first,
        then the name with periods replaced by underscores, because
        save_to_csv_from_yahoo stores "BRK.B" as "BRK_B.csv".
    folder : str, optional
        Directory to read from. Defaults to the module-level ``PATH``.

    Returns
    -------
    pandas.DataFrame or None
        The file contents with the first column as the index (presumably the
        date for files written by save_to_csv_from_yahoo), or ``None`` when no
        matching file exists. A message naming the ticker is printed in that case.
    """
    base_dir = PATH if folder is None else folder

    # Candidate file stems, in lookup order
    candidates = [ticker]
    sanitized = ticker.replace(".", "_")
    if sanitized != ticker:
        candidates.append(sanitized)

    for name in candidates:
        try:
            return pd.read_csv(os.path.join(base_dir, name + '.csv'), index_col=0)
        except FileNotFoundError:
            continue

    # Callers test for None, so a missing file stays a soft failure
    print("File Doesn't Exist :", ticker)
    return None

Get a Column from a CSV File

In [57]:
def get_column_from_csv(file, col_name):
    """Return the column ``col_name`` of the CSV at ``file``.

    Prints a warning and returns None when the file cannot be found.
    """
    try:
        frame = pd.read_csv(file)
    except FileNotFoundError:
        # Missing file: warn and hand back None
        print("File Doesn't Exist")
        return None
    return frame[col_name]

Get Stock Sectors

In [7]:
# 'Symbol' column of the sector list; get_column_from_csv returns None if the file is missing.
# NOTE(review): `tickers` is reassigned from the file names in PATH further down.
tickers = get_column_from_csv("/Users/oppoudel/dev/Python4Finance/Final/Portfolio/stock_sectors.csv", 'Symbol')


Download all Sector stocks

In [8]:
# Only XOM is downloaded here; iterate over `tickers` instead to fetch every symbol.
# "max" is passed straight through as the `period` argument of history().
for ticker in ['XOM']:
    save_to_csv_from_yahoo(PATH, ticker, "max")

Get Data for :  XOM
/Users/oppoudel/dev/Python4Finance/Final/Machine_Learning/Stocks/XOM.csv  Saved


In [9]:
# Show the tickers that could not be downloaded
stocks_not_found

[]

Get Daily Returns

In [51]:
def add_daily_return_to_df(df, ticker):
    """Add a ``daily_return`` column to ``df`` and return the same frame.

    Each value is the row's 'Close' divided by the previous row's 'Close',
    minus 1; the first row therefore has no value. ``df`` is modified in
    place. ``ticker`` is accepted but not used.
    """
    previous_close = df['Close'].shift(1)
    df['daily_return'] = df['Close'].div(previous_close).sub(1)
    return df

Get Cumulative Returns

In [52]:
def add_cum_return_to_df(df, ticker):
    """Add a ``cum_return`` column to ``df`` and return the same frame.

    The column is the running product of (1 + 'daily_return'). ``df`` is
    modified in place. ``ticker`` is accepted but not used.
    """
    growth_factors = df['daily_return'].add(1)
    df['cum_return'] = growth_factors.cumprod()
    return df

Get Stock names in List

In [53]:
# Every CSV file in PATH is one stock; the file name without ".csv" is its ticker.
# Filtering on the extension keeps stray files (e.g. macOS ".DS_Store") out of the list,
# so no manual clean-up of the ticker list is needed.
files = [x for x in listdir(PATH) if isfile(join(PATH, x)) and x.lower().endswith('.csv')]
tickers = sorted(os.path.splitext(x)[0] for x in files)
len(tickers)

505

Add Daily and Cumulative Returns to all stocks

In [54]:
# Recompute the return columns for every stock and write the CSV back in place.
# Best effort: a failure on one ticker is reported and the loop moves on.
for x in tickers:
    try:
        print("Working on :", x)
        new_df = get_stock_df_from_csv(x)
        if new_df is None:
            # The reader returns None for a missing file (and has already said so);
            # skip it here rather than failing on None below.
            continue
        new_df = add_daily_return_to_df(new_df, x)
        new_df = add_cum_return_to_df(new_df, x)
        new_df.to_csv(PATH + x + '.csv')
    except Exception as ex:
        print("Could not process", x, ":", ex)

Working on : A
Working on : AAL
Working on : AAP
Working on : AAPL
Working on : ABBV
Working on : ABC
Working on : ABMD
Working on : ABT
Working on : ACN
Working on : ADBE
Working on : ADI
Working on : ADM
Working on : ADP
Working on : ADSK
Working on : AEE
Working on : AEP
Working on : AES
Working on : AFL
Working on : AIG
Working on : AIZ
Working on : AJG
Working on : AKAM
Working on : ALB
Working on : ALGN
Working on : ALK
Working on : ALL
Working on : ALLE
Working on : AMAT
Working on : AMCR
Working on : AMD
Working on : AME
Working on : AMGN
Working on : AMP
Working on : AMT
Working on : AMZN
Working on : ANET
Working on : ANSS
Working on : ANTM
Working on : AON
Working on : AOS
Working on : APA
Working on : APD
Working on : APH
Working on : APTV
Working on : ARE
Working on : ATO
Working on : ATVI
Working on : AVB
Working on : AVGO
Working on : AVY
Working on : AWK
Working on : AXP
Working on : AZO
Working on : BA
Working on : BAC
Working on : BAX
Working on : BBWI
Working on : BB

Return a DataFrame of Cumulative Returns for All Stocks

In [40]:
def get_cum_ret_for_stocks(stock_df):
    """Collect the final cumulative return of every stock listed in ``stock_df``.

    Parameters
    ----------
    stock_df : pandas.DataFrame
        Must contain a 'Symbol' column; each value is looked up with
        get_stock_df_from_csv.

    Returns
    -------
    pandas.DataFrame
        Columns 'Ticker' and 'CUM_RET', one row per symbol whose data could
        be read. 'CUM_RET' is the last value of that stock's 'cum_return'
        column. Symbols with no file, no rows, or no 'cum_return' column are
        left out so that one bad file does not abort the whole group.
    """
    symbols = []
    cum_rets = []

    # Only the symbol is needed, so iterate the column instead of whole rows
    for symbol in stock_df['Symbol']:
        df = get_stock_df_from_csv(symbol)
        if df is None:
            # Missing file: the reader has already printed a message
            continue
        if df.empty or 'cum_return' not in df.columns:
            print("No cumulative return for :", symbol)
            continue
        symbols.append(symbol)
        cum_rets.append(df['cum_return'].iloc[-1])
    return pd.DataFrame({'Ticker':symbols, 'CUM_RET':cum_rets})

Get Sector Stocks

In [15]:
# Sector list: one row per stock, with at least 'Symbol' and 'Sector' columns
sec_df = pd.read_csv("/Users/oppoudel/dev/Python4Finance/Original/stock_sectors.csv")


def _sector_rows(sector_name):
    """Return the rows of sec_df whose 'Sector' equals ``sector_name``."""
    return sec_df.loc[sec_df['Sector'] == sector_name]


# One frame per sector
indus_df = _sector_rows("Industrials")
health_df = _sector_rows("Health Care")
it_df = _sector_rows("Information Technology")
comm_df = _sector_rows("Communication Services")
staple_df = _sector_rows("Consumer Staples")
discretion_df = _sector_rows("Consumer Discretionary")
utility_df = _sector_rows("Utilities")
financial_df = _sector_rows("Financials")
material_df = _sector_rows("Materials")
restate_df = _sector_rows("Real Estate")
energy_df = _sector_rows("Energy")

In [16]:
# Build one Ticker / CUM_RET table per sector.
# Symbols with no CSV are skipped; each one prints "File Doesn't Exist".
industrial = get_cum_ret_for_stocks(indus_df)
health_care = get_cum_ret_for_stocks(health_df)
it = get_cum_ret_for_stocks(it_df)
commun = get_cum_ret_for_stocks(comm_df)
staple = get_cum_ret_for_stocks(staple_df)
discretion = get_cum_ret_for_stocks(discretion_df)
utility = get_cum_ret_for_stocks(utility_df)
finance = get_cum_ret_for_stocks(financial_df)
material = get_cum_ret_for_stocks(material_df)
restate = get_cum_ret_for_stocks(restate_df)
energy = get_cum_ret_for_stocks(energy_df)

File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist
File Doesn't Exist


Top Industrials

In [17]:
# Ten Industrials stocks with the highest CUM_RET, best first
industrial.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
44,NOC,1.330286
53,RHI,1.311963
26,GD,1.310576
51,RTX,1.277219
52,RSG,1.262516
50,PWR,1.237402
40,LMT,1.216785
38,LHX,1.196104
69,GWW,1.19558
71,WM,1.180671


Top Healthcare

In [18]:
# Ten Health Care stocks with the highest CUM_RET, best first
health_care.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
40,MCK,1.636247
27,LLY,1.498041
1,ABBV,1.451121
50,REGN,1.408153
21,CVS,1.367883
7,ANTM,1.331239
5,ABC,1.324748
57,UNH,1.318858
17,CNC,1.291265
48,PFE,1.289476


Top IT

In [19]:
# Ten Information Technology stocks with the highest CUM_RET, best first
it.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
26,FTNT,1.508178
9,ANET,1.478032
27,IT,1.469963
36,JNPR,1.427078
51,PAYX,1.355458
12,AVGO,1.295233
48,NVDA,1.279287
7,AAPL,1.21167
45,MSI,1.199296
35,JKHY,1.176754


Top Communications

In [20]:
# Ten Communication Services stocks with the highest CUM_RET, best first
commun.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
14,LYV,1.296405
13,IPG,1.216622
1,GOOGL,1.04034
2,GOOG,1.033248
19,OMC,1.009395
11,FOXA,1.007053
20,TMUS,0.964821
12,FOX,0.958848
24,VZ,0.949583
9,EA,0.899408


Top Consumer Staples

In [21]:
# Ten Consumer Staples stocks with the highest CUM_RET, best first
staple.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
0,ADM,1.587009
17,KR,1.555924
9,COST,1.542538
27,HSY,1.415387
1,MO,1.258797
5,KO,1.235043
28,TSN,1.232796
25,PG,1.232437
11,GIS,1.221569
23,PEP,1.213277


Top Consumer Discretionary

In [22]:
# Ten Consumer Discretionary stocks with the highest CUM_RET, best first
discretion.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
3,AZO,1.468033
15,DLTR,1.449377
53,TSLA,1.377913
42,ORLY,1.328871
20,F,1.250766
56,ULTA,1.237699
34,MAR,1.209827
27,HLT,1.203611
52,TGT,1.190167
14,DG,1.177016


Top Utility

In [23]:
# Ten Utilities stocks with the highest CUM_RET, best first
utility.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
16,EXC,1.568475
6,CNP,1.353175
17,FE,1.305197
8,ED,1.295497
24,SRE,1.267874
12,EIX,1.263874
19,NI,1.242026
23,PEG,1.212467
25,SO,1.210513
10,DTE,1.201015


Top Finance

In [24]:
# Ten Financials stocks with the highest CUM_RET, best first
finance.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
5,AON,1.403686
36,MMC,1.356777
6,AJG,1.308049
3,AIG,1.30719
60,WRB,1.286784
15,CINF,1.276564
2,AXP,1.264698
14,CB,1.238543
47,PRU,1.22738
7,AIZ,1.217675


Top Material

In [25]:
# Ten Materials stocks with the highest CUM_RET, best first
material.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
20,NUE,2.102416
6,CF,2.090792
25,MOS,1.983916
23,SEE,1.368777
1,ALB,1.254868
7,CTVA,1.200124
19,NEM,1.172953
12,FMC,1.165792
21,PKG,1.164308
13,FCX,1.158972


Top Real Estate

In [26]:
# Ten Real Estate stocks with the highest CUM_RET, best first
restate.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
18,PLD,1.496808
19,PSA,1.48945
15,IRM,1.48092
11,EXR,1.479974
17,MAA,1.362843
7,DRE,1.347328
2,AVB,1.329524
27,WELL,1.296571
16,KIM,1.28826
24,UDR,1.278583


Top Energy

In [27]:
# Ten Energy stocks with the highest CUM_RET, best first
energy.sort_values(by=['CUM_RET'], ascending=False).head(10)

Unnamed: 0,Ticker,CUM_RET
4,DVN,2.888556
11,MRO,2.377141
14,OXY,2.304903
0,APA,2.262716
8,HAL,1.996817
3,COP,1.99448
6,EOG,1.7988
5,FANG,1.742627
17,PXD,1.702759
12,MPC,1.702601
