In [1]:
import numpy as np 
import pandas as pd
from pandas_datareader import data as web # Reads stock data 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline
import os# Used to get data from a directory
from os import listdir
from os.path import isfile, join
import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance

import time

In [2]:
# Tickers whose download raised an exception; save_to_csv_from_yahoo appends to this list.
stocks_not_downloaded = []
# NOTE(review): presumably meant for tickers whose CSV file is missing — confirm intended use.
missing_stocks = []

In [3]:
def save_to_csv_from_yahoo(folder, ticker, syear, smonth, sday, eyear, emonth, eday,
                           delay_seconds=10):
    """Download the 'Adj Close' column for one ticker and save it as a CSV file.

    The file is written to ``folder + ticker + '.csv'``, so ``folder`` must end
    with a path separator.

    Parameters
    ----------
    folder : str
        Destination folder, including the trailing separator.
    ticker : str
        Symbol passed to the 'yahoo' data reader; also used as the file name.
    syear, smonth, sday : int
        Start date of the requested period.
    eyear, emonth, eday : int
        End date of the requested period.
    delay_seconds : float, optional
        Pause after a successful download, before the file is written.
        Defaults to 10, the value that used to be hard-coded.

    Returns
    -------
    None
        On any exception during download or save, the ticker is appended to
        the module-level ``stocks_not_downloaded`` list and the reason is
        printed; the exception is not re-raised.
    """
    # Defines the time period to use
    start = dt.datetime(syear, smonth, sday)
    end = dt.datetime(eyear, emonth, eday)

    try:
        print("Get Data for : ", ticker)
        # Read the data and keep only the adjusted close column
        df = web.DataReader(ticker, 'yahoo', start, end)['Adj Close']

        # Pause between requests
        time.sleep(delay_seconds)

        # Save data to a CSV file
        df.to_csv(folder + ticker + '.csv')
    except Exception as ex:
        # Best-effort download: record the failure and keep going, but report
        # why it failed so the cause is not silently lost.
        stocks_not_downloaded.append(ticker)
        print("Couldn't Get Data for :", ticker)
        print("    Reason:", repr(ex))

In [4]:
def get_stock_df_from_csv(folder, ticker):
    """Load the CSV saved for a ticker from the given folder.

    The file path is ``folder + ticker + '.csv'``, so ``folder`` must end with
    a path separator.

    Parameters
    ----------
    folder : str
        Folder containing the CSV, including the trailing separator.
    ticker : str
        Ticker symbol; the file name without the '.csv' extension.

    Returns
    -------
    pandas.DataFrame or None
        The file contents, or None (after printing a warning) when the file
        does not exist.
    """
    # Try to get the file and if it doesn't exist issue a warning.
    # read_csv has no `index` keyword (that belongs to to_csv); passing it
    # raised TypeError on every call.
    try:
        df = pd.read_csv(folder + ticker + '.csv')
    except FileNotFoundError:
        print("File Doesn't Exist")
        return None
    return df

In [7]:
def get_column_from_csv(file, col_name):
    """Read a CSV file and return a single column from it.

    Parameters
    ----------
    file : str
        Path of the CSV file to read.
    col_name : str
        Name of the column to return.

    Returns
    -------
    pandas.Series or None
        The requested column, or None (after printing a warning) when the
        file does not exist.
    """
    try:
        table = pd.read_csv(file)
    except FileNotFoundError:
        # A missing file is reported, not raised; the caller receives None.
        print("File Doesn't Exist")
        return None
    return table[col_name]

In [8]:
# Load the 'Ticker' column from Wilshire-5000-Stocks.csv.
# NOTE(review): absolute, machine-specific path — where the file is absent,
# get_column_from_csv prints a warning and `ticker` becomes None.
ticker = get_column_from_csv('E:\\New folder\\data set\\Wilshire-5000-Stocks.csv','Ticker')
ticker

0          A
1         AA
2        AAL
3       AAME
4        AAN
        ... 
3476    ZUMZ
3477     ZUO
3478    ZYNE
3479    ZYXI
3480    ZNGA
Name: Ticker, Length: 3481, dtype: object

In [9]:
# Folder used to store stock data. The trailing separator matters:
# save_to_csv_from_yahoo builds the file path as folder + ticker + '.csv'.
folder ='C:\\Users\\nafissaad\\Finance\\Stock List\\'

# Download tickers at positions 0-19 for 2017-01-01 through 2022-01-25.
# save_to_csv_from_yahoo sleeps after each successful download, so this cell is slow.
for x in range(20):
    save_to_csv_from_yahoo(folder, ticker[x], 2017, 1, 1, 2022,1,25)
print("Finished")

Get Data for :  A
Get Data for :  AA
Get Data for :  AAL
Get Data for :  AAME
Get Data for :  AAN
Get Data for :  AAOI
Get Data for :  AAON
Get Data for :  AAP
Get Data for :  AAPL
Get Data for :  AAT
Get Data for :  AAWW
Get Data for :  AAXN
Couldn't Get Data for : AAXN
Get Data for :  ABBV
Get Data for :  ABC
Get Data for :  ABCB
Get Data for :  ABEO
Get Data for :  ABG
Get Data for :  ABIO
Get Data for :  ABM
Get Data for :  ABMD
Finished


In [10]:
# Download stocks in blocks: this block covers tickers at positions 20-24,
# for the same date range as the previous download cell.
for x in range(20,25):
        save_to_csv_from_yahoo(folder, ticker[x], 2017, 1, 1, 2022,1,25)
print("Finished")

Get Data for :  ABR
Get Data for :  ABT
Get Data for :  ABTX
Get Data for :  AC
Get Data for :  ACA
Finished


In [11]:
# Show which tickers failed to download in the cells above
stocks_not_downloaded

['AAXN']

In [12]:
# Folder holding one <ticker>.csv file per stock. The trailing separator is
# required because file paths are built as PATH + ticker + '.csv'.
PATH='C:\\Users\\nafissaad\\Finance\\Stock List\\'

# Start of the analysis window
S_YEAR = 2017
S_MONTH = 1
S_DATE = 3
S_DATE_START = f'{S_YEAR}-{S_MONTH}-{S_DATE}'
# Original misspelled name, kept as an alias so existing references keep working
S_DATE_SART = S_DATE_START
S_DATE_DATETIME = dt.datetime(S_YEAR,S_MONTH,S_DATE)

# End of the analysis window
E_YEAR = 2021
E_MONTH = 8
E_DATE = 19
E_DATE_END = f'{E_YEAR}-{E_MONTH}-{E_DATE}'
# Previously assigned to S_DATE_DATETIME, which silently replaced the start date
E_DATE_DATETIME = dt.datetime(E_YEAR,E_MONTH,E_DATE)

In [13]:
# Get stock file names in a list.
# Only regular files ending in '.csv' are kept: later cells turn each name into
# a ticker and reopen PATH + ticker + '.csv', so any other file in the folder
# would yield a ticker with no matching CSV. Sorting makes the order
# reproducible, since listdir returns entries in arbitrary order.
files = sorted(
    x for x in listdir(PATH)
    if isfile(join(PATH, x)) and x.lower().endswith('.csv')
)
files

['A.csv',
 'AA.csv',
 'AAL.csv',
 'AAME.csv',
 'AAN.csv',
 'AAOI.csv',
 'AAON.csv',
 'AAP.csv',
 'AAPL.csv',
 'AAT.csv',
 'AAWW.csv',
 'ABBV.csv',
 'ABC.csv',
 'ABCB.csv',
 'ABEO.csv',
 'ABG.csv',
 'ABIO.csv',
 'ABM.csv',
 'ABMD.csv',
 'ABR.csv',
 'ABT.csv',
 'ABTX.csv',
 'AC.csv',
 'ACA.csv']

In [14]:
# Strip the extension from every file name to recover its ticker symbol
tickers = [os.path.splitext(file_name)[0] for file_name in files]
len(tickers)

24

In [15]:
# Create a single-column DataFrame ('Ticker') from the list of ticker symbols
stock_df = pd.DataFrame(tickers, columns=['Ticker'])
stock_df

Unnamed: 0,Ticker
0,A
1,AA
2,AAL
3,AAME
4,AAN
5,AAOI
6,AAON
7,AAP
8,AAPL
9,AAT


In [24]:
# Function that reads a ticker's CSV file into a DataFrame
def get_df_from_csv(ticker):
    """Load ``PATH + ticker + '.csv'`` as a DataFrame.

    Parameters
    ----------
    ticker : str
        Ticker symbol; the file name without the '.csv' extension.

    Returns
    -------
    pandas.DataFrame or None
        The file contents, or None (after printing a warning) when the file
        does not exist.
    """
    try:
        frame = pd.read_csv(PATH + ticker + '.csv')
    except FileNotFoundError:
        # A missing file is reported, not raised; the caller receives None.
        print("File Doesn't Exist")
        return None
    return frame

In [25]:
# Function that writes a DataFrame to the ticker's CSV file
def save_dataframe_to_csv(df, ticker):
    """Write ``df`` to ``PATH + ticker + '.csv'`` without the index column.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to save.
    ticker : str
        Ticker symbol; used as the file name without the '.csv' extension.
    """
    target = PATH + ticker + '.csv'
    df.to_csv(target, index=False)

In [26]:
# Drop columns whose name starts with "Unnamed"
def delete_unnamed_cols(df):
    """Return ``df`` without the columns whose name begins with 'Unnamed'.

    Such columns are typically left behind when a CSV was written together
    with its index and then read back. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string column names.

    Returns
    -------
    pandas.DataFrame
        The remaining columns, in their original order.
    """
    is_unnamed = df.columns.str.startswith('Unnamed')
    return df.loc[:, ~is_unnamed]

In [27]:
# Add daily return to a DataFrame
def add_daily_return_to_df(df,ticker):
    """Add a 'daily_return' column to ``df`` and save it to the ticker's CSV.

    The daily return is ``Adj Close / previous Adj Close - 1``; the first row
    has no previous value and is therefore NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'Adj Close' column. It is modified in place.
    ticker : str
        Ticker symbol; the frame is written to ``PATH + ticker + '.csv'``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now containing 'daily_return'.
    """
    df['daily_return']=(df['Adj Close']/df['Adj Close'].shift(1))-1
    # index=False matches save_dataframe_to_csv: writing the index would add
    # an extra "Unnamed: 0" column the next time the file is read.
    df.to_csv(PATH +ticker +'.csv', index=False)
    return df

In [28]:
#Returns Return on Investment over Time
def get_roi_defined_time(df, start_date=None, end_date=None):
    """Return the return on investment between two dates.

    ROI is ``(end_price - start_price) / start_price``, where both prices are
    taken from the 'Adj Close' column on the rows whose 'Date' equals the
    start and end date respectively.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Date' and 'Adj Close' columns. The 'Date' column is
        converted to datetime in place.
    start_date, end_date : optional
        Anything ``pd.Timestamp`` accepts. When omitted they are built from
        the module-level S_YEAR/S_MONTH/S_DATE and E_YEAR/E_MONTH/E_DATE.

    Returns
    -------
    float
        The return on investment over the period.

    Raises
    ------
    ValueError
        If ``df`` has no row for the start date or for the end date.
    """
    # Build the default window from the numeric constants so the function does
    # not depend on the spelling of the derived string constants.
    if start_date is None:
        start_date = pd.Timestamp(year=S_YEAR, month=S_MONTH, day=S_DATE)
    if end_date is None:
        end_date = pd.Timestamp(year=E_YEAR, month=E_MONTH, day=E_DATE)
    start_date = pd.Timestamp(start_date)
    end_date = pd.Timestamp(end_date)

    df['Date'] = pd.to_datetime(df['Date'])

    start_rows = df.loc[df['Date'] == start_date, 'Adj Close']
    end_rows = df.loc[df['Date'] == end_date, 'Adj Close']
    if start_rows.empty or end_rows.empty:
        raise ValueError(
            f"No price row for start date {start_date.date()} "
            f"or end date {end_date.date()}"
        )

    # .iloc[0] is positional: the matching row rarely carries index label 0
    start_val = start_rows.iloc[0]
    end_val = end_rows.iloc[0]
    print('Initial Price:',start_val)
    print('Final Price:',end_val)
    roi = (end_val - start_val)/start_val
    return roi

In [29]:
# Coefficient of variation of the adjusted close price
def get_cov(stock_df):
    """Return the coefficient of variation of the 'Adj Close' column.

    This is the column's standard deviation (pandas ``Series.std``) divided
    by its mean.

    Parameters
    ----------
    stock_df : pandas.DataFrame
        Frame with an 'Adj Close' column.

    Returns
    -------
    float
        Standard deviation divided by mean.
    """
    prices = stock_df['Adj Close']
    average = prices.mean()
    spread = prices.std()
    return spread / average

In [30]:
# Spot-check: the ticker at position 3 of the list
tickers[3]

'AAME'

In [31]:
# Load the saved CSV for that ticker (get_df_from_csv returns None if the file is missing)
stock_a = get_df_from_csv(tickers[3])
stock_a

Unnamed: 0,Date,Adj Close
0,2017-01-03,4.001373
1,2017-01-04,4.001373
2,2017-01-05,3.757387
3,2017-01-06,4.001373
4,2017-01-09,3.854982
...,...,...
1270,2022-01-19,2.500000
1271,2022-01-20,2.480000
1272,2022-01-21,2.280000
1273,2022-01-24,2.310000


In [32]:
# Adds a 'daily_return' column to stock_a in place and rewrites the ticker's CSV.
# The return value is ignored because the same DataFrame object is modified.
add_daily_return_to_df(stock_a, tickers[3])
stock_a

Unnamed: 0,Date,Adj Close,daily_return
0,2017-01-03,4.001373,
1,2017-01-04,4.001373,0.000000
2,2017-01-05,3.757387,-0.060976
3,2017-01-06,4.001373,0.064935
4,2017-01-09,3.854982,-0.036585
...,...,...,...
1270,2022-01-19,2.500000,-0.003984
1271,2022-01-20,2.480000,-0.008000
1272,2022-01-21,2.280000,-0.080645
1273,2022-01-24,2.310000,0.013158


In [33]:
# Write stock_a back to the ticker's CSV without the index column
save_dataframe_to_csv(stock_a,tickers[3])

In [34]:

# Cycle through all tickers.
# The loop uses its own names (`symbol`, `price_df`) so it does not rebind the
# notebook-level `ticker` Series or the `stock_df` ticker table, which other
# cells still read.
for symbol in tickers:
    print("Working on :", symbol)

    # Get a dataframe for that ticker
    price_df = get_df_from_csv(symbol)
    if price_df is None:
        # No CSV for this ticker: record it and move on instead of crashing
        missing_stocks.append(symbol)
        continue

    # Add daily return to this dataframe (modifies it in place and returns it)
    price_df = add_daily_return_to_df(price_df, symbol)

    # Delete unnamed columns in dataframe
    price_df = delete_unnamed_cols(price_df)

    # Save cleaned dataframe to csv
    save_dataframe_to_csv(price_df, symbol)

Working on : A
Working on : AA
Working on : AAL
Working on : AAME
Working on : AAN
Working on : AAOI
Working on : AAON
Working on : AAP
Working on : AAPL
Working on : AAT
Working on : AAWW
Working on : ABBV
Working on : ABC
Working on : ABCB
Working on : ABEO
Working on : ABG
Working on : ABIO
Working on : ABM
Working on : ABMD
Working on : ABR
Working on : ABT
Working on : ABTX
Working on : AC
Working on : ACA


KeyError: 'Ticker'