### Code reads stock values from CSV files
* The location of the input files is set in `get_filename`
* Run the `main` function to get the values for all tickers listed in it
* Value changes are given in %

In [1]:
# Dependencies
import csv
import os
import pandas as pd 

In [2]:
# Supporting functions
def get_filename(ticker):
    """Return the path of the CSV input file that belongs to ``ticker``.

    The path is relative to the notebook: ``../InputData/<ticker>.csv``.
    """
    csv_name = ticker + ".csv"
    # Input files are located relative to the notebook, not at an absolute path
    return os.path.join("..", "InputData", csv_name)

In [12]:
def read_ticker(file_name):
    """Read one ticker's CSV file and pivot its close values by week and weekday.

    Parameters
    ----------
    file_name : str or file-like
        Passed straight to ``pandas.read_csv``. The data must provide a
        ``Date`` and a ``Close`` column.

    Returns
    -------
    pandas.DataFrame
        One row per ``Year.Week`` key and one column per weekday number
        (0 = Monday) found in the data; cells hold the close value and are
        NaN where the file has no entry for that day.

    Notes
    -----
    The ``Year.Week`` key is the float ``year + week / 100`` (YYYY.WW), where
    the week number comes from ``strftime("%W")``: weeks start on Monday and
    the days before the year's first Monday belong to week 00. A calendar
    week that spans New Year is therefore split across two keys.
    """
    # Input file into dataframe
    rawdata = pd.read_csv(file_name)

    # Date into date type; all derived columns use the vectorized .dt accessor
    # instead of looping over the individual timestamps in Python
    dates = rawdata['Date'].astype('datetime64[ns]')
    week_of_year = dates.dt.strftime('%W').astype(int)

    rawdata['Date'] = dates
    rawdata['Year.Week'] = dates.dt.year + week_of_year / 100  # YYYY.WW
    rawdata['Weekday'] = dates.dt.weekday.astype('int64')  # 0 = Monday ... 6 = Sunday
    rawdata['CloseValue'] = rawdata['Close']

    # Pivot data: weeks as rows, weekdays as columns
    return rawdata.pivot(index='Year.Week', columns='Weekday', values='CloseValue')

In [16]:
def get_ticker_data(ticker):
    """Compute the Thursday/Monday close-value changes (in %) for one ticker.

    The ticker's input file is loaded with ``read_ticker(get_filename(ticker))``
    and two relative changes are derived per row:

    * ``Th..Mo`` - from the Thursday close of the previous row to the Monday
      close of this row (the cross-week change)
    * ``Mo..Th`` - from the Monday close to the Thursday close of this row

    Parameters
    ----------
    ticker : str
        Ticker symbol; it is also used as the base name of the input file.

    Returns
    -------
    pandas.DataFrame
        Indexed like the pivot returned by ``read_ticker``, with the columns
        ``Ticker``, ``Th..Mo`` and ``Mo..Th``. Rows that have a missing value
        in any weekday column or in either change are dropped.
    """
    delta1 = 'Th..Mo'
    delta2 = 'Mo..Th'

    # Read data from input file
    middata = read_ticker(get_filename(ticker))

    # Align the columns by weekday label (0 = Monday ... 4 = Friday) before
    # naming them. Naming by position alone breaks or mislabels the columns
    # when a weekday never occurs in the file or weekend rows are present.
    middata = middata.reindex(columns=list(range(5)))
    middata.columns = ['Mo', 'Tu', 'We', 'Th', 'Fr']

    # Thursday values moved one row down, used for the cross-week change
    prev_thursday = middata['Th'].shift(1)
    # relative change from the previous row's Thursday to Monday
    middata[delta1] = 100 * (middata['Mo'] - prev_thursday) / prev_thursday
    # change from Monday to this row's Thursday
    middata[delta2] = 100 * (middata['Th'] - middata['Mo']) / middata['Mo']

    # Clean input data
    cleandata = middata.dropna()  # drop rows with n/a values; run this before columns reduction
    cleandata = cleandata.loc[:, [delta1, delta2]].copy()  # only needed columns
    cleandata.insert(loc=0, column='Ticker', value=ticker)
    # Uncomment next two lines if you want get column with year value
    #cleandata.insert(loc=1, column='Year', value=cleandata.index)
    #cleandata.Year = cleandata.Year.astype(int) # get year number by column data type change
    return cleandata

In [27]:
# Main method, call it to extract data from input file into DataFrame
def main(tickers=None):
    """Extract the change data of several tickers into a single DataFrame.

    Parameters
    ----------
    tickers : list of str, optional
        Ticker symbols to process. When omitted, the built-in default list
        of tickers is used.

    Returns
    -------
    pandas.DataFrame
        The frames returned by ``get_ticker_data`` for every ticker, stacked
        in the given order. An empty DataFrame if ``tickers`` is empty.
    """
    if tickers is None:
        # List of tickers
        tickers = ["XLB","XLC","XLE","XLF","XLI","XLK","XLP","XLRE", "XLU", "XLV", "XLY"]

    # Collect the per-ticker frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied all rows on each call.
    frames = []

    print("Running data extraction: ", end = '')
    for ticker in tickers:
        print(ticker, end = ' ')
        frames.append(get_ticker_data(ticker))
    print(" ... Done.")

    # pd.concat raises on an empty list, so keep returning an empty frame then
    return pd.concat(frames) if frames else pd.DataFrame()

In [28]:
# Run data extraction for all tickers listed in main()
# The returned value is a DataFrame; save it to a file or use it for further analysis
dataout = main()

Running data extraction: XLBXLCXLEXLFXLIXLKXLPXLREXLUXLVXLY ... Done.


In [25]:
# Check basic statistics (count, mean, std, min, quartiles, max) of the extracted changes
dataout.describe()

Unnamed: 0,Th..Mo,Mo..Th
count,7534.0,7534.0
mean,-0.032672,0.143921
std,1.963821,2.429792
min,-14.234365,-23.089889
25%,-0.882447,-1.025743
50%,0.087238,0.187795
75%,0.941647,1.347232
max,18.918925,30.031933
