# Objective and key steps

Input:
A table that contains the historical pricing data

Output:
A table that still contains the historical pricing data but with additional columns/variables for price percentage change over different time windows 

Key steps:

1. Data import - download the pricing data through the Yahoo Finance API and apply a light-touch cleanse
2. Feature creation - percentage change on closing price for (a) one time window and (b) multiple time windows


In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import datetime
import ticker_data_prep

2021-01-21 16:19:52,529 - data being cooked


# Data import

In [2]:
# Inputs for the data download.
tkr = 'TSLA'      # ticker symbol to fetch
timespan = 1800   # look-back length in days, counted back from today

In [3]:
# Download daily price history for the ticker with yfinance.
# The window runs from `timespan` days before today up to today's date,
# both passed as 'YYYY-MM-DD' strings.
today = datetime.date.today()
lookback = datetime.timedelta(days=timespan)
start_date = (today - lookback).strftime('%Y-%m-%d')
enddate = today.strftime('%Y-%m-%d')
data = yf.download(tkr, start=start_date, end=enddate)
data.head(2)

2021-01-21 16:19:54,300 - NumExpr defaulting to 8 threads.


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-02-17,31.799999,33.868,31.336,33.736,33.736,29126000
2016-02-18,34.484001,34.59,32.953999,33.354,33.354,19438000


Light touch data cleansing

In [4]:
# Load the daily data for the ticker through the project helper module.
# NOTE(review): judging by its log output the helper also prefixes the column
# names with the ticker and exports a CSV - confirm in ticker_data_prep.
df_stock = ticker_data_prep.tkr_data_main(
    ticker=tkr,
    timescope=timespan,
    datatype='daily',
)
df_stock.head(2)

2021-01-21 16:19:54,560 - Getting data for TSLA


[*********************100%***********************]  1 of 1 completed

2021-01-21 16:19:54,683 - Column names updated with ticker identifier
2021-01-21 16:19:54,683 - Exporting stock daily data for TSLA
2021-01-21 16:19:54,701 - row count is 1241
2021-01-21 16:19:54,702 - column count is 7
2021-01-21 16:19:54,702 - Data has been exported to raw_stock_ticker_data/equity_TSLA_daily.csv 



                 Open    High     Low   Close  Adj Close    Volume ticker
Date                                                                     
2016-02-17  31.799999  33.868  31.336  33.736     33.736  29126000   TSLA


Unnamed: 0_level_0,tsla_open,tsla_high,tsla_low,tsla_close,tsla_adj_close,tsla_volume,tsla_ticker
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-02-17,31.799999,33.868,31.336,33.736,33.736,29126000.0,TSLA
2016-02-18,34.484001,34.59,32.953999,33.354,33.354,19438000.0,TSLA


In [5]:
#get the column names for open, high, low and close price
# Columns are expected to be named '<ticker>_<field>' (e.g. 'tsla_close').
# Match on the exact '_<field>' suffix rather than a substring:
#   * "'close' in c" also matches '<ticker>_adj_close', which would silently
#     make colnm_close point at the adjusted close instead of the close;
#   * a ticker whose symbol contains a field name (e.g. 'LOW') would
#     otherwise match every column.
price_colnames={}
for field in ('open','high','low','close'):
    matches=[c for c in df_stock.columns
             if c.endswith('_'+field) and not c.endswith('_adj_'+field)]
    # Fail fast with a clear message instead of leaving the variable
    # undefined (or stale from an earlier run) when the lookup is ambiguous.
    if len(matches)!=1:
        raise ValueError(
            'expected exactly one {!r} column in df_stock, found {}'.format(field,matches)
        )
    price_colnames[field]=matches[0]

colnm_open=price_colnames['open']
colnm_high=price_colnames['high']
colnm_low=price_colnames['low']
colnm_close=price_colnames['close']

# Feature creation

percentage change for one time window

In [6]:
# Percentage change of the `colnm_close` column over a single window,
# where the window is a number of rows.
window = 1
colnm_pct_chg = 'pct_chg_{}'.format(window)
df_stock[colnm_pct_chg] = df_stock[colnm_close].pct_change(periods=window)

In [7]:
# Inspect the newly created percentage-change series.
df_stock.loc[:, colnm_pct_chg]

Date
2016-02-17         NaN
2016-02-18   -0.011323
2016-02-19   -0.001139
2016-02-22    0.066995
2016-02-23   -0.002982
                ...   
2021-01-13    0.005851
2021-01-14   -0.011013
2021-01-15   -0.022296
2021-01-19    0.022260
2021-01-20    0.006986
Name: pct_chg_1, Length: 1241, dtype: float64

In [8]:
# Preview the first five rows (head() defaults to n=5), including the new column.
df_stock.head()

Unnamed: 0_level_0,tsla_open,tsla_high,tsla_low,tsla_close,tsla_adj_close,tsla_volume,tsla_ticker,pct_chg_1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-02-17,31.799999,33.868,31.336,33.736,33.736,29126000.0,TSLA,
2016-02-18,34.484001,34.59,32.953999,33.354,33.354,19438000.0,TSLA,-0.011323
2016-02-19,32.731998,33.498001,32.5,33.316002,33.316002,14797000.0,TSLA,-0.001139
2016-02-22,34.023998,35.782001,33.970001,35.548,35.548,25300500.0,TSLA,0.066995
2016-02-23,35.231998,36.346001,34.736,35.442001,35.442001,29922000.0,TSLA,-0.002982


percentage change for multiple time windows

In [11]:
# Percentage change of the `colnm_close` column for every window from 1 to
# max_window rows, stored as columns 'pct_chg_1' ... 'pct_chg_<max_window>'.
max_window=60
windows=list(range(1,max_window+1))
close_prices=df_stock[colnm_close]

# Build every series first and attach them with a single concat, instead of
# inserting the columns one at a time (which fragments the DataFrame).
pct_chg_series=[]
for w in windows:
    colnm_pct_chg='pct_chg'+'_'+str(w)
    pct_chg_series.append(close_prices.pct_change(w).rename(colnm_pct_chg))

# Replace any pct_chg_* columns that already exist (from an earlier cell or
# a previous run of this one) so re-running never yields duplicate columns.
existing_pct_chg=[s.name for s in pct_chg_series if s.name in df_stock.columns]
df_stock=pd.concat([df_stock.drop(columns=existing_pct_chg)]+pct_chg_series,axis=1)
df_stock.head(5)

Unnamed: 0_level_0,tsla_open,tsla_high,tsla_low,tsla_close,tsla_adj_close,tsla_volume,tsla_ticker,pct_chg_1,pct_chg_2,pct_chg_3,...,pct_chg_51,pct_chg_52,pct_chg_53,pct_chg_54,pct_chg_55,pct_chg_56,pct_chg_57,pct_chg_58,pct_chg_59,pct_chg_60
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-02-11,30.4,32.652,29.4,30.094,30.094,71262000.0,TSLA,,,,...,,,,,,,,,,
2016-02-12,31.0,31.402,28.74,30.208,30.208,36179000.0,TSLA,0.003788,,,...,,,,,,,,,,
2016-02-16,31.74,32.59,30.822001,31.034,31.034,27969000.0,TSLA,0.027344,0.031235,,...,,,,,,,,,,
2016-02-17,31.799999,33.868,31.336,33.736,33.736,29126000.0,TSLA,0.087066,0.11679,0.121021,...,,,,,,,,,,
2016-02-18,34.484001,34.59,32.953999,33.354,33.354,19438000.0,TSLA,-0.011323,0.074757,0.104145,...,,,,,,,,,,


In [12]:
# Summary statistics for every percentage-change column.
pct_chg_columns = [c for c in df_stock.columns if 'pct_chg' in c]
df_stock[pct_chg_columns].describe()

Unnamed: 0,pct_chg_1,pct_chg_2,pct_chg_3,pct_chg_4,pct_chg_5,pct_chg_6,pct_chg_7,pct_chg_8,pct_chg_9,pct_chg_10,...,pct_chg_51,pct_chg_52,pct_chg_53,pct_chg_54,pct_chg_55,pct_chg_56,pct_chg_57,pct_chg_58,pct_chg_59,pct_chg_60
count,1240.0,1239.0,1238.0,1237.0,1236.0,1235.0,1234.0,1233.0,1232.0,1231.0,...,1190.0,1189.0,1188.0,1187.0,1186.0,1185.0,1184.0,1183.0,1182.0,1181.0
mean,0.003357,0.006745,0.01017,0.013574,0.017033,0.02041,0.023633,0.026891,0.030172,0.033376,...,0.16158,0.164508,0.167568,0.170629,0.173874,0.177099,0.180246,0.183566,0.18697,0.190396
std,0.036515,0.052285,0.064935,0.076995,0.087038,0.095736,0.103218,0.11068,0.118156,0.124896,...,0.349234,0.353479,0.358094,0.362587,0.367683,0.372784,0.377027,0.382113,0.387273,0.392626
min,-0.210628,-0.212982,-0.339175,-0.355597,-0.430459,-0.440255,-0.405888,-0.486524,-0.501449,-0.518052,...,-0.349221,-0.346968,-0.350876,-0.361429,-0.382777,-0.380641,-0.368401,-0.397493,-0.388865,-0.404058
25%,-0.013859,-0.019188,-0.023508,-0.026906,-0.030122,-0.0314,-0.034051,-0.035656,-0.042599,-0.046897,...,-0.087087,-0.083716,-0.084049,-0.080957,-0.084238,-0.084115,-0.083758,-0.086125,-0.083759,-0.084638
50%,0.00159,0.005186,0.007439,0.009503,0.01248,0.014343,0.015912,0.018929,0.020733,0.022838,...,0.057041,0.062454,0.061347,0.067169,0.066121,0.065834,0.069297,0.071038,0.072167,0.076382
75%,0.019496,0.031308,0.040912,0.04935,0.055416,0.063301,0.070383,0.075073,0.085753,0.090188,...,0.284105,0.291125,0.286978,0.296163,0.300125,0.302616,0.313215,0.313583,0.313739,0.314771
max,0.198949,0.363512,0.384279,0.526808,0.564756,0.589656,0.570518,0.605893,0.639734,0.658833,...,1.754767,1.728792,1.73963,1.780566,1.769235,1.789189,1.734434,1.837744,1.700607,1.813269
