In [1]:
import pandas as pd
import pandas_datareader.data as web
import io
import requests
import time
import datetime

In [2]:
#function to get stock data
def yahoo_stocks(symbol, start, end):
    return web.DataReader(symbol, 'yahoo', start, end)

In [3]:
#get 7 year stock data for Apple
startDate = datetime.datetime(2010, 1, 4)
endDate = datetime.date.today()
stockData = yahoo_stocks('AAPL', startDate, endDate)

In [4]:
#adding rows for missing dates
def add_missing_dates(dataframe, start, end):
    idx = pd.date_range(start, end)
    dataframe.index = pd.DatetimeIndex(dataframe.index)
    dataframe = dataframe.reindex(idx, fill_value='np.nan')
    return dataframe

In [5]:
stockData = add_missing_dates(stockData, startDate, endDate)

In [6]:
stockData.head(10)

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
2010-01-04,30.49,30.6429,30.34,30.5729,27.5051,123432400
2010-01-05,30.6571,30.7986,30.4643,30.6257,27.5526,150476200
2010-01-06,30.6257,30.7471,30.1071,30.1386,27.1143,138040000
2010-01-07,30.25,30.2857,29.8643,30.0829,27.0642,119282800
2010-01-08,30.0429,30.2857,29.8657,30.2829,27.2442,111902700
2010-01-09,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan
2010-01-10,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan
2010-01-11,30.4,30.4286,29.7786,30.0157,27.0038,115557400
2010-01-12,29.8843,29.9671,29.4886,29.6743,26.6967,148614900
2010-01-13,29.6957,30.1329,29.1571,30.0929,27.0732,151473000


In [7]:
#convert the columns to numeric
def convert_to_numeric(dataframe):
    for col in dataframe:
        dataframe[col] = pd.to_numeric(dataframe[col], errors='coerce')
    return dataframe

In [8]:
stockDataNumeric = convert_to_numeric(stockData)

In [9]:
def interpolate(dataframe):
    features = list(dataframe)
    for feature in features:
        dataframe[feature] = dataframe[feature].interpolate()
    return dataframe

In [10]:
stockDataInterpolated = interpolate(stockDataNumeric)

In [11]:
stockDataInterpolated.head(10)

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
2010-01-04,30.49,30.642857,30.34,30.572857,27.505054,123432400.0
2010-01-05,30.657143,30.798571,30.464285,30.625713,27.552608,150476200.0
2010-01-06,30.625713,30.747143,30.107143,30.138571,27.114347,138040000.0
2010-01-07,30.25,30.285715,29.864286,30.082857,27.064222,119282800.0
2010-01-08,30.042856,30.285715,29.865715,30.282858,27.244156,111902700.0
2010-01-09,30.161904,30.333334,29.836667,30.19381,27.164044,113120900.0
2010-01-10,30.280952,30.380953,29.80762,30.104763,27.083932,114339200.0
2010-01-11,30.4,30.428572,29.778572,30.015715,27.00382,115557400.0
2010-01-12,29.884285,29.967142,29.488571,29.674286,26.69665,148614900.0
2010-01-13,29.695715,30.132856,29.157143,30.092857,27.073221,151473000.0


In [12]:
def prev_diff(dataframe):
    close = dataframe['Close']
    prev_diff = [0]
    for i in range(1, len(dataframe)):
        prev_diff.append(round((close[i]-close[i-1]),6))
    return prev_diff

In [13]:
stockDataInterpolated['prev_diff'] = prev_diff(stockDataInterpolated)

In [14]:
stockDataPrevAdd = stockDataInterpolated

In [15]:
from stockstats import StockDataFrame 

stockstats_df = StockDataFrame.retype(stockDataPrevAdd)

In [16]:
stockstats_df.head(5)

Unnamed: 0,open,high,low,close,adj close,volume,prev_diff
2010-01-04,30.49,30.642857,30.34,30.572857,27.505054,123432400.0,0.0
2010-01-05,30.657143,30.798571,30.464285,30.625713,27.552608,150476200.0,0.052856
2010-01-06,30.625713,30.747143,30.107143,30.138571,27.114347,138040000.0,-0.487142
2010-01-07,30.25,30.285715,29.864286,30.082857,27.064222,119282800.0,-0.055714
2010-01-08,30.042856,30.285715,29.865715,30.282858,27.244156,111902700.0,0.200001


In [17]:
stockstats_df['close_50_sma']

2010-01-04     30.572857
2010-01-05     30.599285
2010-01-06     30.445714
2010-01-07     30.355000
2010-01-08     30.340571
2010-01-09     30.316111
2010-01-10     30.285918
2010-01-11     30.252143
2010-01-12     30.187937
2010-01-13     30.178429
2010-01-14     30.154805
2010-01-15     30.093452
2010-01-16     30.066566
2010-01-17     30.066760
2010-01-18     30.088619
2010-01-19     30.128080
2010-01-20     30.135084
2010-01-21     30.112262
2010-01-22     30.014248
2010-01-23     29.938702
2010-01-24     29.882415
2010-01-25     29.842760
2010-01-26     29.824379
2010-01-27     29.819077
2010-01-28     29.765114
2010-01-29     29.675577
2010-01-30     29.597381
2010-01-31     29.529311
2010-02-01     29.470320
2010-02-02     29.420643
                 ...    
2017-10-10    157.954100
2017-10-11    157.889500
2017-10-12    157.809900
2017-10-13    157.764300
2017-10-14    157.726167
2017-10-15    157.696567
2017-10-16    157.675500
2017-10-17    157.655500
2017-10-18    157.592500


In [19]:
stockstats_df.head(5)

Unnamed: 0,open,high,low,close,adj close,volume,prev_diff,close_50_sma
2010-01-04,30.49,30.642857,30.34,30.572857,27.505054,123432400.0,0.0,30.572857
2010-01-05,30.657143,30.798571,30.464285,30.625713,27.552608,150476200.0,0.052856,30.599285
2010-01-06,30.625713,30.747143,30.107143,30.138571,27.114347,138040000.0,-0.487142,30.445714
2010-01-07,30.25,30.285715,29.864286,30.082857,27.064222,119282800.0,-0.055714,30.355
2010-01-08,30.042856,30.285715,29.865715,30.282858,27.244156,111902700.0,0.200001,30.340571


In [20]:
import math

def ten_day_volatility(dataframe):
    volatility = dataframe['close'].rolling(window=10,center=False).std(ddof=0)
    # daily_pct_change = stockstats_df['close'] / stockstats_df['close'].shift(1) - 1
    # volatility2 = daily_pct_change.rolling(window=10,center=False).std(ddof=0) * math.sqrt(10)
    top = dataframe[0:9]
    top_vol = top['close'].rolling(window=2,center=False).std(ddof=0)
    top_vol[0] = 0
    volatility[0:9] = top_vol
    return volatility

In [21]:
volatility = ten_day_volatility(stockstats_df)
stockstats_df['10_day_volatility'] = volatility

In [22]:
stockstats_df.head(5)

Unnamed: 0,open,high,low,close,adj close,volume,prev_diff,close_50_sma,10_day_volatility
2010-01-04,30.49,30.642857,30.34,30.572857,27.505054,123432400.0,0.0,30.572857,0.0
2010-01-05,30.657143,30.798571,30.464285,30.625713,27.552608,150476200.0,0.052856,30.599285,0.026428
2010-01-06,30.625713,30.747143,30.107143,30.138571,27.114347,138040000.0,-0.487142,30.445714,0.243571
2010-01-07,30.25,30.285715,29.864286,30.082857,27.064222,119282800.0,-0.055714,30.355,0.027857
2010-01-08,30.042856,30.285715,29.865715,30.282858,27.244156,111902700.0,0.200001,30.340571,0.100001


In [25]:
x = pd.DataFrame(stockstats_df)

In [27]:
type(x)

pandas.core.frame.DataFrame