In [1]:
import requests as http
from io import StringIO
from datetime import datetime
from datetime import timedelta

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px

from av_lib import *

In [2]:
api_key = None
with open('api-key.txt') as file:
    api_key = file.read()

In [3]:
symbol = Symbol('AAPL', 'Apple Inc.', 'USA')

In [4]:
url = daily_adjusted_query(api_key, symbol=symbol.tick(), output_size='full')
data = fetch_data(url)

In [5]:
data = StringIO(data)
data = pd.read_csv(data)
data

Unnamed: 0,timestamp,open,high,low,close,adjusted_close,volume,dividend_amount,split_coefficient
0,2022-12-09,142.340,145.5700,140.90,142.16,142.160000,76097011,0.0,1.0
1,2022-12-08,142.360,143.5200,141.10,142.65,142.650000,62128338,0.0,1.0
2,2022-12-07,142.190,143.3700,140.00,140.94,140.940000,69721094,0.0,1.0
3,2022-12-06,147.075,147.3000,141.92,142.91,142.910000,64727186,0.0,1.0
4,2022-12-05,147.770,150.9199,145.77,146.63,146.630000,68826442,0.0,1.0
...,...,...,...,...,...,...,...,...,...
5811,1999-11-05,84.620,88.3700,84.00,88.31,0.672034,3721500,0.0,1.0
5812,1999-11-04,82.060,85.3700,80.62,83.62,0.636344,3384700,0.0,1.0
5813,1999-11-03,81.620,83.2500,81.00,81.50,0.620210,2932700,0.0,1.0
5814,1999-11-02,78.000,81.6900,77.31,80.25,0.610698,3564600,0.0,1.0


In [6]:
data['timestamp'] = pd.to_datetime(data['timestamp'])
#data.set_index('timestamp', inplace=True)
data.head()

Unnamed: 0,timestamp,open,high,low,close,adjusted_close,volume,dividend_amount,split_coefficient
0,2022-12-09,142.34,145.57,140.9,142.16,142.16,76097011,0.0,1.0
1,2022-12-08,142.36,143.52,141.1,142.65,142.65,62128338,0.0,1.0
2,2022-12-07,142.19,143.37,140.0,140.94,140.94,69721094,0.0,1.0
3,2022-12-06,147.075,147.3,141.92,142.91,142.91,64727186,0.0,1.0
4,2022-12-05,147.77,150.9199,145.77,146.63,146.63,68826442,0.0,1.0


In [7]:
adj_close = px.line(
    data_frame=data, 
    x='timestamp', 
    y='adjusted_close', 
    title=f'Adjusted Stock Price - {symbol.tick()}',
    labels={
        'timestamp': 'Date',
        'adjusted_close': 'Price'
    },
    log_y=True
)
adj_close.show()

In [8]:
data['timedelta'] = (data['timestamp'] - datetime.today()).dt.days
data.head()

Unnamed: 0,timestamp,open,high,low,close,adjusted_close,volume,dividend_amount,split_coefficient,timedelta
0,2022-12-09,142.34,145.57,140.9,142.16,142.16,76097011,0.0,1.0,-4
1,2022-12-08,142.36,143.52,141.1,142.65,142.65,62128338,0.0,1.0,-5
2,2022-12-07,142.19,143.37,140.0,140.94,140.94,69721094,0.0,1.0,-6
3,2022-12-06,147.075,147.3,141.92,142.91,142.91,64727186,0.0,1.0,-7
4,2022-12-05,147.77,150.9199,145.77,146.63,146.63,68826442,0.0,1.0,-8


In [9]:
adj_close = px.line(
    data_frame=data, 
    x='timedelta', 
    y='adjusted_close', 
    title=f'Adjusted Stock Price - {symbol.tick()}',
    labels={
        'timedelta': 'Days Before',
        'adjusted_close': 'Price'
    },
    log_y=True
)
adj_close.show()

In [10]:
import statsmodels.formula.api as smf

model = smf.ols(data=data, formula='np.log(adjusted_close) ~ timedelta').fit()
model.summary()

0,1,2,3
Dep. Variable:,np.log(adjusted_close),R-squared:,0.947
Model:,OLS,Adj. R-squared:,0.947
Method:,Least Squares,F-statistic:,104300.0
Date:,"Mon, 12 Dec 2022",Prob (F-statistic):,0.0
Time:,17:37:13,Log-Likelihood:,-3668.8
No. Observations:,5816,AIC:,7342.0
Df Residuals:,5814,BIC:,7355.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,5.3004,0.012,444.400,0.000,5.277,5.324
timedelta,0.0008,2.45e-06,322.910,0.000,0.001,0.001

0,1,2,3
Omnibus:,34.679,Durbin-Watson:,0.003
Prob(Omnibus):,0.0,Jarque-Bera (JB):,40.257
Skew:,-0.129,Prob(JB):,1.81e-09
Kurtosis:,3.315,Cond. No.,9750.0


In [11]:
to_delta_days = np.vectorize(lambda x: (x - datetime.today()).dt.days)

In [12]:
next_5y = pd.date_range(start=datetime.today(), end=datetime.today() + timedelta(days=360*5), freq=timedelta(days=1)).to_series()
today = datetime.today()
delta_days = (next_5y - today).dt.days
proj_data = pd.DataFrame()
proj_data['timestamp'] = next_5y
proj_data['timedelta'] = delta_days
proj_data

Unnamed: 0,timestamp,timedelta
2022-12-12 17:37:13.635728,2022-12-12 17:37:13.635728,-1
2022-12-13 17:37:13.635728,2022-12-13 17:37:13.635728,0
2022-12-14 17:37:13.635728,2022-12-14 17:37:13.635728,1
2022-12-15 17:37:13.635728,2022-12-15 17:37:13.635728,2
2022-12-16 17:37:13.635728,2022-12-16 17:37:13.635728,3
...,...,...
2027-11-12 17:37:13.635728,2027-11-12 17:37:13.635728,1795
2027-11-13 17:37:13.635728,2027-11-13 17:37:13.635728,1796
2027-11-14 17:37:13.635728,2027-11-14 17:37:13.635728,1797
2027-11-15 17:37:13.635728,2027-11-15 17:37:13.635728,1798


In [13]:
data = pd.concat([data, proj_data])
data['prediction'] = np.exp(model.predict(data['timedelta']))
data.set_index('timestamp', inplace=True)
data.sort_index(inplace=True)
#data.drop_duplicates(inplace=True)
data

Unnamed: 0_level_0,open,high,low,close,adjusted_close,volume,dividend_amount,split_coefficient,timedelta,prediction
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1999-11-01 00:00:00.000000,80.00,80.69,77.37,77.62,0.590684,2487300.0,0.0,1.0,-8443,0.253645
1999-11-02 00:00:00.000000,78.00,81.69,77.31,80.25,0.610698,3564600.0,0.0,1.0,-8442,0.253846
1999-11-03 00:00:00.000000,81.62,83.25,81.00,81.50,0.620210,2932700.0,0.0,1.0,-8441,0.254047
1999-11-04 00:00:00.000000,82.06,85.37,80.62,83.62,0.636344,3384700.0,0.0,1.0,-8440,0.254247
1999-11-05 00:00:00.000000,84.62,88.37,84.00,88.31,0.672034,3721500.0,0.0,1.0,-8439,0.254448
...,...,...,...,...,...,...,...,...,...,...
2027-11-12 17:37:13.635728,,,,,,,,,1795,827.920812
2027-11-13 17:37:13.635728,,,,,,,,,1796,828.575348
2027-11-14 17:37:13.635728,,,,,,,,,1797,829.230401
2027-11-15 17:37:13.635728,,,,,,,,,1798,829.885972


In [16]:
px.line(data_frame=data, log_y=True)