In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from matplotlib import rc, rcParams
f = mticker.ScalarFormatter(useMathText=True)

import scipy
import pandas as pd
import yfinance as yf
from yahoofinancials import YahooFinancials
from datetime import datetime as dt
from datetime import date, timedelta
import datetime
from matplotlib.dates import YearLocator, DateFormatter

import plotly.graph_objs as go
from plotly.offline import plot

#for offline plotting
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [2]:
#####################################
# Matplotlib / LaTeX global styling #
#####################################

# Hand all figure text to LaTeX for rendering.
rc('text', usetex=True)

# Serif text, with Palatino listed as the serif face to use.
rc('font', family='serif', serif=['Palatino'])

# Identical tick-label size on the x and y axes.
for tick_group in ('xtick', 'ytick'):
    rc(tick_group, labelsize=18)

# One marker per legend entry.
rcParams['legend.numpoints'] = 1

In [3]:
def _apply_plot_cosmetics(ax, labelsize, tick_pad, ylabel_pad):
    """Apply the shared tick and label-padding style to ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to style.
    labelsize : int
        Font size of the major and minor tick labels.
    tick_pad : int
        Padding passed to ``ax.xaxis.set_tick_params``.
    ylabel_pad : int
        Value assigned to ``ax.yaxis.labelpad``.
    """
    # Major and minor ticks share everything except their length.
    for which, length in (('major', 6.5), ('minor', 3.)):
        ax.tick_params(direction='in', axis='both', which=which,
                       length=length, width=1.2, labelsize=labelsize)
    ax.xaxis.set_tick_params(pad=tick_pad)
    ax.xaxis.labelpad = 5
    ax.yaxis.labelpad = ylabel_pad


def plot_cosmetics_single(ax=None):
    """Style an axes for a single-panel figure (tick labels at size 18).

    Parameters
    ----------
    ax : matplotlib Axes, optional
        Axes to style. When omitted, the current axes (``plt.gca()``) is used,
        which is the original behaviour.
    """
    if ax is None:
        ax = plt.gca()
    _apply_plot_cosmetics(ax, labelsize=18, tick_pad=7, ylabel_pad=15)


def plot_cosmetics_multi(ax=None):
    """Style an axes for a multi-panel figure (tick labels at size 20).

    Parameters
    ----------
    ax : matplotlib Axes, optional
        Axes to style. When omitted, the current axes (``plt.gca()``) is used,
        which is the original behaviour.
    """
    if ax is None:
        ax = plt.gca()
    _apply_plot_cosmetics(ax, labelsize=20, tick_pad=10, ylabel_pad=10)

## Use Ticker to plot directly from Yahoo Finance.

In [None]:
# Ask the Ticker object for a five-year price history and plot its 'Close' column
# with pandas' built-in plotting.
ticker = yf.Ticker('AAPL')
aapl_df_ticker = ticker.history(period='5y')
ticker_close = aapl_df_ticker['Close']
ticker_close.plot(title="APPLE's stock price")

<Axes: title={'center': "APPLE's stock price"}, xlabel='Date'>

## Access the data and store it in arrays.
The goal is to reproduce the plot above from the downloaded data.

In [None]:
# Download the AAPL price series once, starting on 2019-01-01 (no end date is
# passed). The result of yf.download is used directly as a DataFrame.
aapl_df = yf.download('AAPL',
                      start='2019-01-01',
                      progress=False,
)

# Independent working copy: later cells add a 'Date' column and reset the
# index on `asset_aapl`, which must not alter the raw download in `aapl_df`.
asset_aapl = aapl_df.copy()
print(asset_aapl.head())

In [None]:
closing_price_aapl = asset_aapl.loc[:, 'Close']          # closing prices
asset_aapl['Date'] = pd.to_datetime(asset_aapl.index)    # keep the index dates as a regular column (used by later cells)
date_aapl = pd.to_datetime(asset_aapl.index)             # same dates as a standalone index object, not essential

print(f'Data spanning {(asset_aapl.Date.max() - asset_aapl.Date.min()).days} days, from {asset_aapl.Date.min()} to {asset_aapl.Date.max()}.')


# NOTE: this rebinds `f`, which the import cell had bound to a ScalarFormatter.
f, ax = plt.subplots(figsize = [6., 4.])
# Styles the current axes, i.e. the one just created by plt.subplots.
plot_cosmetics_single()

# Plain apostrophe: with usetex enabled a backtick is typeset as an opening quote.
ax.set_title("APPLE's Stock Price", fontsize=15)
ax.plot(asset_aapl['Date'], closing_price_aapl)
# One tick per year, labelled with the four-digit year.
ax.xaxis.set_major_locator(YearLocator(1))
ax.xaxis.set_major_formatter(DateFormatter("%Y"))
ax.xaxis.set_tick_params(rotation=45)
ax.set_xlabel('Date', fontsize=15)
ax.set_ylabel('Closing price', fontsize=15);

In [None]:
asset_aapl.info()

# Box plot of the price columns. 'Adj Close' is only present when yfinance
# returns unadjusted prices, so restrict the selection to the columns that
# actually exist instead of failing with a KeyError.
price_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close']
# get_level_values(0) works for both flat and two-level column labels.
available_fields = set(asset_aapl.columns.get_level_values(0))
present_price_columns = [name for name in price_columns if name in available_fields]
asset_aapl[present_price_columns].plot(kind='box', fontsize=15)

## I want to make the plot interactive.

In [None]:
# Font shared by both axis titles.
axis_title_font = dict(
    family='Courier New, monospace',
    size=18,
    color='#7f7f7f'
)

# Axis titles are given as title.text / title.font (the nested form that the
# later cell also relies on via `iplot_layout.xaxis.title.text`) rather than
# through the deprecated `titlefont` attribute.
iplot_layout = go.Layout(
    title = dict(text='Stock Prices of Apple'),
    xaxis = dict(
        title=dict(text='Date', font=axis_title_font)
    ),
    yaxis = dict(
        title=dict(text='Price', font=axis_title_font)
    )
)

apple_data = [{ 'x' : asset_aapl['Date'], 'y' : asset_aapl['Close'] }]
# Named `plot1` so that the `plot` function imported from plotly.offline is
# not overwritten by a Figure object.
plot1 = go.Figure(data=apple_data, layout=iplot_layout)


iplot(plot1)

In [None]:
# build the regression model
from sklearn.model_selection import train_test_split

# preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# model evaluation
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score

In [None]:
# yfinance can return two-level column labels (field, ticker). Keep only the
# field level so that asset_aapl['Close'] is a one-dimensional Series;
# reset_index alone does not flatten column labels.
if isinstance(asset_aapl.columns, pd.MultiIndex):
    asset_aapl = asset_aapl.set_axis(asset_aapl.columns.get_level_values(0), axis=1)

# Swap the date index for a 0..n-1 integer index; the dates remain available
# in the 'Date' column created in an earlier cell.
asset_aapl = asset_aapl.reset_index(drop=True)

asset_aapl['Date'] = pd.to_datetime(asset_aapl['Date'])
# Feature: the integer row position, as a column vector of shape (n, 1).
X = asset_aapl.index.to_numpy().reshape(-1, 1)
# Target: the closing price.
Y = asset_aapl['Close']
# 70/30 random split with a fixed seed so the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=101)

print(X.shape, Y.shape)

In [None]:
# Sanity check: print the 'Date' column to confirm it can be selected by its
# plain name after the index reset in the previous cell.
print(asset_aapl['Date'])

In [None]:
# Fit a standardising scaler on the training feature only.
# NOTE(review): no visible cell applies `scaler` afterwards; confirm it is needed.
scaler = StandardScaler()
scaler.fit(X_train)

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least-squares fit of the closing price against the day index,
# using the training split only.
lm = LinearRegression()
lm.fit(X=X_train, y=Y_train)

In [None]:
# Training set: observed closing prices (markers) against the fitted line.
train_days = X_train[:, 0]              # single feature column as a 1-D array
train_fit = lm.predict(X_train).T       # model output on the training days

trace0 = go.Scatter(x=train_days, y=Y_train, mode='markers', name='Actual')
trace1 = go.Scatter(x=train_days, y=train_fit, mode='lines', name='Predicted')

# The x axis now shows the integer day index rather than a calendar date.
# This edits the shared `iplot_layout` object in place.
iplot_layout.xaxis.title.text = 'Day'

apple_data = [trace0, trace1]
plot2 = go.Figure(data=apple_data, layout=iplot_layout)

In [None]:
# Render the train-set figure (actual vs. predicted) inline in the notebook.
iplot(plot2)