In [None]:
from datetime import datetime
import numpy as np
import pandas as pd
import pandas_datareader as web
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA as sklearnPCA
# !pip install arch
from arch import arch_model

# !pip install git+https://github.com/RJT1990/pyflux
# import pyflux as pf

# Apply the 'Solarize_Light2' matplotlib style sheet to the figures drawn below.
plt.style.use('Solarize_Light2')

# Model evaluation = visual check, log-likelihood

In [None]:
""" Allows web scraping """
# Symbols that make up the portfolio, and how many of them there are.
ticker_list = "AMT AZRE CAT COST GOOD HON ILMN NVTA NEE PLUG PLD UNP VRTX Z".split()
numAssets = len(ticker_list)

# Download window: a fixed start date up to the current day, both as 'YYYY-MM-DD'.
stockStartDate = '2016-01-01'
today = datetime.today().date().isoformat()


def getMyPortfolio(stocks, start, end, col):
    """Download one price field for the given tickers and return it.

    Parameters
    ----------
    stocks : str or list of str
        Ticker symbol(s) handed to ``web.DataReader``.
    start, end : str
        Bounds of the date range, forwarded unchanged to the reader.
    col : str
        Field selected from the downloaded data, e.g. ``'Adj Close'``.

    Returns
    -------
    pandas.DataFrame
        The selected field wrapped in a DataFrame (typically one column per
        ticker). Summary statistics and the covariance matrix of returns can
        be derived from it with ``.describe()`` and ``.pct_change().cov()``.
    """
    data = web.DataReader(stocks, data_source='yahoo', start=start, end=end)[col]
    # The earlier version computed describe() and the covariance matrix here,
    # discarded both and returned None, so callers could never use the
    # download. Hand the frame back and let the caller derive what it needs.
    return pd.DataFrame(data)


# Fetch the adjusted close field for the whole portfolio, from the start date up to today.
getMyPortfolio(stocks=ticker_list, start=stockStartDate, end=today, col='Adj Close')

In [None]:
""" Read from csv, plot, write to csv """
labels_list = ['Asset 1', 'Asset 2', 'Asset 3']

# Load the csv, derive the period-over-period percentage change of every
# column and persist it before echoing the raw frame.
data_df = pd.read_csv('Individual_return-1.csv')
data_pct = data_df.pct_change()
data_pct.to_csv('SP500_pct.csv')
print(data_df)

# Draw every column of the raw frame as a line on a single pair of axes.
fig, ax = plt.subplots(figsize=(12, 7))
ax.set_title('Plotted csv data')
ax.set_ylabel('Close Price USD')
ax.set_xlabel('Time Period')
ax.plot(data_df, lw=1.5)
plt.show()

In [None]:
""" Autocorrelation - correlation of a signal with a delayed copy of 
itself as a function of delay. The similarity between observations as a 
function of the time lag between them.
Partial autocorrelation is the autocorrelation between y_t and y_(t-h) after the
removal of any linear dependence on the intermediate observations y_(t-1), ..., y_(t-h+1).
""" 

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Read the PYPL csv once and feed the same data to both correlogram helpers:
# partial autocorrelation first, then plain autocorrelation.
series = pd.read_csv('PYPL.csv')
plot_pacf(x=series)
plot_acf(x=series)

In [None]:
""" ARMA - Autoregressive–moving-average model.
AR - Attempts to explain mean reversion behaviour, and takes param 'P'
MA - Attempts to capture shock movements within white noise fluctuations
and takes param 'Q'
 """

# !pip install git+https://github.com/statsmodels/statsmodels
from statsmodels.tsa.arima.model import ARIMA

# The csv is expected to hold a single column. Squeeze it into a Series so the
# subtraction of the residuals further down aligns row by row; a DataFrame
# minus a Series would align the Series against the column labels instead.
series = pd.read_csv('PYPL.csv').squeeze(axis='columns')
sq_price = series.pct_change() ** 2

# The split used to read from `price`, a name that was never defined
# (NameError on a fresh kernel). The modelled series is the squared returns.
split = 3000
train = sq_price.iloc[1:split]      # Row 0 is the NaN produced by pct_change
test = sq_price.iloc[split:]        # Previously started at 3001, silently dropping row 3000

model = ARIMA(train, order=(0, 0, 1))           # Order = (p, d, q), for all ARIMA-type models
model_fit = model.fit()
print(model_fit.summary())

plt.figure(figsize=(10, 4))
plt.plot(sq_price)
plt.title("Squared Returns less Resid. ", fontsize=20)

# Observed minus residual = in-sample fitted values; rows outside the training
# window have no residual and therefore come out as NaN (not drawn).
residuals = model_fit.resid
plot_2 = sq_price - residuals
plt.plot(plot_2)
plt.show()


In [None]:
""" GARCH  - Estimates generalized autoregressive conditional 
heteroskedasticity. It is an ARCH model used to estimate volatility, using a 
moving average. This requires a clear variance in volatility, or high levels of 
volatility clustering. """

jpm_prices = web.DataReader('JPM', 'yahoo', stockStartDate, today)   # Replace with real data
returns = pd.DataFrame(jpm_prices['Adj Close']).pct_change()
# Only the first row is NaN after pct_change; the earlier iloc[2:] also threw
# away the first valid return. dropna() removes exactly the undefined rows.
data = returns.dropna()

am = arch_model(data, p=1, q=1)                                 # GARCH(p,q)
res = am.fit(update_freq=0)                                     # update_freq=0 turns off per-iteration output
res.summary()
# fig = res.plot(annualize="D")                                 # Daily, weekly or monthly

In [None]:
""" 
Solve a PCA  - Principal component analysis, has 3 steps:
1 - Standardization: z = (value - mean) / standard deviation
2 - Covariance matrix computation
3 - Eigenvectors and eigenvalues, and hence the principal components

In notes: 
- Given a covariance matrix apply PCA to obtain the eigenvalues, 
principal components, and the transition matrix.
- Given a time series, knows how to apply PCA

This code tackles step 3, and takes a DataFrame of returns.
 """

data_df = pd.read_csv('Individual_return-1.csv')

X = data_df
pca = sklearnPCA(n_components=3)                                  # Number of components to keep
model = pca.fit(X)                                                # fit() hands back the fitted estimator
print(f'Variance Ratio: {model.explained_variance_ratio_}')       # Share of variance per component (not necessary)
print(f'Raw Value Variances: \n{model.explained_variance_}')      # Raw value variances (eigenvalues)
print(f'Transition Matrix: \n{model.components_.T}')              # Transition matrix (coeffs)

# Project the observations onto the principal axes. score_samples() was used
# here before, but it returns the log-likelihood of each sample under the
# fitted model, not the principal component scores.
scores = pca.transform(X)
print(f'Principal Components: \n{scores}')                        # Principal components
 