In [None]:
"""
This is William Franks' reference sheet for Algo Trading at Northeastern.
It collects the functions and operations needed for the class.

The material is organized into cells, each with its topic as a title at the top.
"""

In [46]:
# Imports

import csv
import os
import statistics

import numpy as np
import pandas as pd
import requests
from scipy import stats
from scipy.stats import skew, kurtosis
from statsmodels.tsa.stattools import adfuller

In [40]:
# Mean, Variance, Skew, Kurtosis
# Only the final expression of the cell is displayed; the others are kept as a syntax reference.

data = [1, 5, -3, 8, 2]
data_df = pd.DataFrame({'Data': data})  # same sample as a one-column frame

# --- Mean ---
np.mean(data)

# --- Variance ---
statistics.variance(data)   # sample variance
statistics.pvariance(data)  # population variance

# --- Skew ---
skew(data, bias=False)  # SciPy; bias=False turns the bias correction ON
data_df['Data'].skew()  # pandas equivalent

# --- Kurtosis ---
kurtosis(data, bias=False, fisher=False)  # SciPy; fisher=False -> plain kurtosis, fisher=True -> excess kurtosis
data_df['Data'].kurtosis()                # pandas reports excess kurtosis: 3 less than the SciPy line above


np.float64(-0.17474690099903256)

In [35]:
# Covariance and Correlation

x = [-1, 5, -7, 4, -10]
y = [2, -1, 14, 8, -2]
df = pd.DataFrame()
df['x'] = x
df['y'] = y

np.cov(x, y, ddof=1) # NumPy Covariance matrix (Sample Cov, ddof=1)
np.cov(x, y, ddof=1)[0, 1] # NumPy Covariance
df['x'].cov(df['y']) # Pandas Covariance (Sample Cov, n-1)
statistics.covariance(x, y) # Stats Cov

np.corrcoef(x, y) # NumPy correlation matrix
np.corrcoef(x, y)[0, 1] # NumPy correlation
df['x'].corr(df['y']) # Pandas correlation
statistics.correlation(x, y) # Stats Cor

-0.08550148560121865

In [None]:
# Hypothesis Testing
# For all the questions, different = "Statistically Different"
# Each p_val_* below is the two-sided p-value of the test named in the comment above it.

np.random.seed(42) # Fixed seed so the random sample (and every p-value) is reproducible
x = np.random.normal(0.05, 0.15, 100) # mean, stddev, n
y = np.random.normal(0.02, 0.20, 100)

# Is the mean of x different from 0? SciPy one-sample t-test
t_stat, p_val_1 = stats.ttest_1samp(x, 0)

# Is the variance of x different from 0.0225 (0.15**2)? Chi-square variance test, computed by hand
n = len(x)
target_var = 0.15 ** 2
chi_sq = (n - 1) * np.var(x, ddof=1) / target_var # (n-1) * s^2 / sigma0^2
p_val_2 = 2 * min(stats.chi2.cdf(chi_sq, n-1), stats.chi2.sf(chi_sq, n-1)) # two-sided: double the smaller tail

# Is mean(x) different from mean(y)? SciPy two-sample t-test
_, p_val_3 = stats.ttest_ind(x, y, equal_var=False) # equal_var=False means Welch's t-test

# Is Var(x) different from Var(y)? SciPy Levene test for equal variances
_, p_val_4 = stats.levene(x, y)

# Is x normally distributed? SciPy Jarque-Bera test
_, p_val_5 = stats.jarque_bera(x)

# Hand calculation of the t statistic (is the mean different from 2%?)
# NOTE: this reuses the name t_stat, replacing the value from the one-sample test against 0 above.
# ddof=1 is required: NumPy's .std() defaults to the population formula (ddof=0),
# while the t statistic is defined with the sample standard deviation.
t_stat = (x.mean() - 0.02) / (x.std(ddof=1) / (len(x) ** 0.5)) # (xbar - null) / (s / sqrt(n))
deg_freedom = len(x) - 1
p_value = 2 * stats.t.sf(abs(t_stat), df=deg_freedom) # sf = 1 - cdf, without the rounding loss in the tail




np.float64(0.03442302239088593)

In [None]:
# Matrix Operations
# Only the final expression of the cell is displayed; the others are kept as a syntax reference.

A = np.array([[4, 5],
              [5, 9]])

A.T                               # transpose of A
np.linalg.inv(A)                  # inverse of A
value, vector = np.linalg.eig(A)  # eigenvalues, and eigenvectors stored as the columns of `vector`
np.linalg.cholesky(A)             # Cholesky factor; A must be symmetric AND positive-definite




array([[ True,  True],
       [ True,  True]])

In [None]:
# Alpha Vantage Processing
# Downloads the daily adjusted series for one ticker, loads it into a DataFrame with readable
# column names, and saves the raw values to a CSV in the working folder.

# The API key is read from the environment so the secret never lives in the notebook.
# NOTE(review): a key used to be hardcoded in this cell -- treat it as leaked and rotate it.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY')
if not api_key:
    raise RuntimeError('Set the ALPHAVANTAGE_API_KEY environment variable before running this cell')

key = '&apikey=' + api_key # API Key
ticker = '&symbol=IBM' # Ticker
endpoint = 'function=TIME_SERIES_DAILY_ADJUSTED' # Called 'function', the dataset we want
size = '&outputsize=compact'
web = 'https://www.alphavantage.co/query?'
url = web + endpoint + ticker + size + key

r = requests.get(url, timeout=30) # timeout so a stalled connection cannot hang the kernel
print(r.status_code) # 200 good, 400 bad
data = r.json()

print(data.keys()) # printing the keys
# A 200 status does not guarantee a payload with the series in it, so fail here with a
# readable message instead of a bare KeyError on the lookups below.
if 'Meta Data' not in data or 'Time Series (Daily)' not in data:
    raise RuntimeError(f'Alpha Vantage response is missing the expected keys: {data}')
meta = data['Meta Data']
time_series_data = data['Time Series (Daily)']

# Dictionary to convert the raw API column names into readable ones
clean_cols_dict = {'1. open': 'Open', '2. high': 'High', '3. low': 'Low', '4. close': 'Close',
            '5. adjusted close': 'Adj Close', '6. volume': 'Volume', '7. dividend amount': 'Dividend', '8. split coefficient': 'Split Coef'}

# One row per date; the date index becomes a 'Date' column and the raw names are replaced
# using clean_cols_dict (the mapping was previously defined but never applied).
ts_df = (
    pd.DataFrame.from_dict(time_series_data, orient='index')
    .reset_index()
    .rename(columns={'index': 'Date'})
    .rename(columns=clean_cols_dict)
)

# The same extraction with standard Python: [date, symbol, close] for every date
clean = [[date, meta['2. Symbol'], bar['4. close']] for date, bar in time_series_data.items()]

ts_np = ts_df.to_numpy() # How to convert a df to a 2d numpy matrix

np.savetxt("IBM TS Price.csv", ts_np, delimiter=",", fmt="%s") # Saves to the folder you are working in



200
dict_keys(['Meta Data', 'Time Series (Daily)'])


In [None]:
# Pandas Time Series

# Normalise ts_df first: parse the dates, make the prices numeric, and order the rows
# oldest -> newest. Without this, shift(1) is not guaranteed to mean "previous day",
# tail(1) is not guaranteed to be the last trading day, and pd.Grouper(freq=...) has no
# datetime index to group on. Re-running this step is harmless.
# NOTE(review): assumes ts_df has a 'Date' column of date strings and an 'Adj Close' column -- confirm against the loading cell.
ts_df = (
    ts_df
    .assign(**{'Date': pd.to_datetime(ts_df['Date']),
               'Adj Close': pd.to_numeric(ts_df['Adj Close'])})
    .sort_values('Date')
    .reset_index(drop=True)
)

# Moving average: window=3 looks back over 3 values, inclusive of the current one
# NOTE(review): no visible cell gives `df` a 'Price' column -- point this at your own price frame before running.
df['Price MAVG'] = df['Price'].rolling(window=3).mean()

# Log return of each row versus the row before it; the first row has no prior value and is NaN.
# Change the shift to change the interval.
# NOTE(review): with daily rows and shift(1) this is a one-day return even though the column says 'Monthly' -- name kept for compatibility.
ts_df['Monthly Log Return'] = np.log(ts_df['Adj Close'] / ts_df['Adj Close'].shift(1)) # log(a) - log(b) = log(a/b)

# 2.5% percentile (the bottom 1/40 of values)
p2_5 = ts_df['Monthly Log Return'].quantile(0.025)

# Shifting values down: the sell column is the buy column moved down by 1 row
# NOTE(review): no visible cell creates ts_df['Buy'] -- add your signal column before running.
ts_df['Sell'] = ts_df['Buy'].shift(1)

# Get a series with just the last available day of every month, useful when the last trading day isn't month end
# NOTE(review): 'ME' is the month-end alias in recent pandas; older versions may need 'M' -- confirm for your install.
monthly_df = ts_df.set_index('Date')
monthly_prices = monthly_df.groupby(pd.Grouper(freq='ME')).tail(1)['Adj Close']