In [1]:
import numpy as np
import pandas as pd
import os
import math
from datetime import date as pdate
from sklearn import linear_model
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import statistics

In [2]:
stocks_hist_data = pd.read_csv('data/stocks_hist_data.csv')
print(stocks_hist_data.head())

   Unnamed: 0        appl        boa       ford        msft
0           0  149.373917  34.902847  12.001248  225.529037
1           1  147.297668  35.446327  12.742081  252.451523
2           2  191.055511  27.824205  14.591180  339.859192
3           3  182.436554  28.165203  11.884687  332.205750
4           4  177.030594  25.863573  11.815763  326.597137


## SOFR Swap

In [3]:
sofr_curve_data = pd.read_csv('data/sofr_curve.csv')
sofr_curve_tenors = sofr_curve_data[['T','Tenor']]
sofr_curve = sofr_curve_data.drop(['T','Tenor','Unnamed: 253'], axis=1).T
sofr_curve.columns = sofr_curve_tenors['T']
sofr_curve.index = pd.to_datetime(sofr_curve.index).date
sofr_curve = sofr_curve.sort_index()
sofr_curve

T,0.002778,0.083333,0.166667,0.250000,0.500000,0.750000,1.000000,2.000000,3.000000,4.000000,...,15.000000,16.000000,17.000000,18.000000,19.000000,20.000000,25.000000,30.000000,35.000000,40.000000
2022-10-31,0.039191,0.038721,0.038670,0.040536,0.044577,0.046004,0.046449,0.044583,0.042002,0.040318,...,0.037151,0.037057,0.036907,0.036698,0.036433,0.036111,0.034091,0.032350,0.030552,0.028708
2022-11-01,0.039604,0.039023,0.038886,0.040725,0.044849,0.046448,0.046970,0.045022,0.042344,0.040614,...,0.036802,0.036682,0.036511,0.036287,0.036010,0.035678,0.033645,0.031979,0.030238,0.028478
2022-11-02,0.039948,0.039286,0.039100,0.040852,0.044884,0.046580,0.047203,0.045496,0.042749,0.040868,...,0.036855,0.036701,0.036498,0.036248,0.035953,0.035613,0.033627,0.031936,0.030292,0.028608
2022-11-03,0.040389,0.039585,0.039350,0.041154,0.045281,0.047107,0.047894,0.046594,0.043833,0.041825,...,0.037221,0.037069,0.036886,0.036657,0.036372,0.036022,0.033811,0.032134,0.030407,0.028655
2022-11-04,0.045965,0.042343,0.038795,0.040611,0.045212,0.046752,0.047500,0.046097,0.043385,0.041503,...,0.037687,0.037557,0.037380,0.037152,0.036870,0.036534,0.034424,0.032558,0.030723,0.028933
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-24,0.053105,0.053084,0.053202,0.053424,0.053690,0.053293,0.052503,0.048399,0.045999,0.044850,...,0.044093,0.044056,0.043972,0.043841,0.043663,0.043437,0.041874,0.040238,0.038438,0.036493
2023-10-25,0.052981,0.053057,0.053259,0.053490,0.053749,0.053393,0.052653,0.048791,0.046595,0.045594,...,0.045289,0.045268,0.045198,0.045078,0.044906,0.044683,0.043076,0.041358,0.039564,0.037641
2023-10-26,0.053047,0.053089,0.053218,0.053414,0.053567,0.053089,0.052243,0.048044,0.045645,0.044538,...,0.044269,0.044244,0.044171,0.044049,0.043882,0.043668,0.042182,0.040589,0.038760,0.036764
2023-10-27,0.052989,0.053040,0.053185,0.053368,0.053486,0.052991,0.052115,0.047758,0.045284,0.044200,...,0.044460,0.044461,0.044411,0.044309,0.044157,0.043955,0.042508,0.040985,0.039167,0.037148


In [4]:
# swap params 
N = 100e6
T = 10 # maturity in years
swap_rate = 0.042 


def discount_factor(zero_rates, tenors):
    """
    discount factors based on zero rates 

    Args:
    zero_rates (list): zero rates for each tenor (continuously compounded)
    tenor (list): maturities 

    Returns:
    np.array: discount factors
    """
    Z = np.array(zero_rates)
    T = np.array(tenors)
    return np.exp(-Z*T)

def forward_curve(zero_rates, tenors, interval=1):
    """
    forward rates based on zero rates 

    Args:
    zero_rates (list): zero rates for each tenor (continuously compounded)
    tenor (list): maturities 
    interval (int): interval for forward rates

    Returns:
    np.array: forward rates
    """
    DF = discount_factor(zero_rates, tenors)
    DF_start = np.concatenate([[1], DF[:-1]]) # first DF=1. we dont consider forward swap here
    DF_end = DF
    F = (DF_start - DF_end) / (DF_end * interval)
    return F

def pv_swap(zero_rates, tenors, forward_rates=None, swap_rate=0.042, interval=1, N = 100e6):
    """
    present value of a swap based on zero rates and forward rates
    
    Args:
    zero_rates (list): zero rates for each tenor (continuously compounded)
    tenor (list): maturities
    forward_rates (list): forward rates for each tenor
    swap_rate (float): fixed swap rate
    interval (int): interval for forward rates
    notional (float): notional amount
    
    Returns:
    float: present value of the swap
    """
    DF = discount_factor(zero_rates, tenors)
    F = forward_curve(zero_rates, tenors) if forward_rates is None else np.array(forward_rates)
    pv_fix = swap_rate*sum(interval*DF)
    pv_flt = sum(interval*F*DF)
    return N*(pv_flt - pv_fix)

def swap_pnl_1d_full(zero_rates_t0, zero_rates_t1, tenors):
    """
    pnl of a swap based on zero rates at t0 and t1 for 1 day 

    Args:
    zero_rates_t0 (list): zero rates at time t0
    zero_rates_t1 (list): zero rates at time t1
    tenors (list): maturities

    Returns: 
    float: pnl of the swap 
    """
    pv_t0 = pv_swap(zero_rates_t0, tenors)
    pv_t1 = pv_swap(zero_rates_t1, tenors)
    return pv_t1 - pv_t0

In [5]:
# filter sofr curve to get 1-10 years tenors
sofr_10y_tenor = sofr_curve.loc[:, sofr_curve.columns.isin(range(1,11))]
sofr_10y_tenor

T,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0
2022-10-31,0.046449,0.044583,0.042002,0.040318,0.039297,0.038534,0.037979,0.037648,0.037465,0.037353
2022-11-01,0.046970,0.045022,0.042344,0.040614,0.039522,0.038658,0.038007,0.037605,0.037368,0.037209
2022-11-02,0.047203,0.045496,0.042749,0.040868,0.039675,0.038776,0.038117,0.037704,0.037458,0.037300
2022-11-03,0.047894,0.046594,0.043833,0.041825,0.040507,0.039537,0.038820,0.038328,0.038002,0.037796
2022-11-04,0.047500,0.046097,0.043385,0.041503,0.040303,0.039421,0.038782,0.038373,0.038128,0.037988
...,...,...,...,...,...,...,...,...,...,...
2023-10-24,0.052503,0.048399,0.045999,0.044850,0.044305,0.044036,0.043889,0.043816,0.043798,0.043826
2023-10-25,0.052653,0.048791,0.046595,0.045594,0.045200,0.045017,0.044911,0.044867,0.044876,0.044926
2023-10-26,0.052243,0.048044,0.045645,0.044538,0.044086,0.043893,0.043808,0.043795,0.043829,0.043898
2023-10-27,0.052115,0.047758,0.045284,0.044200,0.043762,0.043636,0.043643,0.043702,0.043793,0.043908


In [6]:
tenors = list(range(1,11))
sofr_curve_t0 = sofr_10y_tenor.iloc[-1].values

sofr_curve_exclude_last = sofr_10y_tenor.values[:-1] 

swap_pnl_full_hist = []

for sofr_curve_today in sofr_curve_exclude_last:
    pnl = swap_pnl_1d_full(sofr_curve_t0, sofr_curve_today, tenors)
    swap_pnl_full_hist.append(pnl)

print(swap_pnl_full_hist)

[np.float64(-5401404.460717557), np.float64(-5487784.874865042), np.float64(-5397127.786791556), np.float64(-4930364.575828766), np.float64(-4810358.893058364), np.float64(-4514791.135917757), np.float64(-5264990.21565821), np.float64(-5212199.616088958), np.float64(-7821633.753293622), np.float64(-7622713.398640407), np.float64(-8296505.400750626), np.float64(-9343897.982544808), np.float64(-8602766.971620202), np.float64(-8125139.577424783), np.float64(-7996587.473650858), np.float64(-8820245.707488373), np.float64(-9222571.098642284), np.float64(-9277819.38293984), np.float64(-9329914.117189676), np.float64(-9000929.69608635), np.float64(-9469861.787309863), np.float64(-10960093.189748228), np.float64(-11148985.901660614), np.float64(-10228636.764480958), np.float64(-11187030.766374419), np.float64(-11977251.829819202), np.float64(-11082822.559102807), np.float64(-10363688.320748616), np.float64(-10017566.21154738), np.float64(-11049882.438340543), np.float64(-11012751.710828245), n

## Stocks 

In [7]:
df_AAPL_hist = pd.read_csv('data/AAPL.csv').set_index('Date')
df_MSFT_hist = pd.read_csv('data/MSFT.csv').set_index('Date')
df_FORD_hist = pd.read_csv('data/F.csv').set_index('Date')
df_BAC_hist = pd.read_csv('data/BAC.csv').set_index('Date')

df_AAPL_hist.index = pd.to_datetime(df_AAPL_hist.index).date
df_MSFT_hist.index = pd.to_datetime(df_MSFT_hist.index).date
df_FORD_hist.index = pd.to_datetime(df_FORD_hist.index).date
df_BAC_hist.index = pd.to_datetime(df_BAC_hist.index).date

df_AAPL_hist = df_AAPL_hist.sort_index()
df_MSFT_hist = df_MSFT_hist.sort_index()
df_FORD_hist = df_FORD_hist.sort_index()
df_BAC_hist = df_BAC_hist.sort_index()

  df_AAPL_hist.index = pd.to_datetime(df_AAPL_hist.index).date
  df_MSFT_hist.index = pd.to_datetime(df_MSFT_hist.index).date
  df_FORD_hist.index = pd.to_datetime(df_FORD_hist.index).date
  df_BAC_hist.index = pd.to_datetime(df_BAC_hist.index).date


In [8]:
## interpolation is not required since the stocks are traded on the same days 
# relative daily change of stock prices 
df_AAPL_hist['Adj Close'] = df_AAPL_hist['Adj Close'].pct_change()
df_MSFT_hist['Adj Close'] = df_MSFT_hist['Adj Close'].pct_change()
df_FORD_hist['Adj Close'] = df_FORD_hist['Adj Close'].pct_change()
df_BAC_hist['Adj Close'] = df_BAC_hist['Adj Close'].pct_change()
 
aapl = df_AAPL_hist['Adj Close'].to_list()
msft = df_MSFT_hist['Adj Close'].to_list()
ford = df_FORD_hist['Adj Close'].to_list()
bac = df_BAC_hist['Adj Close'].to_list()

df_stocks = {
    'aapl': aapl,
    'msft': msft,
    'ford': ford,
    'bac': bac
}

df_stocks = pd.DataFrame(df_stocks, columns=['aapl', 'msft', 'ford', 'bac'])
df_stocks = df_stocks.iloc[1:] # remove the first row since it is NaN
df_stocks

# corr_matrix = df_stocks.corr()
# # print(corr_matrix)

Unnamed: 0,aapl,msft,ford,bac
1,-0.013900,0.119375,0.061730,0.015571
2,-0.023735,-0.138241,-0.082038,-0.018325
3,0.020859,0.161921,0.072349,0.005568
4,-0.061972,-0.162231,-0.053187,-0.011048
5,-0.001947,0.033326,0.018854,0.025077
...,...,...,...,...
246,0.079380,0.089880,-0.067423,-0.048806
247,0.065017,0.076401,0.180805,0.083885
248,0.032537,0.016122,0.104499,0.007896
249,-0.069726,-0.014020,-0.173093,-0.005061


### Historical VaR - Full Revaluation and Sensitivity 

In [9]:
# full revaluation 1d pnl evaluation 
def pnl1d_full(stocks_returns, w = [1e6, 1e6, 1e6, 1e6]):
    """
    stocks_returns: DataFrame of stocks returns
    w: list of notional weights
    """
    return (w[0]*((1+stocks_returns[0])-1) + w[1]*((1+stocks_returns[1])-1) + w[2]*((1+stocks_returns[2])-1) + w[3]*((1+stocks_returns[3])-1))

# sensitivity based 1d pnl evaluation 
def pnl1d_sensi(stocks_returns, w = [1e6, 1e6, 1e6, 1e6]):
    """
    stocks_returns: DataFrame of stocks returns
    w: list of notional weights
    """
    return [w[i]*stocks_returns[i] for i in range(len(w))]


In [11]:
# historical VaR
confidence_level = 0.95

hist_returns = df_stocks.to_numpy().tolist()

pnl1d_full_hist = list(pnl1d_full(s) for s in hist_returns)+list(swap_pnl_full_hist)
var1d_full_hist = np.abs(np.percentile(pnl1d_full_hist, confidence_level))

pnl1d_sensi_hist = [pnl1d_sensi(s) for s in hist_returns]
var1d_sensi_hist = np.abs(np.percentile(pnl1d_sensi_hist, confidence_level))

print("")
print("")
print("============================================================================================================================")
print("Historical VaR:")
print(f"VaR [1d, {confidence_level}%], Full Revaluation: {var1d_full_hist:,.0f}") 
print(f"VaR [1d, {confidence_level}%], Sensitivity: {var1d_sensi_hist:,.0f}") 
print("============================================================================================================================")




Historical VaR:
VaR [1d, 0.95%], Full Revaluation: 11,949,757
VaR [1d, 0.95%], Sensitivity: 267,644
