In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# --- Predictors: RMM index ---------------------------------------------------
# The first CSV column is used as the index and parsed as dates.
f = 'data/RMM_clean.csv'
df = pd.read_csv(f, index_col=0, parse_dates=True).loc['1980-01-01':'1999-12-31']

# Keep only the rows whose calendar month is November through April.
df = df.loc[df.index.month.isin([11, 12, 1, 2, 3, 4])]

n = len(df)
print(f'Number of samples = {n}')

# --- Targets: Z500 anomalies -------------------------------------------------
# Read the same way, then select the rows matching the retained RMM dates so
# that predictors and targets are aligned row by row.
f = 'data/MERRA2.H.500.anom.points.csv'
h500 = pd.read_csv(f, index_col=0, parse_dates=True).loc[df.index]

# --- Plain arrays for the regression -----------------------------------------
x = df.loc[:, ['RMM1', 'RMM2']].to_numpy()
y = h500.to_numpy()

print(f'{x.shape = :}')
print(f'{y.shape = :}')

Number of samples = 3625
x.shape = (3625, 2)
y.shape = (3625, 3)


$$y
= \begin{bmatrix} y_1 & y_2 & y_3 \end{bmatrix}
= Ap
= \begin{bmatrix} x_1 & x_2 & 1 \end{bmatrix}
\begin{bmatrix}
    \beta_{1, 1} & \beta_{2, 1} & \beta_{3, 1} \\
    \beta_{1, 2} & \beta_{2, 2} & \beta_{3, 2} \\
    \beta_{1, 0} & \beta_{2, 0} & \beta_{3, 0}
\end{bmatrix}
$$

In [3]:
# Design matrix: the columns of x followed by a constant column of ones,
# so the fit includes an intercept term alongside the two predictors.
A = np.column_stack((x, np.ones(n)))
print(f'{A.shape = :}')
print(A[:5])

A.shape = (3625, 3)
[[ 0.81158602 -0.0664676   1.        ]
 [ 0.70508802 -0.0296843   1.        ]
 [ 0.66273302  0.0371988   1.        ]
 [ 0.61483997  0.21363001  1.        ]
 [ 0.70287699  0.490769    1.        ]]


In [5]:
# Least-squares solution of A @ p ~= y, solved for every column of y at once.
# lstsq returns the coefficients, the per-column sums of squared residuals,
# the rank of A and the singular values of A, in that order.
p, res, rnk, s = np.linalg.lstsq(A, y, rcond=None)

# Report all fit diagnostics in a single call, one item per line.
print(
    f'{p.shape = :}',
    f'{p = :}',
    f'Sums of squared residuals = {res}',
    f'Rank of matrix A (number of linearly independent columns): {rnk}',
    f'Singular values of A: {s}',
    sep='\n',
)

p.shape = (3, 3)
p = [[ 11.06543568  -3.57836581  -2.1927536 ]
 [-19.66071532   7.35680147  -0.61678283]
 [-12.24433374  -3.36135726  -2.41285948]]
Sums of squared residuals = [61035901.50265467 34716670.08485769   561830.73390595]
Rank of matrix A (number of linearly independent columns): 3
Singular values of A: [67.71598801 61.62431145 60.19286619]


In [7]:
# Fitted values implied by the least-squares coefficients
y_recon = np.matmul(A, p)

# Part of y that the fit leaves unexplained
y_res = np.subtract(y, y_recon)
print(f'{y_res.shape = :}')

# Column-wise sum of squared residuals, computed by hand from the residuals
print(f'Sums of squared residuals = {(y_res**2).sum(axis=0)}')

y_res.shape = (3625, 3)
Sums of squared residuals = [61035901.50265458 34716670.08485775   561830.73390595]
