# Regression 
multiple predictors to multiple predictants

$$\begin{bmatrix} y_1 & y_2 & y_3 \end{bmatrix} 
= \begin{bmatrix} x_1 & x_2 & 1 \end{bmatrix} 
\begin{bmatrix} 
    \beta_{1, 1} & \beta_{2, 1} & \beta_{3, 1} \\ 
    \beta_{1, 2} & \beta_{2, 2} & \beta_{3, 2} \\ 
    \beta_{1, 0} & \beta_{2, 0} & \beta_{3, 0} \\ 
\end{bmatrix} 

In [10]:
import numpy as np
import pandas as pd
from scipy import signal, linalg
import matplotlib.pyplot as plt

In [11]:
# Read Nino indices
f = 'data/era5_nino.csv'
nino = pd.read_csv(f, index_col=0, parse_dates=True)

# Read T2m data
f = 'data/era5_monthly_t2m_points.csv'
df = pd.read_csv(f, index_col=0, parse_dates=True)

# Estimate anomaly
dfa = df.groupby(df.index.month).transform(lambda x: x-x.mean())

# Detrend
data = signal.detrend(dfa.values, axis=-1)

# Define predictor and predictand
x = nino[['nino3', 'nino4']].values
y = data[:, :3]
n = x.shape[0]

print(f'{x.shape = :}')
print(f'{y.shape = :}')

x.shape = (1008, 2)
y.shape = (1008, 3)


$$y
= \begin{bmatrix} y_1 & y_2 & y_3 \end{bmatrix} 
= Ap
= \begin{bmatrix} x_1 & x_2 & 1 \end{bmatrix} 
\begin{bmatrix} 
    \beta_{1, 1} & \beta_{2, 1} & \beta_{3, 1} \\ 
    \beta_{1, 2} & \beta_{2, 2} & \beta_{3, 2} \\ 
    \beta_{1, 0} & \beta_{2, 0} & \beta_{3, 0} \\ 
\end{bmatrix} 

In [12]:
A = np.hstack([x, np.ones((n, 1))])
print(f'{A.shape = :}')
print(A[:5])

A.shape = (1008, 3)
[[0.77482826 0.49944904 1.        ]
 [0.91584975 0.6616926  1.        ]
 [0.96427315 0.78748566 1.        ]
 [0.48696518 0.6775541  1.        ]
 [1.1274626  0.87392026 1.        ]]


In [13]:
p, res, rnk, s = linalg.lstsq(A, y)

print(f'{p.shape = :}')
print(f'{p = :}')
print(f'Sums of squared residuals = {res}')
print(f'Rank of matrix A (number of linearly independent columns): {rnk}')
print(f'Singular values of A: {s}')

p.shape = (3, 3)
p = [[-6.51854590e-02  1.68140215e-01 -1.56988096e-01]
 [ 1.28367018e-01 -2.26603304e-01  4.44847279e-02]
 [ 8.81011244e-08 -1.55595957e-07  3.07159735e-08]]
Sums of squared residuals = [ 695.95524571 1568.9124786  1554.14069974]
Rank of matrix A (number of linearly independent columns): 3
Singular values of A: [31.74901573 29.80843068  9.99211823]


In [14]:
# Reconstruction
y_recon = A @ p

# Residual
y_res = y - y_recon
print(f'{y_res.shape = :}')

# sum of squared residuals
print(f'Sums of squared residuals = {(y_res**2).sum(axis=0)}')

y_res.shape = (1008, 3)
Sums of squared residuals = [ 695.95524571 1568.9124786  1554.14069974]
