In [1]:
%load_ext jupyter_black

In [2]:
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global RNG so the np.random.* draws in this notebook are
# reproducible when the cells are run in order from a fresh kernel.
np.random.seed(42)

# Task 19.

First, let us generate some multivariate data.

In [3]:
# Task 19 input: n draws from the bivariate normal N(mu, sigma).
n = 1000
mu = np.array([3, 1])
sigma = np.array(
    [
        [7, 1],
        [1, 6],
    ]
)

x = np.random.multivariate_normal(mean=mu, cov=sigma, size=n)
x.shape

(1000, 2)

Then, we compute the true parameters of the transformed distribution: if $x \sim \mathcal{N}(\mu, \Sigma)$, then $Ax \sim \mathcal{N}(A\mu, A\Sigma A^T)$.

In [4]:
# Linear map of Task 19: row 0 gives x_2 - x_1, row 1 gives x_1.
A = np.array([[-1, 1], [1, 0]])
# Exact mean and covariance of A x: A mu and A sigma A^T.
mu_true = np.matmul(A, mu)
sigma_true = np.matmul(np.matmul(A, sigma), A.T)

After that, we transform the initial sample as specified in the theoretical part of this task.

In [5]:
# Stack the two transformed coordinates (x_2 - x_1, x_1) as rows, then transpose
# so that each row of k_est is one transformed observation.
k_est = np.stack((x[:, 1] - x[:, 0], x[:, 0]), axis=0).T
k_est.shape

(1000, 2)

Let us estimate the parameters of the distribution of `k_est`. Assuming that it is normal, we get:

In [6]:
# Sample mean over observations (rows) and sample covariance with the columns
# of k_est treated as the variables.
mu_est = k_est.mean(axis=0)
sigma_est = np.cov(k_est, rowvar=False)

print(f"Difference of means: {mu_est - mu_true}")
print(f"Difference of covariance matrices:\n{sigma_est - sigma_true}")

Difference of means: [ 0.21170987 -0.14741463]
Difference of covariance matrices:
[[ 0.26119218  0.01953586]
 [ 0.01953586 -0.35591076]]


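As we can see, the differences are small relative to the scale of the true parameters and are consistent with sampling error for $n = 1000$ observations.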

# Task 20.

First, let us generate some multivariate data.

In [7]:
# Task 20 input: n draws from the trivariate normal N(mu, sigma).
n = 1000
mu = np.array([7, 2, 9])
sigma = np.array(
    [
        [2, -1, 0],
        [-1, 2, -1],
        [0, -1, 2],
    ]
)

x = np.random.multivariate_normal(mean=mu, cov=sigma, size=n)
x.shape

(1000, 3)

Then, we compute the true parameters of the transformed distribution: if $x \sim \mathcal{N}(\mu, \Sigma)$, then $Bx \sim \mathcal{N}(B\mu, B\Sigma B^T)$.

In [8]:
# Linear map of Task 20.
B = np.array(
    [
        [0.5, -1.0, 0.5],
        [-1.5, 2, -0.5],
        [1.0, 0.0, 0.0],
    ]
)
# Exact mean and covariance of B x: B mu and B sigma B^T.
mu_true = np.matmul(B, mu)
sigma_true = np.matmul(np.matmul(B, sigma), B.T)

Then, we transform the initial sample as specified in the theoretical part of this task.

In [9]:
# Apply B to every observation: row i of k_est is B applied to row i of x.
k_est = np.matmul(x, B.T)
k_est.shape

(1000, 3)

Let us estimate the parameters of the distribution of `k_est`. Assuming that it is normal, we get:

In [10]:
# Sample mean over observations (rows) and sample covariance with the columns
# of k_est treated as the variables.
mu_est = k_est.mean(axis=0)
sigma_est = np.cov(k_est, rowvar=False)

print(f"Difference of means: {mu_est - mu_true}")
print(f"Difference of covariance matrices:\n{sigma_est - sigma_true}")

Difference of means: [-0.09353196  0.10480245  0.0329825 ]
Difference of covariance matrices:
[[-0.10780617  0.18756786 -0.02402411]
 [ 0.18756786 -0.30305886  0.01377784]
 [-0.02402411  0.01377784  0.02937912]]


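As we can see, the differences are small relative to the scale of the true parameters and are consistent with sampling error for $n = 1000$ observations.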

# Task 21.

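Let us define a function that draws a random $(n+1)$-dimensional normal sample, transforms it with the inverse of a Vandermonde matrix, and reports how far the estimated parameters are from the true ones: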

In [11]:
def same_parameters_check(n, m):
    """Compare exact and sample-estimated parameters of a transformed normal sample.

    Draws ``m`` observations from an ``(n + 1)``-dimensional normal distribution
    with a random mean and a random diagonal covariance, applies the matrix
    ``C`` (the inverse of the Vandermonde matrix on the nodes ``0, 1, ..., n``)
    to every observation, and prints the distance between the sample mean /
    sample covariance of the transformed data and the exact values
    ``C @ mu`` / ``C @ sigma @ C.T``.

    Parameters
    ----------
    n : int
        Highest power used in the Vandermonde matrix; the distribution has
        ``n + 1`` dimensions. Must be non-negative.
    m : int
        Number of observations to draw. Must be at least 2, otherwise the
        sample covariance is undefined.

    Returns
    -------
    None
        The two distances are printed, not returned.

    Raises
    ------
    ValueError
        If ``n < 0`` or ``m < 2``.

    Notes
    -----
    Random numbers come from NumPy's global RNG (``np.random.*``), so results
    depend on the current global seed/state.

    The printed values are absolute norms (Euclidean for the mean vector,
    Frobenius for the covariance matrix); they are not normalised by the size
    of the true parameters.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")
    if m < 2:
        raise ValueError(f"m must be at least 2 to estimate a covariance, got {m}")

    dim = n + 1

    mu = np.random.rand(dim)
    # rand() draws from [0, 1), so adding 1 keeps every variance in [1, 2):
    # the diagonal covariance matrix is positive definite.
    sigma = np.diag(np.random.rand(dim) + 1)

    x = np.random.multivariate_normal(mu, sigma, m)

    # Vandermonde matrix with decreasing powers: row i is [i**n, ..., i**1, i**0].
    # Float nodes keep the matrix float64 for every n (Python integers beyond
    # the 64-bit range would produce an object array that np.linalg.inv
    # rejects), and the result is always 2-D, including the n == 0 case.
    C_inv = np.vander(np.arange(dim, dtype=float), dim)
    C = np.linalg.inv(C_inv)

    # Exact parameters of C x for x ~ N(mu, sigma).
    mu_true = C @ mu
    sigma_true = C @ sigma @ C.T

    # Rows of x are observations, so apply C to each row.
    k_est = x @ C.T

    mu_est = np.mean(k_est, axis=0)
    sigma_est = np.cov(k_est.T)

    print(f"Difference of means: {np.linalg.norm(mu_est - mu_true)}")
    print(f"Difference of covariance matrices: {np.linalg.norm(sigma_est - sigma_true)}")

In [13]:
# 11-dimensional case (n = 10) estimated from 1000 observations.
same_parameters_check(n=10, m=1000)

Difference of means: 3.670975271625084
Difference of covariance matrices:16857.8776138185
