# Step 3

Make a non-parametric estimation of conditional distributions, mathematical expectations and variances.

In [2]:
import numpy as np
from sklearn.neighbors import KernelDensity
import pandas as pd


res = pd.read_csv('data/NVIDIA Corporation.csv', header=0, encoding='unicode_escape')
df = res.drop(columns=['Date', 'Volume'])

# Train the KernelDensity estimator for each variable
kde_list = [KernelDensity(kernel='gaussian', bandwidth=0.5).fit(df[col].values.reshape(-1, 1)) for col in df.columns]

# Estimate the mathematical expectations of each variable
math_expects = [np.exp(kde.score_samples(np.array([[df[col].mean()]])))[0] for kde, col in zip(kde_list, df.columns)]
print("Estimation of mathematical expectations of each variable:\n", math_expects)

# Estimate the variances of each variable
variances = [np.exp(2 * kde.score_samples(np.array([[df[col].mean()]])))[0] - math_expect**2 for kde, col, math_expect in zip(kde_list, df.columns, math_expects)]
print("Estimation of variances of each variable:\n", variances)

# Estimate the conditional distributions of each variable given the others
cond_dists = []
for i, col1 in enumerate(df.columns):
    kde1 = kde_list[i]
    for col2 in df.columns[i+1:]:
        kde2 = kde_list[df.columns.get_loc(col2)]
        cond_dists.append(np.exp(kde1.score_samples(df[[col1]].values) - kde2.score_samples(df[[col2]].values)))
print("Estimation of conditional distributions of each variable given the others:\n", cond_dists)

Estimation of mathematical expectations of each variable:
 [0.0009483276199959811, 0.0016561930670903773, 0.0013697848841881203, 0.001294338947391737, 0.0011754366930743615]
Estimation of variances of each variable:
 [0.0, -4.235164736271502e-22, -2.117582368135751e-22, 0.0, -2.117582368135751e-22]
Estimation of conditional distributions of each variable given the others:
 [array([1.03162211, 0.97099838, 0.9794632 , ..., 0.37107561, 1.31370531,
       0.78469588]), array([1.01436039, 0.97536358, 0.96790977, ..., 0.39547177, 1.33401011,
       1.29757044]), array([1.04700366, 0.96279538, 1.01367021, ..., 0.4603048 , 1.10182078,
       0.98159549]), array([0.97077039, 0.89421575, 0.94019868, ..., 0.51596721, 1.08655508,
       1.03129253]), array([0.9832674 , 1.00449558, 0.98820432, ..., 1.06574445, 1.01545613,
       1.6535966 ]), array([1.01491006, 0.99155199, 1.03492425, ..., 1.24046096, 0.83871228,
       1.25092474]), array([0.94101355, 0.92092404, 0.95991221, ..., 1.39046382, 0.827