# Multivariate Data generation

In [16]:
import numpy as np
from scipy.stats import norm, lognorm, beta

In [17]:
# Marginal distributions chosen for the three variables x1, x2 and x3.

# x1 is normal, parameterised by its mean and standard deviation.
mu_x1 = 2
sigma_x1 = 1

# x2 is normal as well, again given by mean and standard deviation.
mu_x2 = 5
sigma_x2 = 2

# x3 is uniform, parameterised by its lower bound a and upper bound b.
a_x3 = 3
b_x3 = 5

In [18]:
# Draw n independent samples from each marginal distribution.
# Seed NumPy's global generator first so that a fresh "Restart & Run All"
# reproduces the same draws, both here and in the later np.random calls.
np.random.seed(42)
n = 1000  # number of samples per variable
x1 = np.random.normal(mu_x1, sigma_x1, n)  # normal: mean mu_x1, std sigma_x1
x2 = np.random.normal(mu_x2, sigma_x2, n)  # normal: mean mu_x2, std sigma_x2
x3 = np.random.uniform(a_x3, b_x3, n)      # uniform between a_x3 and b_x3

In [19]:
# Target correlation matrix for (x1, x2, x3). It is specified by hand here
# rather than estimated from the independent samples with np.corrcoef.
R = [
    [1.0, 0.5, 0.3],
    [0.5, 1.0, 0.7],
    [0.3, 0.7, 1.0],
]


In [20]:
# Cholesky-factorise the target correlation matrix; L is the lower-triangular
# factor used below to mix independent normal draws into correlated ones.
L = np.linalg.cholesky(np.asarray(R))

In [21]:
# Draw independent standard normal variables: one row per target variable
# (three rows), one column per sample. These are the raw material that is
# later correlated and mapped onto the desired distributions.
u = np.random.normal(loc=0, scale=1, size=(3, n))

In [22]:
# Correlate the independent normals by multiplying them with the
# lower-triangular Cholesky factor of the correlation matrix.
z = L @ u

In [23]:
# Transform the correlated standard-normal rows of z into the desired
# marginal distributions.
# Normal marginals: shifting and scaling a standard normal gives a normal
# with the requested mean and standard deviation.
X = mu_x1 + sigma_x1 * z[0]
Y = mu_x2 + sigma_x2 * z[1]
# Uniform marginal: first push z[2] through the standard normal CDF, which
# yields values in (0, 1), then rescale to the interval [a_x3, b_x3].
# Scaling z[2] directly would leave Z normally distributed and unbounded
# instead of uniform.
# NOTE(review): the CDF map is nonlinear, so the Pearson correlations that
# involve Z may come out slightly below the targets in R.
Z = a_x3 + (b_x3 - a_x3) * norm.cdf(z[2])

In [24]:
# Stack the three transformed variables as rows: one row per variable,
# one column per sample.
ro = np.vstack((X, Y, Z))

In [25]:
# Keep only the samples (columns) in which none of the variables is NaN.
has_nan = np.isnan(ro).any(axis=0)
ro = ro[:, ~has_nan]

In [26]:
# Display the array's shape; it is still (3, n) when the NaN filter dropped no columns.
ro.shape

(3, 1000)

In [27]:
# Pull the NaN-filtered rows back out as one array per variable.
Xm = ro[0]
Ym = ro[1]
Zm = ro[2]

In [28]:
# Show the target correlation matrix ...
print('Correlation coefficients:')
target_corr = np.array(R)
print(target_corr)

# ... followed by the empirical correlation of the transformed variables,
# so the two can be compared.
print('Correlation between transformed variables:')
empirical_corr = np.corrcoef(np.array([Xm, Ym, Zm]))
print(empirical_corr)

Correlation coefficients:
[[1.  0.5 0.3]
 [0.5 1.  0.7]
 [0.3 0.7 1. ]]
Correlation between transformed variables:
[[1.         0.45188097 0.24407753]
 [0.45188097 1.         0.69379251]
 [0.24407753 0.69379251 1.        ]]


In [29]:
# Sanity check (disabled): Xm.mean(), Xm.std() should be close to mu_x1 and sigma_x1