In [51]:
from scipy.stats import norm
import numpy as np
import pandas as pd

# Simulate 100 correlated readings of systolic BP, diastolic BP and heart rate.
#
# Target (normal) marginal distribution for each vital sign:
#   * the mean is the midpoint of the target range;
#   * the standard deviation is (maximum - minimum) / 4, i.e. the range spans
#     mean +/- 2 standard deviations and so holds roughly 95% of the values.
systolic_mean, systolic_std = 130.5, (140 - 121) / 4    # range 121-140
diastolic_mean, diastolic_std = 85.5, (90 - 81) / 4     # range 81-90
heart_rate_mean, heart_rate_std = 80, (100 - 60) / 4    # range 60-100

# Correlation structure between the three variables (order: systolic,
# diastolic, heart rate). Systolic and diastolic pressure are assumed to have
# a correlation of 0.5; heart rate is assumed uncorrelated with both.
correlation_matrix = np.array([
    [1, 0.5, 0],   # Systolic BP with itself, diastolic BP, and heart rate
    [0.5, 1, 0],   # Diastolic BP with systolic BP, itself, and heart rate
    [0, 0, 1],     # Heart rate with systolic BP, diastolic BP, and itself
])

# The draws are made on the standard-normal (z-score) scale: zero means and
# unit standard deviations. Rescaling to real units happens further down.
means = [0, 0, 0]
stds = [1, 1, 1]

# Covariance = D @ R @ D, where D = diag(stds) and R is the correlation matrix.
# With unit standard deviations this equals the correlation matrix itself.
covariance_matrix = np.diag(stds) @ correlation_matrix @ np.diag(stds)

# NOTE: this uses NumPy's global random state, so results differ between runs
# unless the caller has seeded it (np.random.seed) beforehand.
random_data = np.random.multivariate_normal(means, covariance_matrix, size=100)

# Keep the raw z-scores in the frame alongside the rescaled values.
df_adjusted = pd.DataFrame(random_data, columns=['Z_Systolic', 'Z_Diastolic', 'Z_HeartRate'])

# Convert z-scores to actual blood pressure and heart rate values.
# For a normal target, norm(loc, scale).ppf(norm.cdf(z)) is mathematically
# loc + scale * z, so the affine form is used directly: it avoids the rounding
# error of the cdf -> ppf round trip and cannot overflow to inf when the cdf
# of a large positive z rounds to 1.0. Being linear, it also leaves the
# correlations between the columns unchanged.
# (For a non-normal target marginal, `target.ppf(norm.cdf(z))` would be needed.)
df_adjusted['Systolic Blood Pressure (mmHg)'] = systolic_mean + systolic_std * df_adjusted['Z_Systolic']
df_adjusted['Diastolic Blood Pressure (mmHg)'] = diastolic_mean + diastolic_std * df_adjusted['Z_Diastolic']
df_adjusted['Heart Beat (bpm)'] = heart_rate_mean + heart_rate_std * df_adjusted['Z_HeartRate']

# Display the first few rows of the adjusted data
df_adjusted[['Systolic Blood Pressure (mmHg)', 'Diastolic Blood Pressure (mmHg)', 'Heart Beat (bpm)']].head()


Unnamed: 0,Systolic Blood Pressure (mmHg),Diastolic Blood Pressure (mmHg),Heart Beat (bpm)
0,130.220889,87.077747,81.073443
1,132.339762,87.391135,77.479006
2,122.441716,81.747054,87.219867
3,128.956687,83.533858,82.12321
4,132.972369,91.176611,80.872744
