In [8]:
import numpy as np
import pandas as pd

In [140]:
# Build a small table of synthetic recruitment sites, one row per site.
# The legacy global RNG is seeded, so the order of the random draws below
# (age_min, sex_ratio, age_max, score_shift, sample_size) determines the values.
np.random.seed(42)
n_sites = 5

# Lower age bound per site: an integer in [0, 50).
age_min = (np.random.rand(n_sites) * 50).astype(int)

# Columns are filled in the order they appear in the final frame.
site_columns = {'sex_ratio': np.random.rand(n_sites), 'age_min': age_min}
# Upper age bound sits at least 5 above the lower bound (before truncation).
site_columns['age_max'] = (age_min + 5 + np.random.rand(n_sites) * 50).astype(int)
# Per-site additive offset, standard normal scaled down by 4.
site_columns['score_shift'] = np.random.randn(n_sites) / 4
# Number of participants per site: an integer in [50, 550).
site_columns['sample_size'] = (50 + np.random.rand(n_sites) * 500).astype(int)

sites = pd.DataFrame(data=site_columns)
sites

Unnamed: 0,sex_ratio,age_min,age_max,score_shift,sample_size
0,0.155995,18,24,-0.142845,119
1,0.058084,47,100,-0.231021,196
2,0.866176,36,82,-0.653137,233
3,0.601115,29,44,0.237592,278
4,0.708073,7,21,0.204111,442


In [141]:
# implicit model

def model(age):
    """Return a noisy score for each age: log(age + 10) plus Gaussian noise.

    Parameters
    ----------
    age : scalar or array-like
        Age value(s). The input is promoted to at least two dimensions, so a
        scalar becomes shape (1, 1) and a 1-D sequence of n ages becomes (1, n).

    Returns
    -------
    numpy.ndarray
        Scores with the same shape as the promoted ``age`` array.

    Notes
    -----
    Noise comes from the global ``np.random`` state and has standard
    deviation 0.5. One independent sample is drawn per element; for scalar and
    1-D inputs this consumes the random stream exactly as a length-n draw would.
    """
    age = np.atleast_2d(np.asarray(age))
    # Draw noise with the full input shape so that rows of a 2-D input do not
    # share a single broadcast noise vector.
    noise = 0.5 * np.random.randn(*age.shape)
    return np.log(age + 10) + noise

In [142]:
%matplotlib widget
import matplotlib.pyplot as plt

age = np.linspace(1, 120)

plt.plot(np.atleast_2d(np.array(age)).T, model(age).T)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7f8a52665950>]

In [143]:
# Simulate the individual participants of every site.
# For each participant the random draws happen in a fixed order:
# sex, then age, then the score noise inside model().

participants = []
for site_id, site_row in sites.iterrows():
    print(f'Processing site # {site_id}')
    # Per-site settings are constant across participants, so read them once.
    ratio = site_row['sex_ratio']
    youngest = site_row['age_min']
    age_span = site_row['age_max'] - youngest
    shift = site_row['score_shift']
    # iterrows() yields the row as a Series, so cast the count back to int.
    for _ in range(int(site_row['sample_size'])):
        # True when the uniform draw exceeds the site's sex_ratio.
        sex = np.random.rand() > ratio
        # Age is placed uniformly between the site's age_min and age_max.
        age = youngest + np.random.rand() * age_span
        # model() returns a (1, 1) array for a scalar age; add the site offset.
        score = model(age) + shift
        participants.append([site_id, sex, age, score[0][0]])

Processing site # 0
Processing site # 1
Processing site # 2
Processing site # 3
Processing site # 4


In [144]:
# Assemble the participant records into a frame, one row per participant.
df = pd.DataFrame(participants, columns=['site', 'sex', 'age', 'score'])
# Relabel the boolean sex flag (True -> 'Female', False -> 'Male').
# Assign the result back to the column instead of calling
# `df.sex.replace(..., inplace=True)`: an in-place call on a column selection
# is a chained assignment and does not update `df` under pandas Copy-on-Write.
df['sex'] = df['sex'].map({True: 'Female', False: 'Male'})
df.head()

Unnamed: 0,site,sex,age,score
0,0,Female,20.962774,4.058959
1,0,Female,20.565246,3.320542
2,0,Female,21.051424,3.143296
3,0,Female,19.495753,3.980348
4,0,Female,18.461879,2.946585


In [145]:
import seaborn as sns

# Scatter of score against age for all participants; each site gets its own
# hue and marker style.
fig, ax = plt.subplots()
sns.scatterplot(x='age', y='score', hue='site', style='site', data=df, ax=ax)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …