In [1]:
import json
import glob
import re
import pandas as pd
from os import path
from statistics import mean
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np

data_dir = '../out'

In [2]:
all_files = glob.glob(path.join(data_dir, "gen_*.json"))
gen_pattern = 'gen_0*([0-9]*).json$'

def extract_gen_number(filename):
    return int(re.search(gen_pattern, filename).group(1))

all_files.sort(key=extract_gen_number)

In [3]:
particle_dfs_list = []
meta_dfs_list = []
for filename in all_files:
    gen_number = extract_gen_number(filename)
    
    with open(filename) as f:
        data = json.load(f)
    new_p_df = pd.json_normalize(data['pop'], record_path=['normalised_particles'])
    new_p_df['generation'] = gen_number

    meta = {
        'generation': gen_number, 
        'tolerance': data['pop']['tolerance'], 
        'acceptance': data['pop']['acceptance']
    }
    new_m_df = pd.DataFrame(data=meta, index=[gen_number])

    meta_dfs_list.append(new_m_df)
    particle_dfs_list.append(new_p_df)

particle_df = pd.concat(particle_dfs_list)
particle_df = particle_df.reset_index()

meta_df = pd.concat(meta_dfs_list)
meta_df = meta_df.reset_index()
meta_df = meta_df.drop('index', axis=1)
print(meta_df)

particle_df['mean_score'] = particle_df['scores'].map(mean)
df = particle_df.drop('scores', axis=1)

particle_df.head()

    generation      tolerance  acceptance
0            1  1.797693e+308    1.000000
1            2   4.514583e-01    0.802568
2            3   3.025000e-01    0.851789
3            4   2.350000e-01    0.888099
4            5   2.000000e-01    0.884173
5            6   1.700000e-01    0.874126
6            7   1.350000e-01    0.883392
7            8   1.189583e-01    0.863558
8            9   1.050000e-01    0.871840
9           10   9.500000e-02    0.869565
10          11   8.750000e-02    0.845309
11          12   8.500000e-02    0.832639
12          13   8.250000e-02    0.868810
13          14   8.500000e-02    0.859845
14          15   8.000000e-02    0.827815


Unnamed: 0,index,scores,weight,parameters.heads,generation,mean_score
0,0,"[0.16999999999999993, 0.24999999999999992, 0.1...",0.001,0.529899,1,0.1975
1,1,"[0.5399999999999999, 0.5399999999999999, 0.52,...",0.001,0.146661,1,0.5275
2,2,"[0.16000000000000003, 0.22000000000000008, 0.1...",0.001,0.867661,1,0.175
3,3,"[0.1100000000000001, 0.06000000000000005, 0.11...",0.001,0.806108,1,0.105
4,4,"[0.10000000000000007, 0.09000000000000008, 0.1...",0.001,0.79726,1,0.12


In [4]:
import altair as alt
import math

meta_df['log_tolerance'] = meta_df['tolerance'].map(lambda x: math.log(1 + min(x, 0.5)))
meta_melt = meta_df.melt('generation', value_vars=['log_tolerance', 'acceptance'])

alt.Chart(
    meta_melt
).mark_bar(
).encode(
    x='generation:O',
    y='value'
).properties(
    width=600,
    height=100
).facet(
    row='variable:O'
).resolve_scale(
    y='independent'
)


In [23]:

# particle_df.drop(columns=['scores'], inplace=True)
# particle_df.head()
grouped = particle_df.groupby('generation')

points = np.linspace(0,1,100)
def myfunc(x):
    kernel = stats.gaussian_kde(x)
    kernel(points)

grouped.transform(myfunc)


Unnamed: 0,index,weight,parameters.heads,mean_score
0,,,,
1,,,,
2,,,,
3,,,,
4,,,,
...,...,...,...,...
14995,,,,
14996,,,,
14997,,,,
14998,,,,


In [5]:
mean_scores = particle_df['mean_score']
kernel = stats.gaussian_kde(mean_scores)

points = np.linspace(0,mean_scores.max(),1000)
density = kernel(points)

data = pd.DataFrame({
    'mean_score': points,
    'density': density,
})

alt.Chart(
    data
).mark_area(
).encode(
    x='mean_score',
    y='density'
)


In [6]:
print(particle_df)

alt.Chart(
    particle_df
).transform_density(
    'mean_score',
    as_=['score', 'density']
).mark_area(
).encode(
    x='score:Q',
    y='density:Q'
)

       index                                             scores    weight  \
0          0  [0.16999999999999993, 0.24999999999999992, 0.1...  0.001000   
1          1  [0.5399999999999999, 0.5399999999999999, 0.52,...  0.001000   
2          2  [0.16000000000000003, 0.22000000000000008, 0.1...  0.001000   
3          3  [0.1100000000000001, 0.06000000000000005, 0.11...  0.001000   
4          4  [0.10000000000000007, 0.09000000000000008, 0.1...  0.001000   
...      ...                                                ...       ...   
14995    995  [0.09000000000000008, 0.08000000000000007, 0.0...  0.000714   
14996    996  [0.16999999999999993, 0.04999999999999993, 0.1...  0.000686   
14997    997  [0.010000000000000007, 0.05999999999999994, 0....  0.001303   
14998    998  [0.030000000000000027, 0.10000000000000007, 0....  0.000612   
14999    999  [0.020000000000000014, 0.029999999999999916, 0...  0.001379   

       parameters.heads  generation  mean_score  
0              0.529899  

MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000). For information on how to plot larger datasets in Altair, see the documentation

alt.Chart(...)