In [1]:
import pandas as pd
import matplotlib.pyplot as plt

def load_props(path):
    """Load one properties JSON file into a flat, per-entry DataFrame.

    The file is read with ``pd.read_json``; its ``files`` column is flattened
    with ``pd.json_normalize`` so that every key of each entry becomes a
    column. Two columns are then appended:

    * ``quality`` -- copied from the ``quality`` column of the parsed file
      (aligned on the row index).
    * ``ratio``   -- ``rawsize / filesize`` for each row.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON file, passed straight to ``pd.read_json``.

    Returns
    -------
    pandas.DataFrame
        One row per element of ``files``, with the extra ``quality`` and
        ``ratio`` columns at the end.

    Notes
    -----
    Assumes every entry of ``files`` carries ``rawsize`` and ``filesize``
    keys; a missing key raises ``KeyError`` when the ratio is computed.
    """
    raw = pd.read_json(path)

    # Expand the nested per-file records into ordinary columns.
    props = pd.json_normalize(raw['files'])

    props['quality'] = raw['quality']
    # Ratio of raw size to encoded file size.
    props['ratio'] = props['rawsize'].div(props['filesize'])
    return props

In [2]:
# Inspect the per-image properties for a single quality setting (Q = 10).
load_props(path='rate_ssim_byQ/properties_010.json')

Unnamed: 0,image,width,height,channels,origchan,filesize,origsize,rawsize,mean_squared_error,peak_signal_noise_ratio,structural_similarity,quality,ratio
0,000000000139.jpg,640,426,3,3,10045,161811,817920,135.776146,26.802569,0.730682,10,81.425585
1,000000000285.jpg,586,640,3,3,24132,335861,1125120,322.996145,23.038830,0.652713,10,46.623570
2,000000000632.jpg,640,483,3,3,15182,155667,927360,314.487165,23.154774,0.667135,10,61.082861
3,000000000724.jpg,375,500,3,3,12768,130107,562500,229.887748,24.515645,0.735628,10,44.055451
4,000000000776.jpg,428,640,3,3,16493,176410,821760,333.949392,22.893997,0.537188,10,49.824774
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,000000581317.jpg,640,354,3,3,10340,116838,679680,374.348279,22.398045,0.669900,10,65.733075
4996,000000581357.jpg,612,612,3,3,11494,139751,1123632,103.976160,27.961466,0.775341,10,97.758135
4997,000000581482.jpg,640,427,1,3,16425,158235,273280,254.763934,24.069424,0.753820,10,16.638052
4998,000000581615.jpg,478,640,3,3,8524,142905,917760,57.457339,30.537349,0.795018,10,107.667762


In [3]:
# Summary statistics of SSIM at Q = 10.
# The file is loaded explicitly instead of going through IPython's `_`
# (previous-output) variable, so this cell gives the same answer no matter
# which cell happened to run just before it.
load_props('rate_ssim_byQ/properties_010.json')['structural_similarity'].describe()

count    5000.000000
mean        0.726269
std         0.071960
min         0.339775
25%         0.683166
50%         0.732002
75%         0.774560
max         0.967155
Name: structural_similarity, dtype: float64

In [4]:
# One DataFrame per quality setting, keyed by Q = 5, 15, ..., 95.
data = dict(
    (q, load_props(f'rate_ssim_byQ/properties_{q:03d}.json'))
    for q in range(5, 100, 10)
)

In [5]:
# SSIM summary table: one row per quality setting Q, one column per
# describe() statistic. 'count' is dropped from the table.
stats = (
    pd.DataFrame(
        [frame['structural_similarity'].describe() for frame in data.values()],
        index=list(data),
    )
    .drop(columns='count')
    .rename_axis('Q')
)
stats

Unnamed: 0_level_0,mean,std,min,25%,50%,75%,max
Q,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5,0.627101,0.08983,0.153293,0.574096,0.630873,0.685953,0.965715
15,0.773825,0.063745,0.359304,0.736047,0.780204,0.817623,0.969382
25,0.826165,0.054568,0.390045,0.794513,0.832005,0.864267,0.97955
35,0.854855,0.048374,0.419113,0.827557,0.859963,0.888305,0.976038
45,0.873986,0.043599,0.443987,0.849793,0.878538,0.903895,0.99096
55,0.886809,0.041301,0.467388,0.864498,0.891524,0.915035,0.99309
65,0.904728,0.035758,0.504513,0.88653,0.90904,0.928686,0.988051
75,0.921144,0.0331,0.576811,0.906112,0.925593,0.942438,0.997151
85,0.956206,0.024949,0.781857,0.943239,0.959744,0.97572,0.998514
95,0.994715,0.002615,0.980783,0.993057,0.994722,0.996941,0.999606


In [6]:
# Emit the SSIM table as raw LaTeX source (print, so the text can be copied verbatim).
ssim_latex = stats.to_latex(float_format='%.4f')
print(ssim_latex)

\begin{tabular}{lrrrrrrr}
\toprule
{} &   mean &    std &    min &    25\% &    50\% &    75\% &    max \\
Q  &        &        &        &        &        &        &        \\
\midrule
5  & 0.6271 & 0.0898 & 0.1533 & 0.5741 & 0.6309 & 0.6860 & 0.9657 \\
15 & 0.7738 & 0.0637 & 0.3593 & 0.7360 & 0.7802 & 0.8176 & 0.9694 \\
25 & 0.8262 & 0.0546 & 0.3900 & 0.7945 & 0.8320 & 0.8643 & 0.9795 \\
35 & 0.8549 & 0.0484 & 0.4191 & 0.8276 & 0.8600 & 0.8883 & 0.9760 \\
45 & 0.8740 & 0.0436 & 0.4440 & 0.8498 & 0.8785 & 0.9039 & 0.9910 \\
55 & 0.8868 & 0.0413 & 0.4674 & 0.8645 & 0.8915 & 0.9150 & 0.9931 \\
65 & 0.9047 & 0.0358 & 0.5045 & 0.8865 & 0.9090 & 0.9287 & 0.9881 \\
75 & 0.9211 & 0.0331 & 0.5768 & 0.9061 & 0.9256 & 0.9424 & 0.9972 \\
85 & 0.9562 & 0.0249 & 0.7819 & 0.9432 & 0.9597 & 0.9757 & 0.9985 \\
95 & 0.9947 & 0.0026 & 0.9808 & 0.9931 & 0.9947 & 0.9969 & 0.9996 \\
\bottomrule
\end{tabular}



In [7]:
# Ratio (rawsize / filesize) summary table: one row per quality setting Q,
# one column per describe() statistic. 'count' is dropped from the table.
rstats = (
    pd.DataFrame(
        [frame['ratio'].describe() for frame in data.values()],
        index=list(data),
    )
    .drop(columns='count')
    .rename_axis('Q')
)
rstats

Unnamed: 0_level_0,mean,std,min,25%,50%,75%,max
Q,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5,100.508415,36.36359,1.453401,75.611433,97.68604,125.082974,229.792803
15,51.342585,21.215167,1.436932,37.012449,47.771739,61.886364,210.037379
25,36.451917,16.079362,1.423543,25.968989,33.52065,43.626425,194.233425
35,28.96278,13.286116,1.411728,20.431562,26.510354,34.509284,174.906716
45,24.305496,11.379229,1.401132,17.131863,22.149491,28.772557,154.250235
55,21.464921,10.049181,1.391524,15.256193,19.604921,25.366592,144.084359
65,17.644559,8.288172,1.37887,12.544493,16.130576,20.673606,121.29605
75,14.778192,6.759163,1.359625,10.614692,13.567158,17.383485,103.306452
85,9.603277,4.043629,1.315396,7.053236,8.901381,11.175462,53.469482
95,5.559164,2.195015,1.19292,4.14509,5.128793,6.516071,32.916048


In [8]:
# Emit the ratio table as raw LaTeX source (print, so the text can be copied verbatim).
ratio_latex = rstats.to_latex(float_format='%.2f')
print(ratio_latex)

\begin{tabular}{lrrrrrrr}
\toprule
{} &   mean &   std &  min &   25\% &   50\% &    75\% &    max \\
Q  &        &       &      &       &       &        &        \\
\midrule
5  & 100.51 & 36.36 & 1.45 & 75.61 & 97.69 & 125.08 & 229.79 \\
15 &  51.34 & 21.22 & 1.44 & 37.01 & 47.77 &  61.89 & 210.04 \\
25 &  36.45 & 16.08 & 1.42 & 25.97 & 33.52 &  43.63 & 194.23 \\
35 &  28.96 & 13.29 & 1.41 & 20.43 & 26.51 &  34.51 & 174.91 \\
45 &  24.31 & 11.38 & 1.40 & 17.13 & 22.15 &  28.77 & 154.25 \\
55 &  21.46 & 10.05 & 1.39 & 15.26 & 19.60 &  25.37 & 144.08 \\
65 &  17.64 &  8.29 & 1.38 & 12.54 & 16.13 &  20.67 & 121.30 \\
75 &  14.78 &  6.76 & 1.36 & 10.61 & 13.57 &  17.38 & 103.31 \\
85 &   9.60 &  4.04 & 1.32 &  7.05 &  8.90 &  11.18 &  53.47 \\
95 &   5.56 &  2.20 & 1.19 &  4.15 &  5.13 &   6.52 &  32.92 \\
\bottomrule
\end{tabular}

