In [2]:
import json
import requests
import urllib.request
import io

from IPython.display import display, Math, Latex, Markdown, clear_output, HTML

import matplotlib.pyplot as plt
from matplotlib.dates import date2num, num2date
from matplotlib import dates as mdates
from matplotlib import ticker
from matplotlib.colors import ListedColormap
from matplotlib.patches import Patch

from tqdm.notebook import tqdm

from scipy.optimize import curve_fit
from scipy import stats
import numpy as np
import pandas as pd

In [3]:
_R = pd.read_pickle("double-gompertz-data.pkl")

In [4]:
errors = []
for country in _R:
    print(f"----> CHECKING {country}")
    for function in _R[country].index:
        if _R[country][function] is None:
            print(f"! ERROR {country} with {function}")
            if country not in errors:
                errors.append(country)

----> CHECKING Afghanistan
----> CHECKING Albania
----> CHECKING Algeria
----> CHECKING Andorra
----> CHECKING Angola
----> CHECKING Antigua and Barbuda
----> CHECKING Argentina
----> CHECKING Armenia
----> CHECKING Australia
----> CHECKING Austria
----> CHECKING Azerbaijan
----> CHECKING Bahamas
----> CHECKING Bahrain
----> CHECKING Bangladesh
! ERROR Bangladesh with dobgomp
----> CHECKING Barbados
----> CHECKING Belarus
----> CHECKING Belgium
----> CHECKING Belize
----> CHECKING Benin
----> CHECKING Bhutan
! ERROR Bhutan with dobgomp
----> CHECKING Bolivia
----> CHECKING Bosnia and Herzegovina
----> CHECKING Botswana
----> CHECKING Brazil
! ERROR Brazil with doblogit
----> CHECKING Brunei
----> CHECKING Bulgaria
----> CHECKING Burkina Faso
----> CHECKING Burma
! ERROR Burma with dobgomp
----> CHECKING Burundi
----> CHECKING Cabo Verde
! ERROR Cabo Verde with doblogit
----> CHECKING Cambodia
----> CHECKING Cameroon
----> CHECKING Canada
----> CHECKING Central African Republic
----> CH

In [5]:
print(f"COUNTRIES WITH ERRORS: {len(errors)}")
print(f"COUNTRIES TO BE USED: {len(_R.columns) - len(errors)}")

COUNTRIES WITH ERRORS: 17
COUNTRIES TO BE USED: 168


In [6]:
def get_stats(min_cases=100):

    total_results = {
        "Function": [],
        "Nations": [],
        "Days": [],
        "Samples": [],
        "Slope": [],
        "Intercept": [],
        "Rho": [],
        "p-value": [],
        "Std Err": [],
    }
    total_data = {}

    for function in tqdm(_R.index, leave=False):
        if function == "measured":
            continue
        RSD = {"country": [], "slope": [], "intercept": [], "r_value": [], "p_value": [], "std_err": []}
        for country in tqdm(_R.columns, leave=False):
            if country in errors:
                continue
            if _R[country]["measured"]["valore"].tail(1).values[0] < min_cases:
                continue
            slope, intercept, r_value, p_value, std_err = stats.linregress(
                _R[country]["measured"]["valore"],
                _R[country][function]["valore"]
            )
            if slope == 1.0 or intercept == 0.0 or r_value == 1.0 or p_value == 0.0 or std_err == 0.0:
                if country not in errors:
                    errors.append(country)
                    print(f"--------> ERROR {country}")
                    continue
            RSD["country"].append(country)
            RSD["slope"].append(slope)
            RSD["intercept"].append(intercept)
            RSD["r_value"].append(r_value)
            RSD["p_value"].append(p_value)
            RSD["std_err"].append(std_err)

        RSDdf = pd.DataFrame(data=RSD)
        mean_slope = np.mean(RSDdf['slope'])
        mean_intercept = np.mean(RSDdf['intercept'])
        mean_r_value = np.mean(RSDdf['r_value'])
        mean_p_value = np.mean(RSDdf['p_value'])
        mean_std_err = np.mean(RSDdf['std_err'])

        total_results["Function"].append(function)
        total_results["Nations"].append(RSDdf["country"].size)
        total_results["Days"].append(_R[country]["measured"]["valore"].size)
        total_results["Samples"].append(RSDdf["country"].size * _R[country]["measured"]["valore"].size)
        total_results["Slope"].append(mean_slope)
        total_results["Intercept"].append(mean_intercept)
        total_results["Rho"].append(mean_r_value)
        total_results["p-value"].append(mean_p_value)
        total_results["Std Err"].append(mean_std_err)

        total_data.update({function: [RSDdf]})

    TOTdf = pd.DataFrame(data=total_results)
    DATdf = pd.DataFrame(total_data)

    return TOTdf, DATdf

In [7]:
for min_cases in [0, 100, 500, 1000, 2000, 5000, 10000, 20000]:
    display(Markdown(f"## Casi minimi {min_cases}"))
    TOT, DAT = get_stats(min_cases)
    display(HTML(TOT.to_html(notebook=True, col_space=100, columns=[
        "Function", "Nations", "Days", "Samples", "Slope", "Intercept", "Rho", "p-value", "Std Err",
    ], index=False)))
    TOT.to_pickle(f"double-gompertz-stats-{min_cases}-mean.pkl")
    DAT.to_pickle(f"double-gompertz-stats-{min_cases}-data.pkl")

## Casi minimi 0

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

--------> ERROR Sao Tome and Principe
--------> ERROR Yemen


HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

--------> ERROR MS Zaandam
--------> ERROR South Sudan
--------> ERROR Western Sahara


HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

Function,Nations,Days,Samples,Slope,Intercept,Rho,p-value,Std Err
logit,166,93,15438,0.989002,25.132105,0.995207,4.108973e-46,0.008257
gomp,166,93,15438,0.992591,5.091736,0.995799,4.810899e-47,0.007503
doblogit,163,93,15159,0.996693,2.737228,0.998207,5.268991e-67,0.004997
dobgomp,163,93,15159,0.994428,4.727219,0.996844,1.762817e-45,0.006281


## Casi minimi 100

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

Function,Nations,Days,Samples,Slope,Intercept,Rho,p-value,Std Err
logit,121,93,11253,0.991689,34.439719,0.99703,2.382145e-47,0.006449
gomp,121,93,11253,0.995816,6.969305,0.997878,1.762017e-47,0.005392
doblogit,121,93,11253,0.998182,3.676204,0.999114,1.2340680000000002e-81,0.003642
dobgomp,121,93,11253,0.996201,6.361699,0.998072,2.328315e-45,0.004801


## Casi minimi 500

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

Function,Nations,Days,Samples,Slope,Intercept,Rho,p-value,Std Err
logit,92,93,8556,0.992434,45.181528,0.997854,4.235672e-49,0.005574
gomp,92,93,8556,0.99763,9.068515,0.998818,2.596661e-85,0.004387
doblogit,92,93,8556,0.998922,4.774485,0.999465,1.414291e-99,0.002957
dobgomp,92,93,8556,0.998067,8.281315,0.999024,2.596661e-85,0.003828


## Casi minimi 1000

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

Function,Nations,Days,Samples,Slope,Intercept,Rho,p-value,Std Err
logit,74,93,6882,0.991556,56.038352,0.997811,5.26597e-49,0.00544
gomp,74,93,6882,0.997834,11.161796,0.998926,7.659408999999999e-90,0.004172
doblogit,74,93,6882,0.998959,5.89166,0.999477,1.7583080000000002e-99,0.002859
dobgomp,74,93,6882,0.998211,10.208164,0.999099,7.638054999999999e-90,0.00368


## Casi minimi 2000

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

Function,Nations,Days,Samples,Slope,Intercept,Rho,p-value,Std Err
logit,56,93,5208,0.990036,73.777508,0.997715,6.958602999999999e-49,0.005349
gomp,56,93,5208,0.998066,14.523613,0.999036,1.009149e-89,0.003929
doblogit,56,93,5208,0.999128,7.681236,0.999565,4.7740509999999997e-104,0.002637
dobgomp,56,93,5208,0.998301,13.343939,0.999135,1.009149e-89,0.003556


## Casi minimi 5000

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

Function,Nations,Days,Samples,Slope,Intercept,Rho,p-value,Std Err
logit,41,93,3813,0.987743,99.983467,0.997559,9.504434e-49,0.005199
gomp,41,93,3813,0.998371,19.291896,0.999185,4.524365e-102,0.003625
doblogit,41,93,3813,0.999239,10.252889,0.99962,6.520655e-104,0.002397
dobgomp,41,93,3813,0.998647,17.691137,0.999296,4.491526e-102,0.0032


## Casi minimi 10000

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

Function,Nations,Days,Samples,Slope,Intercept,Rho,p-value,Std Err
logit,28,93,2604,0.982971,145.397008,0.996885,1.391721e-48,0.005565
gomp,28,93,2604,0.998272,27.545053,0.999135,6.624963e-102,0.003597
doblogit,28,93,2604,0.999294,14.586274,0.999647,9.548100999999999e-104,0.002215
dobgomp,28,93,2604,0.998623,25.261813,0.999268,6.5768769999999995e-102,0.003096


## Casi minimi 20000

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=185.0), HTML(value='')))

Function,Nations,Days,Samples,Slope,Intercept,Rho,p-value,Std Err
logit,16,93,1488,0.972789,246.713997,0.995845,2.4355109999999998e-48,0.005453
gomp,16,93,1488,0.998892,43.022343,0.999445,1.241411e-104,0.002849
doblogit,16,93,1488,0.999469,24.092451,0.999735,3.873059e-110,0.001916
dobgomp,16,93,1488,0.99911,37.599265,0.999555,1.241515e-104,0.00236


In [83]:
from scipy.stats import f_oneway, ttest_ind

ANOVA = []

for country in _R:
    print(country)
    try:
        p_vals = []
        for i in range(_R[country]["measured"]["valore"].size):
            print(i)
            _, p_val = f_oneway(
                _R[country]["measured"]["valore"][i],
                _R[country]["logit"]["valore"][i],
                _R[country]["doblogit"]["valore"][i],
                _R[country]["gomp"]["valore"][i],
                _R[country]["dobgomp"]["valore"][i]
            )
            p_vals.append(p_val)
        ANOVA.append([
            country, np.mean(p_vals)
        ])
    except Exception as err:
        print(err)
        continue

Afghanistan
0
zero-dimensional arrays cannot be concatenated
Albania
0
zero-dimensional arrays cannot be concatenated
Algeria
0
zero-dimensional arrays cannot be concatenated
Andorra
0
zero-dimensional arrays cannot be concatenated
Angola
0
zero-dimensional arrays cannot be concatenated
Antigua and Barbuda
0
zero-dimensional arrays cannot be concatenated
Argentina
0
zero-dimensional arrays cannot be concatenated
Armenia
0
zero-dimensional arrays cannot be concatenated
Australia
0
zero-dimensional arrays cannot be concatenated
Austria
0
zero-dimensional arrays cannot be concatenated
Azerbaijan
0
zero-dimensional arrays cannot be concatenated
Bahamas
0
zero-dimensional arrays cannot be concatenated
Bahrain
0
zero-dimensional arrays cannot be concatenated
Bangladesh
0
'NoneType' object is not subscriptable
Barbados
0
zero-dimensional arrays cannot be concatenated
Belarus
0
zero-dimensional arrays cannot be concatenated
Belgium
0
zero-dimensional arrays cannot be concatenated
Belize
0
zero

In [78]:
ANOVAdf = pd.DataFrame(ANOVA, columns=["country", "ANOVA p-value"])
ANOVAdf.set_index("country", inplace=True)

In [79]:
ANOVAdf

Unnamed: 0_level_0,ANOVA p-value
country,Unnamed: 1_level_1


In [71]:
display(HTML(ANOVAdf.to_html()))

Unnamed: 0_level_0,ANOVA p-value
country,Unnamed: 1_level_1


In [82]:
_R[country]["measured"]["valore"].size

93

In [110]:
_R["Algeria"]["measured"]["valore"]

0        0
1        0
2        0
3        0
4        0
      ... 
88    2629
89    2718
90    2811
91    2910
92    3007
Name: valore, Length: 93, dtype: int64

In [133]:
f = []
for _ in range(93):
    f.append({"measured": [], "logit": [], "gomp": [], "doblogit": [], "dobgomp": []})
for country in _R:
    for func in f[0]:
        try:
            for i, v in enumerate(_R[country][func]["valore"]):
                f[i][func].append(v)
        except Exception as err:
            pass
            

In [134]:
f[0]

{'measured': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  548,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  2,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  2,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 'logit': [-5.0,
  -6.0,
  -16.0,
  -5.0,
  -0.0,
  0.0,
  -

In [118]:
F

[[],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 []]

In [153]:
_, p_val = f_oneway(
DAT["logit"][0]["std_err"],
#DAT["doblogit"][0]["std_err"],
#DAT["gomp"][0]["std_err"],
DAT["dobgomp"][0]["std_err"]
)

In [154]:
print(f"{p_val:.4f}")

0.0156
