In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

from IPython.display import display, HTML

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import datetime
from datetime import timedelta  

import urllib, json
import requests
import io


from scipy.optimize import curve_fit
from scipy.optimize import minimize
from scipy.special import loggamma

import statsmodels.api as sm

from sklearn.linear_model import LinearRegression

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mtick
from matplotlib.ticker import NullFormatter
from matplotlib.ticker import FuncFormatter
from matplotlib.dates import WeekdayLocator
from matplotlib.dates import MonthLocator
from matplotlib.dates import AutoDateLocator
from matplotlib.pyplot import cm

import seaborn as sns


import math

!pip install lmfit
from lmfit import Minimizer, Parameters, report_fit

In [None]:
# wrapper to make a dict look like a class, to simplify access to members
# https://goodcode.io/articles/python-dict-object/
    
class objectview(object):
    """Read/write attribute view over an existing dict.

    The instance's ``__dict__`` is bound to ``d`` itself (no copy is made), so
    attributes set on the view appear as keys of ``d`` and vice versa.
    """

    def __init__(self, d):
        # Rebind the attribute storage to the caller's dict.
        object.__setattr__(self, '__dict__', d)
        
class objdict(dict):
    """dict subclass whose entries can also be read, set and deleted as attributes."""

    def __getattr__(self, name):
        # Called only when regular attribute lookup fails, so dict methods keep priority.
        if name not in self:
            raise AttributeError("No such attribute: " + name)
        return self[name]

    def __setattr__(self, name, value):
        # Every attribute assignment is stored as a dictionary entry.
        self[name] = value

    def __delattr__(self, name):
        if name not in self:
            raise AttributeError("No such attribute: " + name)
        del self[name]
            
      
# Quick demonstration of both wrappers on the same source dict.
d = dict(a=1, b=2)

o1 = objectview(d)   # attribute view that shares d's storage
print(o1.a)

o2 = objdict(d)      # dict copy of d with attribute-style access
print(o2.b)
o2.c = 3
print(o2.c)

# Example parameter set held in an objdict.
params = objdict(mixing=1, mixing_s=1, phi=1, q=1, gamma=1/5, lag=1)

print(params)


In [None]:
#To get the population at Country/Region, Province/State or county level; data from Johns Hopkins.
class DataPopulation():
    """Population lookup backed by the Johns Hopkins UID/ISO/FIPS table.

    Attributes set by the constructor:
      data   -- the lookup table as a DataFrame, with missing 'Admin2' and
                'Province_State' values replaced by ''.
      states -- dict built from the rows of a two-column abbreviation CSV,
                e.g. states['NY'] = 'New York'.
    """

    def __init__(self):
        lookup_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv'
        raw = requests.get(lookup_url).content
        self.data = pd.read_csv(io.StringIO(raw.decode('utf-8')))
        # Empty strings (not NaN) mark "no county" / "no state" so equality filters work.
        for column in ('Admin2', 'Province_State'):
            self.data[column] = self.data[column].fillna('')

        # Dictionary of state abbreviations: states['NY'] = 'New York'
        abbreviation_url = 'https://worldpopulationreview.com/static/states/abbr-name.csv'
        raw = requests.get(abbreviation_url).content
        pairs = pd.read_csv(io.StringIO(raw.decode('utf-8')), header=None)
        self.states = dict(pairs.values.tolist())

    def get(self, country_region, province_state, county):
        """Return the summed 'Population' of the rows matching all three keys exactly."""
        table = self.data
        matches = (
            (table['Country_Region'] == country_region)
            & (table['Province_State'] == province_state)
            & (table['Admin2'] == county)
        )
        return table.loc[matches, 'Population'].sum()

    def report(self, country_region, province_state):
        """Return the lookup rows for a country, narrowed to a state unless it is ''."""
        rows = self.data[self.data['Country_Region'] == country_region]
        if province_state == '':
            return rows
        return rows[rows['Province_State'] == province_state]
            
#dp = DataPopulation()
#display(HTML(dp.report('US','New York').to_html()))
#print(dp.get('US','New York','New York City'))



In [None]:

def load_kaggle_jh():
    """Load the Kaggle "covid19-global-forecasting-week-4" training file in the common layout.

    Returns:
        DataFrame with the columns region, state, county, date, positive, death.
        'county' is always '' and missing 'Province_State' values become ''.
    """
    train = pd.read_csv("../input/covid19-global-forecasting-week-4/train.csv")

    train['date'] = train['Date'].apply(lambda x: (datetime.datetime.strptime(x, '%Y-%m-%d')))

    train['death'] = train['Fatalities']
    train['positive'] = train['ConfirmedCases']

    # Assign the filled column back instead of calling fillna(inplace=True) on a
    # column selection: the chained in-place form is not guaranteed to update the
    # frame (it warns on recent pandas and is a no-op under copy-on-write).
    train['Province_State'] = train['Province_State'].fillna('')

    train['state'] = train['Province_State']
#    train.loc[train['Country_Region'].isin(Europe),'state']=train.loc[train['Country_Region'].isin(Europe),'Country_Region']

    train['region'] = train['Country_Region']
#    train.loc[train['Country_Region'].isin(Europe),'region']='EU'

    train['county'] = ''

    return train[['region','state','county','date','positive','death']]

def load_kaggle_week5():
    """Load the Kaggle "covid19-global-forecasting-week-5" training file.

    Returns:
        The raw DataFrame with missing 'Province_State' / 'County' values replaced
        by '' and an added 'date' column parsed from 'Date' ('%Y-%m-%d').
    """
    data = pd.read_csv("../input/covid19-global-forecasting-week-5/train.csv")

    # Assign the filled columns back rather than using fillna(inplace=True) on a
    # column selection, which is not guaranteed to modify the frame (it warns on
    # recent pandas and is a no-op under copy-on-write).
    for column in ('Province_State', 'County'):
        data[column] = data[column].fillna('')
    data['date'] = data['Date'].apply(lambda x: (datetime.datetime.strptime(x, '%Y-%m-%d')))

    return data

def reformat_week5(data,region, state, county):
    """Convert week-5 long-format rows for one location into cumulative series.

    Args:
        data: frame with the columns Country_Region, Province_State, County, date,
            Target and TargetValue.
        region, state, county: exact values the three location columns must match.

    Returns:
        DataFrame sorted by date with the columns region, state, county, date and
        one column per Target value; 'positive' (from "ConfirmedCases") and
        'death' (from "Fatalities") hold running totals of the TargetValue entries.
    """
    selected = data[
        (data['Country_Region'] == region)
        & (data['Province_State'] == state)
        & (data['County'] == county)
    ]

    # One row per date, one column per Target value.
    wide = selected.pivot_table(
        index=["Country_Region", "Province_State", "County", "date"],
        columns="Target",
        values="TargetValue",
        aggfunc="sum",
    ).reset_index()
    wide = wide.sort_values(by='date', ascending=True)

    wide = wide.rename(columns={"Fatalities": "death", "ConfirmedCases": "positive", 'Country_Region':'region', 'Province_State':'state', 'County':'county'})

    # Running totals of the per-date values.
    for column in ('death', 'positive'):
        wide[column] = wide[column].cumsum()

    return wide
    
    

def load_covidtracking_states(abbrv):
    """Download the covidtracking.com daily state feed as a DataFrame.

    Args:
        abbrv: mapping passed to ``Series.replace`` on the 'state' column
            (abbreviation -> full state name).

    Returns:
        DataFrame built from the JSON payload, with 'date' parsed from '%Y%m%d',
        'region' set to 'US' and 'state' values replaced through ``abbrv``.
    """
    response = requests.get('https://covidtracking.com/api/states/daily')

    states_daily = pd.DataFrame(response.json())
    states_daily['date'] = pd.to_datetime(states_daily['date'], format='%Y%m%d')
    states_daily['region'] = 'US'
    # Replace each abbreviation by the state's full name.
    states_daily['state'] = states_daily['state'].replace(abbrv)

    return states_daily

def load_jhu_global():
    """Download the Johns Hopkins global death and confirmed-case time series.

    Both wide CSV files are melted to one row per location and date, then
    outer-merged on the location columns plus the date.

    Returns:
        DataFrame with the columns region, state, county, date, positive, death.
        'county' is always ''; missing country / province values become ''.
    """
    base_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
    key_columns = ['Province/State','Country/Region','Lat','Long']

    def _load_long(filename, value_name):
        # Fetch one wide time-series file and melt its date columns into rows.
        raw = requests.get(base_url + filename).content
        wide = pd.read_csv(io.StringIO(raw.decode('utf-8')))
        return pd.melt(wide, id_vars=key_columns, var_name='date', value_name=value_name)

    deaths = _load_long('time_series_covid19_deaths_global.csv', 'death')
    confirmed = _load_long('time_series_covid19_confirmed_global.csv', 'positive')

    # list.append() returns None, so passing `key_columns.append('date')` as `on`
    # silently fell back to "merge on all common columns". Build the key list
    # explicitly instead (it is the same set of columns).
    merged = confirmed.merge(deaths, how='outer', on=key_columns + ['date'])
    merged['date'] = pd.to_datetime(merged['date'], format='%m/%d/%y')

    merged['region'] = merged['Country/Region'].fillna('')
    merged['state'] = merged['Province/State'].fillna('')
    merged['county'] = ''
    return merged[['region','state','county','date','positive','death']]
    
def load_jhu_counties():
    """Download the Johns Hopkins US county-level death and confirmed-case time series.

    Both wide CSV files are melted to one row per county and date, then
    outer-merged on the shared identification columns plus the date.

    Returns:
        DataFrame with the columns region ('US'), state, county, Population,
        date, positive, death. Missing county names become ''.
    """
    base_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/'
    # Identification columns present in both files; 'Population' is only in the death file.
    shared_keys = ['UID','iso2','iso3','code3','FIPS','Admin2','Province_State','Country_Region','Lat','Long_','Combined_Key']

    def _load_long(filename, id_vars, value_name):
        # Fetch one wide time-series file and melt its date columns into rows.
        raw = requests.get(base_url + filename).content
        wide = pd.read_csv(io.StringIO(raw.decode('utf-8')))
        return pd.melt(wide, id_vars=id_vars, var_name='date', value_name=value_name)

    deaths = _load_long('time_series_covid19_deaths_US.csv', shared_keys + ['Population'], 'death')
    confirmed = _load_long('time_series_covid19_confirmed_US.csv', shared_keys, 'positive')

    # list.append() returns None, so passing `key_columns.append('date')` as `on`
    # silently fell back to "merge on all common columns". Name the keys
    # explicitly instead (it is the same set of columns).
    merged = confirmed.merge(deaths, how='outer', on=shared_keys + ['date'])
    merged['date'] = pd.to_datetime(merged['date'], format='%m/%d/%y')

    merged['region'] = 'US'
    merged['state'] = merged['Province_State']
    merged['county'] = merged['Admin2'].fillna('')
    return merged[['region','state','county','Population','date','positive','death']]

def load_nytimes_counties():
    """Download the New York Times US county file in the common column naming.

    Returns:
        DataFrame of the CSV with 'date' parsed from '%Y-%m-%d', "deaths" renamed
        to "death", "cases" renamed to "positive" and 'region' set to 'US'.
    """
    source_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
    payload = requests.get(source_url).content.decode('utf-8')

    counties = pd.read_csv(io.StringIO(payload))
    counties['date'] = pd.to_datetime(counties['date'], format='%Y-%m-%d').copy()
    counties = counties.rename(columns={"deaths": "death", "cases": "positive"})
    counties['region'] = 'US'
    return counties


    
class Database():
    """Loads every data source once and serves calibration series for one location."""

    def __init__(self):

        #initialize population database from Johns Hopkins data set
        self.population = DataPopulation()

        #load global data from Johns Hopkins University
        self.jhGlobal = load_jhu_global()

        #load US county-level data from Johns Hopkins
        self.jhUS = load_jhu_counties()

        self.ctsData = load_covidtracking_states(self.population.states)

        self.kData   = load_kaggle_jh()   #Week4 and before format
        self.kData5   = load_kaggle_week5() #Week5 format

        self.ntData  = load_nytimes_counties()

    def get(self, source, region, state, county, cutoff_positive, cutoff_death, truncate):
        """Return the cumulative series of one location from one data source.

        Args:
            source: 'Johns Hopkins', 'NY Times', 'CovidTracking', 'Kaggle4' or 'Kaggle5'.
            region, state, county: location keys; state == '' keeps the whole
                region and county == '' keeps the whole state.
            cutoff_positive, cutoff_death: the series starts on the first date
                where either cumulative count exceeds its cutoff.
            truncate: 0 keeps every row; otherwise the rows are sliced with
                ``[:truncate]`` (first n rows, or all but the last n when negative).

        Returns:
            Tuple (population, dates, days, positives, fatalities, minP, minD):
            dates is a Series, days/positives/fatalities are numpy arrays, and
            minP / minD are the day offsets from the start date at which the
            positive / death cutoff was first exceeded.

        Raises:
            ValueError: if ``source`` is not one of the known names.

        Note:
            If one cutoff is never exceeded its date is NaT; that case is not
            handled specially here.
        """
        population = self.population.get(region, state, county)

        if source == 'Johns Hopkins':
            data = self.jhUS if region == 'US' else self.jhGlobal
        elif source == 'NY Times':
            data = self.ntData
        elif source == 'CovidTracking':
            data = self.ctsData
        elif source == 'Kaggle4':
            data = self.kData
        elif source == 'Kaggle5':
            data = reformat_week5(self.kData5, region, state, county)
        else:
            # Previously an unknown name left `data` unbound and failed further
            # down with an UnboundLocalError.
            raise ValueError("Unknown data source: {!r}".format(source))

        c = data[data['region']==region]
        if state != '':
            c = c[c['state']==state]
            if county != '':
                c = c[c['county']==county]

        # Aggregate the selected rows per date. Only the two count columns are
        # summed: they are the only ones used below, and summing the remaining
        # (string or identifier) columns is meaningless.
        c = c.groupby('date')[['positive', 'death']].sum().reset_index()
        c = c.sort_values(by='date', ascending=True)

        #first date on which each cumulative count exceeded its cutoff
        minDateP = c[c['positive']>cutoff_positive]['date'].min()
        minDateD = c[c['death']>cutoff_death]['date'].min()
        minDate = min(minDateP, minDateD)

        #keep only the records from the earliest cutoff date onwards
        c = c[c['date']>=minDate].copy()

        #keep only the given number of days from the beginning, or remove the given number of days from the end
        if truncate != 0:
            c = c[:truncate].copy()

        #number of days since the start date
        c['Days'] = (c['date'] - minDate) / np.timedelta64(1, 'D')

        x = c['Days'].to_numpy().copy()
        positives = c['positive'].to_numpy().copy()
        fatalities = c['death'].to_numpy().copy()

        return population, c['date'], x, positives, fatalities, (minDateP-minDate).days, (minDateD-minDate).days

class Data():
    """Calibration data set for one location and source, taken from the shared Database.

    Besides the constructor arguments, the instance stores what ``Database.get``
    returns (population, xd, x, positives, fatalities, minP, minD), the first
    date of the series (minDate) and the daily increments dfatalities /
    dpositives, in which non-positive increments are replaced by NaN.
    """

    # Created once, when the class body executes, and shared by all instances.
    database = Database()

    @staticmethod
    def _daily_increments(cumulative, start):
        # Day-to-day differences of a cumulative series from index `start` on;
        # increments that are zero or negative are marked as NaN.
        daily = np.diff(cumulative[start:]).astype(float)
        daily[daily <= 0] = np.nan
        return daily

    def __init__(self, source="Johns Hopkins", region="US", state="New York", county="", cutoff_positive=10, cutoff_death=10, truncate=0):
        self.source, self.region, self.state, self.county = source, region, state, county
        self.cutoff_positive, self.cutoff_death = cutoff_positive, cutoff_death
        self.truncate = truncate

        series = self.database.get(
            source=source,
            region=region,
            state=state,
            county=county,
            cutoff_positive=cutoff_positive,
            cutoff_death=cutoff_death,
            truncate=truncate,
        )
        (self.population, self.xd, self.x, self.positives,
         self.fatalities, self.minP, self.minD) = series
        self.minDate = self.xd.iat[0]

        self.dfatalities = self._daily_increments(self.fatalities, self.minD)
        self.dpositives = self._daily_increments(self.positives, self.minP)
        
    


In [None]:
#COMPARE DATA SOURCES for the US
#-------------------------------

def format_plot(ax, scale='linear', title=''):
    """Apply the notebook's common styling to one date-indexed axes.

    Args:
        ax: matplotlib axes to format.
        scale: y-axis scale name; with 'log' the lower y-limit is set to 1.
        title: axes title.
    """

    def _with_thousands_separator(value, _position):
        return '{:,g}'.format(value)

    for axis_name, tick_kind in (('y', 'both'), ('x', 'major')):
        ax.grid(axis=axis_name, which=tick_kind)
    ax.legend()
    ax.set_title(title)

    # The y formatter is installed after set_yscale, preserving the original call order.
    ax.set_yscale(scale)
    ax.yaxis.set_major_formatter(FuncFormatter(_with_thousands_separator))
    if scale == 'log':
        ax.set_ylim(bottom=1)

    # Major x ticks from WeekdayLocator, labelled like "Mar-15".
    ax.xaxis.set_major_locator(WeekdayLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b-%d'))


# Same location and cutoffs for every source: New York state, both cutoffs at 1, no truncation.
_ny_query = dict(region='US', state='New York', county='', cutoff_positive=1, cutoff_death=1, truncate=0)
d1 = Data(source='Johns Hopkins', **_ny_query)
d2 = Data(source='NY Times', **_ny_query)
d3 = Data(source='CovidTracking', **_ny_query)
d4 = Data(source='Kaggle4', **_ny_query)
d5 = Data(source='Kaggle5', **_ny_query)

plots = [d1, d2, d3, d4, d5]
fig, axs = plt.subplots(2, 2, figsize=(24, 12))
(ax_cumul_fatal, ax_daily_fatal), (ax_cumul_pos, ax_daily_pos) = axs

markers = ['o:', '1:', '2:', '3-', 'x:']
for plot, marker in zip(plots, markers):
    line_style = dict(fillstyle='none', label=plot.source)
    ax_cumul_fatal.plot(plot.xd, plot.fatalities, marker, **line_style)
    ax_cumul_pos.plot(plot.xd, plot.positives, marker, **line_style)

    # Daily series start one day after the respective cutoff day (np.diff drops one point).
    ax_daily_fatal.plot(plot.xd[plot.minD+1:], plot.dfatalities, marker, **line_style)
    ax_daily_pos.plot(plot.xd[plot.minP+1:], plot.dpositives, marker, **line_style)

# Titles in row-major order of the 2x2 grid.
titles = ['Cumul Fatalities', 'Daily Fatalities', 'Cumul Positive', 'Daily Positives']
for a, title in zip(axs.flat, titles):
    format_plot(a, 'linear', title)
fig.autofmt_xdate()

print('d1.Population=',d1.population)


In [None]:
#COMPARE DATA SOURCES for other regions
#--------------------------------------

region = 'Italy'
state  = ''

# Same location and cutoffs for every source: both cutoffs at 1, no truncation.
_region_query = dict(region=region, state=state, county='', cutoff_positive=1, cutoff_death=1, truncate=0)
d1 = Data(source='Johns Hopkins', **_region_query)
d4 = Data(source='Kaggle4', **_region_query)
d5 = Data(source='Kaggle5', **_region_query)

plots = [d1, d4, d5]
fig, axs = plt.subplots(2, 2, figsize=(24, 12))
(ax_cumul_fatal, ax_daily_fatal), (ax_cumul_pos, ax_daily_pos) = axs

markers = ['o:', '2:', '3-', 'x:']
for plot, marker in zip(plots, markers):
    line_style = dict(fillstyle='none', label=plot.source)
    ax_cumul_fatal.plot(plot.xd, plot.fatalities, marker, **line_style)
    ax_cumul_pos.plot(plot.xd, plot.positives, marker, **line_style)

    # Daily series start one day after the respective cutoff day (np.diff drops one point).
    ax_daily_fatal.plot(plot.xd[plot.minD+1:], plot.dfatalities, marker, **line_style)
    ax_daily_pos.plot(plot.xd[plot.minP+1:], plot.dpositives, marker, **line_style)

# Titles in row-major order of the 2x2 grid.
titles = ['Cumul Fatalities', 'Daily Fatalities', 'Cumul Positive', 'Daily Positives']
for a, title in zip(axs.flat, titles):
    format_plot(a, 'linear', title)
fig.autofmt_xdate()

print('d1.Population=',d1.population)


In [None]:

# t<t1    => beta0 const
# t>t2    => beta2 const
# t1<t<t2 => cubic to be continuous and differentiable at t1 and t2
def smooth_step(x, params):
    """Contact-rate profile that moves smoothly from beta0 to beta2 between t1 and t2.

    Args:
        x: numpy array of times.
        params: mapping with the keys 'beta0', 'beta2', 't1' and 't2'.

    Returns:
        Array shaped like x: beta0 before t1, beta2 after t2, and in between the
        cubic blend beta2 + (beta0-beta2)*(2s^3 - 3s^2 + 1), s = (x-t1)/(t2-t1),
        whose slope is zero at both ends.
    """
    beta0, beta2 = params['beta0'], params['beta2']
    t1, t2 = params['t1'], params['t2']

    s = (x-t1)/(t2-t1)   # 0 at t1, 1 at t2
    transition = beta2 + (beta0-beta2)*(2*s**3 - 3*s**2 +1)

    before = beta0 * np.ones_like(x)
    profile = np.where((t1<=x)&(x<=t2), transition, before)
    return np.where((t2<x), beta2, profile)

# Demo of smooth_step: contact rate going from 4/7 to 2/7 between day 20 and day 60.
x = np.arange(0,100)
y = smooth_step(x, {'beta0':4/7, 'beta2':2/7, 't1': 20, 't2': 60})
plt.plot(x, y,label='interv')
plt.grid()
plt.legend()
# Close this figure here: without show() the curve stayed on the current axes and
# was drawn into the next demo plot, where a second bare plt.grid() call toggled
# the grid again.
plt.show()

def piecewiselin(x, params):
    """Piecewise-linear contact rate through the knots (t_i, beta_i), i = 1..segments.

    Args:
        x: numpy array of times; x[0] is the start of the first segment.
        params: mapping with 'beta0' (value at x[0]), 'segments' (number of
            knots), 'init_beta' and, for each i in 1..segments, 'beta{i}' and 't{i}'.
            When 'init_beta' is 'const' the first segment is held at beta0
            instead of being interpolated.

    Returns:
        Array of len(x): constant beta0 when segments == 0; otherwise the
        interpolated profile, held at the last beta from the last knot onwards.
    """
    start_beta = params['beta0']
    n = params['segments']
    init_beta = params['init_beta']

    left_beta = start_beta
    left_t = x[0]

    if n == 0:
        return start_beta * np.ones(len(x))

    b = np.zeros(len(x))
    i = 1
    while i <= n:
        bi = params['beta{}'.format(i)]
        ti = params['t{}'.format(i)]

        in_segment = (left_t <= x) & (x <= ti)
        if i == 1 and init_beta == 'const':
            segment_values = left_beta
        else:
            segment_values = (x-left_t)*(bi-left_beta)/(ti-left_t)+left_beta
        b = np.where(in_segment, segment_values, b)

        left_beta, left_t = bi, ti
        i += 1

    # From the last knot onwards the rate stays at the last beta.
    return np.where(x >= ti, bi, b)

# Demo of piecewiselin with 0, 1 and 2 segments; b2 holds the first segment
# constant ('const'), b2l interpolates it.
x = np.arange(100)
b0 = piecewiselin(x, {'init_beta':'', 'segments':0, 'beta0':3/7})
b1 = piecewiselin(x, {'init_beta':'', 'segments':1, 't1':20, 'beta0':3.1/7, 'beta1':2.1/7})
b2 = piecewiselin(x, {'init_beta':'const', 'segments':2, 't1':20, 't2':80, 'beta0':3.2/7, 'beta1':2.2/7, 'beta2':1/7})
b2l = piecewiselin(x, {'init_beta':'', 'segments':2, 't1':20, 't2':80, 'beta0':3.2/7, 'beta1':2.2/7, 'beta2':1/7})
# Each curve is drawn once (b0 used to be plotted twice, duplicating its legend entry).
plt.plot(x,b0,label='b0')
plt.plot(x,b1,label='b1')
plt.plot(x,b2,label='b2')
plt.plot(x,b2l,label='b2l')
plt.grid()
plt.legend()
plt.show()


def piecewiseconst(x, params):
    """Piecewise-constant contact rate that jumps to beta_i at each time t_i.

    Args:
        x: numpy array of times.
        params: mapping with 'beta0', 'segments' and, for each i in
            1..segments, 'beta{i}' and 't{i}'.

    Returns:
        Array of len(x) equal to beta0, overwritten by beta_i wherever
        x >= t_i (later steps are applied over earlier ones).
    """
    n = params['segments']
    rate = params['beta0'] * np.ones(len(x))

    step = 1
    while step <= n:
        level = params['beta{}'.format(step)]
        switch_time = params['t{}'.format(step)]
        rate = np.where(x >= switch_time, level, rate)
        step += 1

    return rate


def testing_rate(x, params):  

    n  = params['testing_segments'] #number of segments
    ri1 = params['detection_rate']
    ti1 = x[0]

    if n==0:
        r = ri1 * np.ones(len(x))
    else:
        r = np.zeros(len(x))
        i=1
        while i<=n:

            ri = params['detection_rate{}'.format(i)]
            ti = params['testing_time{}'.format(i)]

            r = np.where( (ti1<=x)&(x<=ti), (x-ti1)*(ri-ri1)/(ti-ti1)+ri1, r)
            i=i+1
            ri1=ri
            ti1=ti

        r = np.where(x>=ti,ri,r)
    
    return r

# Demo of testing_rate: detection rate rising from 5% to 10% at day 20 and to 20% at day 30.
testing_demo_params = {
    'testing_segments': 2,
    'detection_rate': 5e-2,
    'testing_time1': 20,
    'detection_rate1': 10e-2,
    'testing_time2': 30,
    'detection_rate2': 20e-2,
}
plt.plot(x, testing_rate(x, testing_demo_params), label='testing')
plt.grid()
plt.legend()
plt.show()


#-------------------------------------------------------
#calculate the contact rate over time, according to the profile given by the intervention function and the calibration params (betai, ti)
def contact_rate(x,params):
    """Contact rate over time for the intervention profile named in params['interv'].

    Args:
        x: numpy array of times.
        params: mapping with 'interv' ('smooth step', 'piecewise linear' or
            'piecewise constant') plus the keys the selected profile function
            reads. For any other 'interv' value only 'beta0' is needed.

    Returns:
        Array shaped like x with the contact rate at each time; a constant
        beta0 when the intervention name is not recognised.
    """
    interv_functions = {
                        'smooth step'        : smooth_step,
                        'piecewise linear'   : piecewiselin,
                        'piecewise constant' : piecewiseconst
                       }

    interv_func = interv_functions.get(params['interv'])
    if interv_func is None:
        # No recognised intervention: constant contact rate. The original read an
        # undefined name `beta0` here and raised NameError.
        return params['beta0'] * np.ones_like(x)

    return interv_func(x, params)



# Demo of piecewiseconst with 0, 1 and 2 steps.
x = np.arange(100)
b0 = piecewiseconst(x, dict(segments=0, beta0=3/7))
b1 = piecewiseconst(x, dict(segments=1, t1=20, beta0=3.1/7, beta1=2.1/7))
b2 = piecewiseconst(x, dict(segments=2, t1=20, t2=80, beta0=3.2/7, beta1=2.2/7, beta2=1.3/7))
for _curve, _label in ((b0, 'const 0'), (b1, 'const 1'), (b2, 'const 2')):
    plt.plot(x, _curve, label=_label)
plt.grid()
plt.legend()
plt.show()



def seed_infection(x, params):
    """Exponentially decaying seeding rate: seed_init * 0.5 ** (x / seed_halflife).

    Args:
        x: time(s), scalar or numpy array.
        params: mapping with the keys 'seed_init' and 'seed_halflife'.
    """
    return params['seed_init'] * np.power(0.5, x / params['seed_halflife'])

# Demo of seed_infection: 10 at day 0, halving every 5 days.
seed_demo = seed_infection(x, {'seed_init':10, 'seed_halflife':5})
plt.plot(x, seed_demo, label='seed')
plt.grid()
plt.legend()
plt.show()



In [None]:
#######################################################
# SIR models
########################################################

#-------------------------------------------------------
# basic daily integration of a modified SIR model
# the function returns a numpy matrix, with a row per day and the following columns (cumulative results since day of inception)
# Column indices of the result matrix, numbered consecutively from 0.
(
    cS,    # Susceptible people
    cE,    # Exposed, incubating but not infectious
    cI,    # Infectious
    cT,    # Testing
    cR,    # Recovered after infectious (cumulative)
    cC,    # Critical: seriously ill after initial infectious period, will die in the next period; assume isolated, so they are not contaminating other people
    cF,    # Fatalities (cumulative)
    cP,    # Positive cases (cumulative results of positive tests, recovered people are not included)
    cI1,   # extra column, not written by SEIRF / SIRF
    cI2,   # extra column, not written by SEIRF / SIRF
    cNum,  # number of columns of the result matrix
) = range(11)


def SEIRF(x, params):
    """Integrate the modified SEIR model with fatalities, one step per element of x.

    Args:
        x: numpy array of times; its size gives the number of rows and it is
            passed on to seed_infection, contact_rate and testing_rate.
        params: mapping with 'population', the initial values 'e0', 'i0', 't0',
            'p0', 'f0', 'c0', the rates 'beta0', 'gamma_incub', 'gamma_infec',
            'gamma_crit', 'gamma_pos', 'death_rate', 'detection_rate', plus
            'mixing' and 'interv'. An optional 'seed' == True enables
            seed_infection. The helper functions read further keys from the
            same mapping.

    Returns:
        Array of shape (x.size, cNum). Row 0 holds the initial conditions and
        each later row the state after one more step. Columns cS, cE, cI, cT,
        cR, cC, cF and cP are filled; cI1 and cI2 are left at zero.
    """

    population      = params['population']
    e0              = params['e0']
    i0              = params['i0']
    t0              = params['t0']
    p0              = params['p0']
    f0              = params['f0']
    c0              = params['c0']
    beta0           = params['beta0']            # read but not used below; the rate comes from contact_rate()
    gamma_incub     = params['gamma_incub']
    gamma_infec     = params['gamma_infec']
    gamma_crit      = params['gamma_crit']
    gamma_pos       = params['gamma_pos']
    death_rate      = params['death_rate']
    detection_rate  = params['detection_rate']   # overwritten at every step by testing_rate()
    mixing          = params['mixing']
    intervention    = params['interv']           # read but not used below; contact_rate() reads it itself
    
    #array of results, the columns are indexed by cS, cE, etc.
    y = np.zeros((x.size,cNum))

    
    #force introduction of exposed people into the population
    #at a decreasing rate over time
    #the function seed_infection needs 'seed_init' and 'seed_halflife' and calculates init * 0.5^(t/halflife)  (where t=0 is the first day corresponding to min(minD, minP))
    if 'seed' in params and params['seed']==True:
        seed = seed_infection(x, params)
    else:
        seed = np.zeros_like(x)

    #contact rate (beta) for every time step
    interv = contact_rate(x, params)   
    
    #detection rate for every time step
    detect = testing_rate(x, params)
    
    for i in range(0,x.size):
        
        if i==0:
            
            #initial conditions
            exposed    = e0
            infectious = i0
            testing    = t0
            positives  = p0    
            fatalities = f0    
            critical   = c0
            
            #initial recovered count is derived from the initial fatalities and critical cases
            recovered = (f0+c0) / death_rate
            
            #'testing' is not subtracted here: it is tracked in parallel to the population compartments
            susceptible = population - exposed - infectious - critical - fatalities - recovered
          
        else:    
            
            #beta = intervention(i, beta0, beta2, t1, t2)
            beta = interv[i]
            detection_rate = detect[i]
    
            #flows between compartments, all computed from the state at the previous step
            newlyexposed = beta * susceptible * (infectious/population)**mixing  + seed[i]
            newlyinfectious = gamma_incub * exposed
            newlycritical = death_rate * gamma_infec * infectious
            newlyrecovered = (1-death_rate) * gamma_infec * infectious
            newfatalities = gamma_crit * critical
        
            d_susceptible = - newlyexposed
            
            d_exposed = newlyexposed - newlyinfectious

            #a fraction detection_rate of the newly infectious enters testing; results come out at rate gamma_pos
            d_testing = detection_rate * newlyinfectious - gamma_pos * testing
            d_positives = gamma_pos * testing

            d_infectious = newlyinfectious - newlycritical - newlyrecovered
            
            d_recovered = newlyrecovered
            
            d_critical = newlycritical - newfatalities
            
            d_fatalities = newfatalities
            
            #apply all increments only after every flow has been computed
            susceptible += d_susceptible
            exposed     += d_exposed
            positives   += d_positives
            infectious  += d_infectious
            testing     += d_testing
            recovered   += d_recovered
            critical    += d_critical
            fatalities  += d_fatalities

        #store the state of this step
        y[i,cS] = susceptible
        y[i,cE] = exposed
        y[i,cI] = infectious
        y[i,cT] = testing
        y[i,cR] = recovered
        y[i,cC] = critical
        y[i,cF] = fatalities
        y[i,cP] = positives  
            
    return y            

#-------------------------------------------------------
#initialize the SEIRF model from a given i0 and beta, assuming we are in the early exponential growth with coherent number of exposed, infectious, etc..
# in early stages, when S ~ 1 : assume 
# E = alpha * I and 
# C = b * I 
# I = i0 * exp[(beta0-gamma_infec)*t]
# alpha and b can be solved from the dynamics
    
def init_SEIRF(i0, beta0, constants):
    """Build SEIRF parameters whose initial compartments are proportional to i0.

    Args:
        i0: initial number of infectious people.
        beta0: initial contact rate.
        constants: mapping with 'gamma_infec', 'gamma_incub', 'gamma_crit',
            'gamma_pos', 'death_rate' and 'detection_rate'; it is not modified.

    Returns:
        A copy of constants with 'i0', 'beta0', 'e0', 'c0' and 't0' set, where
        e0, c0 and t0 are i0 times the ratios computed below.
    """
    gamma_infec     = constants['gamma_infec']
    gamma_incub     = constants['gamma_incub']
    gamma_crit      = constants['gamma_crit']
    gamma_pos       = constants['gamma_pos']
    death_rate      = constants['death_rate']
    detection_rate  = constants['detection_rate']

    # Exposed-to-infectious ratio: positive root of a quadratic in the ratio.
    rate_ratio = gamma_infec/gamma_incub
    discriminant = (1-rate_ratio)**2 + 4*rate_ratio*beta0/gamma_infec
    exposed_ratio = (-(1-rate_ratio)+math.sqrt(discriminant))/2

    # Critical-to-infectious and testing-to-infectious ratios.
    critical_ratio = death_rate * gamma_infec / (gamma_incub * exposed_ratio + gamma_crit - gamma_infec )
    testing_ratio = detection_rate * gamma_incub * exposed_ratio / (gamma_incub*exposed_ratio + gamma_pos - gamma_infec)

    p = constants.copy()
    p.update({
        'i0': i0,
        'beta0': beta0,
        'e0': exposed_ratio * i0,
        'c0': critical_ratio * i0,
        't0': testing_ratio * i0,
    })
    return p

def init_SEIRF_doubling(fr, doubling, constants):
    """Build SEIRF parameters from an observed fatality rate and doubling time.

    Model equations in the early phase (S ~ 1):
      (1) E' = beta * S * I - gamma_incub * E
      (2) I' = gamma_incub * E - gamma_infec * I
      (3) C' = death_rate * gamma_infec * I - gamma_crit * C
      (4) F' = gamma_crit * C
      (5) T' = detection_rate * gamma_incub * E - gamma_pos * T
      (6) P' = gamma_pos * T
    Assuming E = alpha * I, C = b * I and growth at rate fa = ln(2)/doubling:
      alpha = (fa + gamma_infec) / gamma_incub      (matching the growth of I)
      beta  = (fa + gamma_incub) * alpha            (matching the growth of E)

    Args:
        fr: multiplies the initial infectious count i0 (see the i0 formula below).
        doubling: doubling time; fa = ln(2) / doubling.
        constants: mapping with 'gamma_infec', 'gamma_incub', 'gamma_crit',
            'gamma_pos', 'death_rate' and 'detection_rate'; it is not modified.

    Returns:
        A copy of constants with 'i0', 'beta0', 'e0', 'c0' and 't0' set.
    """
    gamma_infec     = constants['gamma_infec']
    gamma_incub     = constants['gamma_incub']
    gamma_crit      = constants['gamma_crit']
    gamma_pos       = constants['gamma_pos']
    death_rate      = constants['death_rate']
    detection_rate  = constants['detection_rate']

    # Exponential growth rate implied by the doubling time.
    fa = math.log(2)/doubling

    alpha = (fa + gamma_infec)/gamma_incub
    beta  = (fa + gamma_incub)*alpha
    critical_ratio = death_rate * gamma_infec / (fa + gamma_crit)
    testing_ratio = detection_rate * gamma_incub * alpha / (fa + gamma_pos)

    i0 = fr * (gamma_incub * alpha + gamma_crit - gamma_infec) / (death_rate * gamma_crit * gamma_infec)

    p = constants.copy()
    p.update({
        'i0': i0,
        'beta0': beta,
        'e0': alpha * i0,
        'c0': critical_ratio * i0,
        't0': testing_ratio * i0,
    })
    return p

#use same columns as SEIRF
def SIRF(x, params):
    """Integrate the SIR model with fatalities, one step per element of x.

    Args:
        x: numpy array of times; its size gives the number of rows and it is
            passed on to seed_infection and contact_rate.
        params: mapping with 'population', 'i0', 'p0', 'f0', 'gamma_infec',
            'death_rate', 'detection_rate' and 'mixing'. An optional
            'seed' == True enables seed_infection. contact_rate reads further
            keys from the same mapping.

    Returns:
        Array of shape (x.size, cNum) using the same column indices as SEIRF.
        Only cS, cI, cR, cF and cP are filled; the other columns stay zero.
        Row 0 holds the initial conditions.
    """

    population      = params['population']
    i0              = params['i0']
    p0              = params['p0']
    f0              = params['f0']
    gamma           = params['gamma_infec']
    death_rate      = params['death_rate']    
    detection_rate  = params['detection_rate']    
    mixing          = params['mixing']
    
    y = np.zeros((x.size,cNum))

    #optional forced introduction of infections, decreasing over time
    if 'seed' in params and params['seed']==True:
        seed = seed_infection(x, params)
    else:
        seed = np.zeros_like(x)
    
    #contact rate (beta) for every time step
    interv = contact_rate(x, params)   

#    if intervention=='piecewise linear':
#        interv = piecewiselin(x, params) 
#    else:
#        #intervention=='piecewise constant':
#        interv = piecewiseconst(x, params) 
    
    for i in range(0,x.size):
        
        if i==0:
            
            #initial conditions
            infectious = i0
            positives  = p0    
            fatalities = f0    
            #initial recovered count is derived from the initial fatalities
            recovered = f0 / death_rate
            
            susceptible = population - infectious  - fatalities - recovered
          
        else:    

            beta = interv[i]

            #NOTE(review): the mixing exponent is applied to `infectious` alone here, while SEIRF
            #applies it to (infectious/population); both agree only when mixing == 1 - confirm intent
            newlyinfectious = beta * susceptible * math.pow(infectious,mixing) / population + seed[i]
            newfatalities = death_rate * gamma * infectious
            newlyrecovered = (1-death_rate) * gamma * infectious
        
            d_susceptible = - newlyinfectious
            
            #positives are a fixed fraction of new infections (constant detection_rate, no testing delay)
            d_positives = detection_rate * newlyinfectious

            d_infectious = newlyinfectious - newlyrecovered - newfatalities
            
            d_recovered = newlyrecovered
            
            d_fatalities = newfatalities
            
            #apply all increments only after every flow has been computed
            susceptible += d_susceptible
            positives   += d_positives
            infectious  += d_infectious
            recovered   += d_recovered
            fatalities  += d_fatalities

        #store the state of this step
        y[i,cS] = susceptible
        y[i,cI] = infectious
        y[i,cR] = recovered
        y[i,cF] = fatalities
        y[i,cP] = positives  
            
    return y            

#-------------------------------------------------------


# Demo: run SEIRF twice on the same base parameters, once with a constant
# detection rate (params) and once with a detection rate that increases in two
# steps (params1), then plot the results.
params = {}
params['population']      = 1e6
params['p0']              = 1
params['f0']              = 1
params['beta0']           = 2/7
params['i0']              = 10
params['gamma_incub']     = 1/4
params['gamma_infec']     = 1/7
params['gamma_pos']       = 1/14
params['gamma_crit']      = 1/14
params['death_rate']      = 0.5e-2
params['detection_rate']  = 5e-2
params['testing_segments'] = 0
params['mixing']          = 1
params['segments']        = 0
params['interv']          = 'piecewise linear'   #'piecewise constant'   'smooth step'
params['init_beta']       = ''  #'const'
# init_SEIRF returns a copy with e0, c0 and t0 derived from i0 and beta0
params = init_SEIRF(params['i0'], params['beta0'], params)

#params1 = params.copy()
#params1['segments'] = 2
#params1['t1']  = 20
#params1['beta1'] = params1['beta0']
#params1['t2']  = 27
#params1['beta2'] = 1.9/7

# Second scenario: detection rate doubles by day 20 and triples by day 50
params1 = params.copy()
params1['testing_segments'] = 2
params1['testing_time1']  = 20
params1['detection_rate1'] = 2*params1['detection_rate']
params1['testing_time2']  = 50
params1['detection_rate2'] = 3*params1['detection_rate']

print('-------------')
for i,(k,v) in enumerate(params1.items()):
    print(k,'\t\t:',v)
    
x = np.arange(200)
y0 = SEIRF(x, params)
y1 = SEIRF(x, params1)
interv0         = contact_rate(x, params) 
interv1         = contact_rate(x, params1) 

# NOTE(review): the figure handle is named `fix` (probably meant `fig`); it is not used below
fix, axs = plt.subplots(2,3, figsize=(18,12))

# infectious counts of both scenarios
axs[0][0].plot(x, y0[:,cI], label='SEIRF 0')
axs[0][0].plot(x, y1[:,cI], label='SEIRF 1')

# difference in cumulative positives between the two scenarios
axs[0][1].plot(x, y1[:,cP] - y0[:,cP], label='diff P')

# contact-rate profiles; the same two curves are drawn in both axs[1][0] and axs[1][1]
axs[1][0].plot(x, interv0, label='interv 0')
axs[1][1].plot(x, interv0, label='interv 0')
axs[1][0].plot(x, interv1, label='interv 1')
axs[1][1].plot(x, interv1, label='interv 1')

# axs[0][2] and axs[1][2] receive no curves but still get a grid and a legend call
for ax in axs:
    for a in ax:
        a.grid()
        a.legend()
        #a.set_yscale('log')
plt.show()




In [None]:
#MAXIMUM LIKELIHOOD
#GENERAL THEORY OF FITTING EPIDEMIOLOGICAL MODELS: https://www.sciencedirect.com/science/article/pii/S2468042719300491
#INTRO TO MLE, POISSON and NEGATIVE BINOMIAL
#https://towardsdatascience.com/a-gentle-introduction-to-maximum-likelihood-estimation-9fbff27ea12f
#https://towardsdatascience.com/an-illustrated-guide-to-the-poisson-regression-model-50cccba15958
#https://towardsdatascience.com/negative-binomial-regression-f99031bb25b4
#THEORY OF POISSON+GAMMA MIXTURE and equivalence to Negative Binomial https://gregorygundersen.com/blog/2019/09/16/poisson-gamma-nb/
#Generalized Linear Model and STATSMODELS:
#https://towardsdatascience.com/generalized-linear-models-9cbf848bb8ab
#https://www.statsmodels.org/stable/glm.html   
#HOW TO CREATE CUSTOM MODEL FOR STATSMODELS https://austinrochford.com/posts/2015-03-03-mle-python-statsmodels.html 
from scipy import stats
#stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)

def model_expgrowth(x, params, constants):
    """Exponential growth curve: params[0] * exp(params[1] * x).

    `constants` is accepted for signature compatibility with the other models and is not used.
    """
    scale, rate = params[0], params[1]
    growth = np.exp(rate * x)
    return growth * scale

def model_expgrowthquad(x, params, constants):
    """Exponential of a quadratic: params[0] * exp(params[1] * x**2 + params[2] * x).

    `constants` is accepted for signature compatibility with the other models and is not used.
    """
    scale, quad_coef, lin_coef = params[0], params[1], params[2]
    exponent = quad_coef * np.power(x, 2) + lin_coef * x
    return np.exp(exponent) * scale


def loglik_leastsquarerel(y, yhat, alpha):
    """Sum of squared differences between log(yhat) and log(y), ignoring NaN terms.

    y holds the observed values and yhat the model predictions; alpha is unused.
    """
    log_gap = np.log(yhat) - np.log(y)
    return np.nansum(log_gap ** 2)

def loglik_leastsquare(y, yhat, alpha):
    """Sum of squared residuals (yhat - y), ignoring NaN terms.

    y holds the observed values and yhat the model predictions; alpha is unused.
    """
    residuals = yhat - y
    return np.nansum(residuals ** 2)

def loglik_poisson(y, yhat, alpha):
    """Negative Poisson log-likelihood of observations y given predictions yhat.

    Terms that do not depend on yhat are left out because they do not affect the minimisation;
    NaN terms are ignored and alpha is unused.
    """
    per_point = y * np.log(yhat) - yhat
    return -np.nansum(per_point)

def loglik_negbin(y, yhat, alpha):
    """Negative of the negative-binomial log-likelihood of y given predictions yhat.

    alpha is the dispersion parameter (r = 1/alpha). Only the terms that depend on yhat are
    kept: y*log(yhat) - (y+r)*log(yhat+r). The full expression would additionally contain
    loggamma(y+r) - loggamma(y+1) - loggamma(r) + r*log(r). NaN terms are ignored.
    """
    dispersion = 1 / alpha
    kernel = y * np.log(yhat) - (y + dispersion) * np.log(yhat + dispersion)
    return -np.nansum(kernel)

#fit a model using maximum likelihood
def fit_model(x, y, model_func, constants, loglik_func, guess, bounds, alpha=1):
    """Fit a model by minimising a (negative log-)likelihood with L-BFGS-B.

    Parameters
    ----------
    x, y : calibration inputs and observations.
    model_func : callable(x, params, constants) returning the model prediction.
    constants : passed through unchanged to model_func; may be any object (list, tuple, dict, ...).
    loglik_func : callable(y, yhat, alpha) returning the value to minimise.
    guess : initial parameter vector.
    bounds : sequence of (min, max) pairs, one per parameter.
    alpha : extra argument forwarded to loglik_func.

    Returns
    -------
    (params, objective, prediction) : the calibrated parameter array, the minimised objective
    (log-likelihood without constant terms), and model_func evaluated on x with those parameters.
    """

    # Objective handed to scipy's minimize(). It is a closure over x, y, constants and alpha.
    # `constants` is deliberately NOT forwarded through minimize(args=...): scipy unpacks a
    # tuple given as `args` into separate positional arguments, which broke tuple constants.
    def regression_func(params):
        yhat = model_func(x, params, constants)
        return loglik_func(y, yhat, alpha)

    mle = minimize(regression_func, x0=guess, bounds=bounds, method="L-BFGS-B")

    res = model_func(x, mle.x, constants)
    return mle.x, mle.fun, res

#find the inflection point in the data
def findsplit(x, y, model_func, constants, loglik_func, guess, bounds, n_min, n_max, window=0, alpha=1, conf=0.05):
    """Search for the index that best splits the data into two separately fitted regimes.

    Every candidate split in range(n_min, n_max) is evaluated by fitting the model to
    x[:split-window] and x[split+window:] with fit_model and summing the two objectives.
    The best split is kept only if a likelihood-ratio test against a single fit over the
    whole range is significant at level `conf`.

    Returns an objdict with:
      Split   - the chosen split index, or 0 when no split is retained;
      Stages  - [left_params, right_params]; without a split the full-range parameters are
                in the first entry and the second is an empty list;
      Predict - the fitted curve over the whole range. With a split, the 2*window points
                excluded around it are NaN; without a split it is the full-range fit.

    If range(n_min, n_max) is empty, the result is the no-split solution.
    """

    # Evaluate every candidate split and remember the one with the lowest combined objective.
    best = None
    for split in range(n_min, n_max):
        params_left, mle_left, res_l = fit_model(x[:split-window], y[:split-window], model_func, constants, loglik_func, guess, bounds, alpha)
        params_right, mle_right, res_r = fit_model(x[split+window:], y[split+window:], model_func, constants, loglik_func, guess, bounds, alpha)

        total = mle_left + mle_right
        # The first candidate is always accepted; later ones must be at least as good.
        if best is None or total <= best['mle']:
            best = {'split': split, 'mle': total,
                    'res_left': res_l, 'res_right': res_r,
                    'p_left': params_left, 'p_right': params_right}

    # Reference fit over the entire range, without a split.
    params, mle, res = fit_model(x, y, model_func, constants, loglik_func, guess, bounds, alpha)

    # Likelihood-ratio test: is the split fit significantly better than the single fit?
    keep_split = False
    if best is not None:
        lr = -2 * (best['mle'] - mle)
        # 2 degrees of freedom are assumed here, as in the original code.
        # NOTE(review): this matches a 2-parameter model; confirm for models with more parameters.
        p = stats.chi2.sf(lr, 2)
        keep_split = not (p > conf)

    r = objdict({})
    r.Stages = []
    if keep_split:
        # NaN filler for the 2*window points left out around the split, so that Predict lines up with x.
        gap = np.full(2*window, np.nan)
        r.Split = best['split']
        r.Stages.append(best['p_left'])
        r.Stages.append(best['p_right'])
        r.Predict = np.concatenate([np.ravel(best['res_left']), gap, np.ravel(best['res_right'])])
    else:
        # No split: the full-range fit already covers every x, so no NaN filler is added.
        r.Split = 0
        r.Stages.append(params)
        r.Stages.append([])
        r.Predict = np.array(res, dtype=float).ravel()

    return r


#------------------------------
#estimate alpha for negative binomial
#https://dius.com.au/2017/08/03/using-statsmodels-glms-to-model-beverage-consumption/
def calc_alpha(data,fit):
    """Estimate the negative-binomial dispersion alpha by an auxiliary regression.

    The quantity ((data - fit)**2 - data) / fit is regressed on `fit` with no intercept;
    the single slope coefficient is returned. NaN values are replaced by np.nan_to_num
    before the regression.
    """
    excess = (np.power(data - fit, 2) - data) / fit
    design = fit[:, np.newaxis]  # single-column design matrix
    regression = LinearRegression(fit_intercept=False)
    regression = regression.fit(np.nan_to_num(design), np.nan_to_num(excess))
    return regression.coef_[0]




In [None]:

def study(source, region, state, cutoff_positive,cutoff_death, init_window, recent_window, truncate):
    """Plot the fatalities/positives series of one region and estimate doubling times.

    A Data object is built from the arguments, its cumulative and daily series are drawn on
    a 2x3 grid of axes, and model_expgrowth is fitted (Poisson objective) over the first
    `init_window` points and over the last `recent_window` points. Each fit is overlaid in
    black and its doubling time, log(2)/rate, is printed.

    Returns a dict holding 'state' and one doubling-time entry per fit.
    """

    print('------------')
    print(source,': ', region, '-', state)
    
    d = Data(source=source, region=region, state=state, county="", cutoff_positive=cutoff_positive, cutoff_death=cutoff_death, truncate=truncate)

    n = len(d.fatalities)
    print('n:{} minD:{}, minP:{}'.format(n,d.minD, d.minP))

    rpt={'state':state}

    #--------------------------------
    # Raw data: top row shows cumulative series, bottom row shows daily series.
    fig, axs = plt.subplots(2,3,figsize=(18,12))

    axs[0][0].set_title('{} {} - cumul counts - log scale'.format(d.region, d.state))
    axs[0][0].plot(d.x, d.fatalities,'r+:', label='fatalities')
    axs[0][0].plot(d.x, d.positives,'b+:', label='positives')

    axs[0][1].set_title('{} {} - fatalities - linear scale'.format(d.region, d.state))
    axs[0][1].plot(d.x, d.fatalities,'r+:', label='fatalities')

    axs[0][2].set_title('{} {} - positives - linear scale'.format(d.region, d.state))
    axs[0][2].plot(d.x, d.positives,'b+:', label='positives')

    # The daily series are drawn against d.x[minD+1:] and d.x[minP+1:] respectively.
    axs[1][0].set_title('{} {} - daily counts - log scale'.format(d.region, d.state))
    axs[1][0].plot(d.x[d.minD+1:], d.dfatalities,'r+:', label='fatalities')
    axs[1][0].plot(d.x[d.minP+1:], d.dpositives,'b+:', label='positives')

    axs[1][1].set_title('{} {} - daily fatalities - linear scale'.format(d.region, d.state))
    axs[1][1].plot(d.x[d.minD+1:], d.dfatalities,'r+:', label='fatalities')

    axs[1][2].set_title('{} {} - daily positives - linear scale'.format(d.region, d.state))
    axs[1][2].plot(d.x[d.minP+1:], d.dpositives,'b+:', label='positives')


    # Initial-phase fits: `window` points starting at index minD.
    window=init_window
  
    # Cumulative fatalities.
    params, mle, predict = fit_model(d.x[d.minD:d.minD+window], d.fatalities[d.minD:d.minD+window], model_expgrowth, [], loglik_poisson, [1,3/7], [(1e-6,1e6),(-5/7,10/7)], alpha=1)
    axs[0][0].plot(d.x[d.minD:d.minD+window], predict,'k-')
    axs[0][1].plot(d.x[d.minD:d.minD+window], predict,'k-')
    rpt['Initial Fatalities Doubling Time']=math.log(2)/params[1]
    print("Initial Fatalities Doubling Time over first {} days:{:.1f}".format(window, math.log(2)/params[1]))

    
    # Daily fatalities.
    # NOTE(review): d.dfatalities is plotted above against d.x[minD+1:], so pairing
    # d.dfatalities[:window] with d.x[minD:minD+window] looks shifted by one day - confirm.
    params, mle, predict = fit_model(d.x[d.minD:d.minD+window], d.dfatalities[:window], model_expgrowth, [], loglik_poisson, [1,3/7], [(1e-6,1e6),(-5/7,10/7)], alpha=1)
    axs[1][0].plot(d.x[d.minD:d.minD+window], predict,'k-')
    axs[1][1].plot(d.x[d.minD:d.minD+window], predict,'k-')
    rpt['Initial Daily Fatalities Doubling Time']=math.log(2)/params[1]
    print("Initial Daily Fatalities Doubling Time over first {} days:{:.1f}".format(window, math.log(2)/params[1]))

    # Cumulative positives, over the same window that starts at minD (not minP).
    params, mle, predict = fit_model(d.x[d.minD:d.minD+window], d.positives[d.minD:d.minD+window], model_expgrowth, [], loglik_poisson, [1,3/7], [(1e-3,1e6),(-5/7,10/7)], alpha=1)
    axs[0][0].plot(d.x[d.minD:d.minD+window], predict,'k-')
    axs[0][2].plot(d.x[d.minD:d.minD+window], predict,'k-')
    rpt['Initial Positive Doubling Time']=math.log(2)/params[1]
    print("Initial Positives Doubling Time over first {} days:{:.1f}".format(window, math.log(2)/params[1]))

    # Daily positives.
    # NOTE(review): d.dpositives is plotted above against d.x[minP+1:], yet it is sliced here
    # with minD-based indices - verify that this selects the intended days.
    params, mle, predict = fit_model(d.x[d.minD:d.minD+window], d.dpositives[d.minD:d.minD+window], model_expgrowth, [], loglik_poisson, [1,3/7], [(1e-3,1e6),(-5/7,10/7)], alpha=1)
    axs[1][0].plot(d.x[d.minD:d.minD+window], predict,'k-')
    axs[1][2].plot(d.x[d.minD:d.minD+window], predict,'k-')
    rpt['Initial Daily Positive Doubling Time']=math.log(2)/params[1]
    print("Initial Daily Positives Doubling Time over first {} days:{:.1f}".format(window, math.log(2)/params[1]))


    # Recent-phase fits: the last `window` points of the daily series.
    window=recent_window

    params, mle, predict = fit_model(d.x[-window:], d.dfatalities[-window:], model_expgrowth, [], loglik_poisson, [1,0.7/7], [(1e-3,1e6),(-5/7,5/7)], alpha=1)
    axs[1][0].plot(d.x[-window:], predict,'k-')
    axs[1][1].plot(d.x[-window:], predict,'k-')
    rpt['Recent Fatalities Doubling Time']=math.log(2)/params[1]
    print("Recent Fatalities Doubling Time over last {} days:{:.1f}".format(window, math.log(2)/params[1]))

    params, mle, predict = fit_model(d.x[-window:], d.dpositives[-window:], model_expgrowth, [], loglik_poisson, [1,0.7/7], [(1e-3,1e6),(-5/7,5/7)], alpha=1)
    axs[1][0].plot(d.x[-window:], predict,'k-')
    axs[1][2].plot(d.x[-window:], predict,'k-')
    rpt['Recent Positive Doubling Time']=math.log(2)/params[1]
    print("Recent Positives Doubling Time over last {} days:{:.1f}".format(window, math.log(2)/params[1]))

    # The first column uses a log y-axis; every axis gets a legend and a grid.
    for ax in axs:
        ax[0].set_yscale('log')
        for a in ax:
            a.legend()
            a.grid()
    plt.show()
    
    return rpt

#rpt=[]
#rpt.append(study(source='Johns Hopkins', region='US', state='New York', cutoff_positive=1, cutoff_death=1, init_window=14, recent_window=42, truncate=0))
#rpt.append(study(source='Johns Hopkins', region='US', state='New York', cutoff_positive=1, cutoff_death=1, init_window=14, recent_window=14, truncate=38))
#print(rpt)


In [None]:
import warnings                                  # `do not disturbe` mode
warnings.filterwarnings('ignore')

def update_model(params, constants):
    """Write a flat parameter vector into the model dict `constants` (modified in place).

    params is laid out as [i0, beta0, beta1..beta_m, t1..t_m] with m = constants['segments'].
    The t entries of params are fractions: each break time is placed between the previous
    break time plus n_window (starting from constants['border_0']) and the latest day
    allowed for that segment, n-1 - (m-i)*n_window.
    """
    segments = constants['segments']
    horizon = constants['n']
    previous_break = constants['border_0']
    spacing = constants['n_window']

    constants['i0'] = params[0]
    constants['beta0'] = params[1]

    for seg in range(1, segments + 1):
        beta_key = 'beta{}'.format(seg)
        time_key = 't{}'.format(seg)

        constants[beta_key] = params[seg + 1]

        latest = horizon - 1 - (segments - seg) * spacing
        fraction = params[seg + 1 + segments]
        previous_break = fraction * (latest - previous_break - spacing) + previous_break + spacing
        constants[time_key] = previous_break
        #print (params[3], constants['t1'])
        

#constants = {'segments':2, 'n':10, 'border_0':3, 'n_window':1}
#params = [0,0, 0,0, 0.5,0.5]   #[i0,bet0,..betam,t1,..tm]
#update_model(params, constants)
#print(constants)

def model_SEIRF_pieces_d(x, params, constants):
    """Return column cF of a SEIRF run for a flat parameter vector.

    params uses the layout consumed by update_model: [i0, beta0, beta1..beta_m, t1..t_m].
    The model dict is built with init_SEIRF(i0, beta0, constants), completed with
    update_model, and then run with SEIRF over x.
    """
    i0 = params[0]
    # beta0 is the second entry of the vector. Index 2 is beta1, and it does not exist when
    # there are no extra segments.
    beta0 = params[1]
    model = init_SEIRF(i0, beta0, constants)
    update_model(params, model)

    y = SEIRF(x, model)
    return y[:,cF]
    

def model_SEIRF_pieces_p(x, params, constants):
    """Return column cP of a SEIRF run with four entries of the model overridden.

    A copy of `constants` is used, so the caller's dict is left untouched. params supplies,
    in order: t0, detection_rate, gamma_pos and p0.
    """
    model = constants.copy()
    for position, key in enumerate(('t0', 'detection_rate', 'gamma_pos', 'p0')):
        model[key] = params[position]

    return SEIRF(x, model)[:, cP]

def format_calib_pieces(state, model, params, predict, beta_t):
    """Flatten one calibration result into a report row (a new dict).

    The row holds the state and model labels, R0 per segment (beta / gamma_infec), the
    segment break times, the gamma rates expressed as durations (1/gamma), the remaining
    model parameters copied as they are, and finally the prediction and beta(t) arrays.
    """
    row = {
        'state': state,
        'model': model,
        'R0_0': params['beta0'] / params['gamma_infec'],
    }

    # One break time and one R0 per additional segment.
    for seg in range(1, params['segments'] + 1):
        row['t{}'.format(seg)] = params['t{}'.format(seg)]
        row['R0_{}'.format(seg)] = params['beta{}'.format(seg)] / params['gamma_infec']

    row['mixing'] = params['mixing']

    # Rates are reported as durations.
    for rate_key in ('gamma_incub', 'gamma_infec', 'gamma_pos', 'gamma_crit'):
        row[rate_key] = 1 / params[rate_key]

    # Values copied through unchanged.
    for key in ('death_rate', 'detection_rate', 'e0', 'i0', 'p0', 'f0', 'c0', 't0'):
        row[key] = params[key]

    row['predict'] = predict
    row['beta(t)'] = beta_t

    return dict(row)

#--------------------------------------
def calibrate_SEIRF_pieces(d, num_segments, n_window, border_0, constants):
    """Calibrate the piecewise SEIRF model in two steps using scipy's curve_fit.

    Step 1 fits i0, beta0, beta1..beta_m and the break-time fractions t1..t_m on the
    fatalities series from index d.minD onwards. Step 2 keeps those values and fits
    detection_rate alone on the positives series.

    `constants` is modified in place (population, p0, f0, segments, n, border_0 and
    n_window are written into it), so callers should pass a copy if they need to keep
    the original.

    Returns (model, y): the calibrated model dict and the SEIRF output over d.x[d.minD:].
    """

    constants['population'] = d.population
    constants['p0']         = d.positives[d.minD]
    constants['f0']         = d.fatalities[d.minD]
    constants['segments']   = num_segments
    constants['n']          = d.x[-1]
    constants['border_0']   = border_0
    constants['n_window']   = n_window

    gamma_infec = constants['gamma_infec']

    # Parameter vector layout: [i0, beta0, beta1..beta_m, t1..t_m]
    guess = [10, 2*gamma_infec]
    bounds_l = [0.1, 0.01*gamma_infec]
    bounds_h = [100e3, 15*gamma_infec]

    m = constants['segments']
    for i in range(1,m+1): #beta(i)
        guess.append(2*gamma_infec)
        bounds_l.append(0.01*gamma_infec)
        bounds_h.append(15*gamma_infec)

    for i in range(1,m+1): #t(i), expressed as a fraction in [0, 1] that update_model converts to a day
        guess.append(1)
        bounds_l.append(0)
        bounds_h.append(1)

    def fit_SEIRF_fatalities(xdata, *params):
        # Closure over 'constants' so that curve_fit only sees the free parameters.
        i0 = params[0]
        beta0 = params[1]
        seir = init_SEIRF(i0, beta0, constants)
        update_model(params, seir)
        y = SEIRF(xdata, seir)
        return y[:,cF]

    # The builtin float replaces the np.float alias, which NumPy removed in version 1.24.
    popt, pcov = curve_fit(fit_SEIRF_fatalities, d.x[d.minD:].astype(float), d.fatalities[d.minD:], p0=guess, bounds=(bounds_l,bounds_h))

    # Freeze the fatalities calibration into a model dict.
    model = init_SEIRF(popt[0], popt[1], constants)
    update_model(popt, model)

    def fit_SEIRF_positives(xdata, *params):
        # Closure over the calibrated 'model'; only detection_rate varies.
        seir = model.copy()
        seir['detection_rate'] = params[0]
        y = SEIRF(xdata, seir)
        return y[:,cP]

    popt, pcov = curve_fit(fit_SEIRF_positives, d.x[d.minD:], d.positives[d.minD:], p0=[3e-2], bounds=((1e-2),(15e-2)))

    model['detection_rate'] = popt[0]

    y = SEIRF(d.x[d.minD:], model)
    return model, y


#--------------------------------------
# NOTE(review): alpha is assigned here but not referenced anywhere else in this cell.
alpha = 0.1

# Data selection.
source          = 'Johns Hopkins'
region          = 'US'
state           = 'New York'
cutoff_positive = 1
cutoff_death    = 1

# Fixed model inputs shared by every calibration below; calibrate_SEIRF_pieces receives a copy each time.
constants = {'gamma_incub'   : 1/4, 
             'gamma_infec'   : 1/7, 
             'gamma_pos'     : 1/14, 
             'gamma_crit'    : 1/14, 
             'death_rate'    : 0.5e-2, 
             'testing_segments' : 0,
             'detection_rate': 3e-2,            
             'mixing'        : 1,
             'interv'        : 'piecewise linear',
             'init_beta'      : ''}   #'' or 'const'
   
  
    
# Full (untruncated) data set.
d = Data(source=source, region=region, state=state, county="", cutoff_positive=cutoff_positive, cutoff_death=cutoff_death, truncate=0)
print(d.region, '-', d.state)

# Descriptive plots and doubling times, once with truncate=0 and once with truncate=28+minD.
# The returned report dicts are not kept.
study(source=source, region=region, state=state, cutoff_positive=1, cutoff_death=1, init_window=14, recent_window=14, truncate=0)
study(source=source, region=region, state=state, cutoff_positive=1, cutoff_death=1, init_window=14, recent_window=14, truncate=28+d.minD)  

n = len(d.fatalities)
print('n:{} minD:{}, minP:{}'.format(n,d.minD, d.minP))


#--------------------------------
# Rows of the final report, one per calibrated model.
rpt = []
#--------------------------------

#calibrate on first 2 weeks after minD
# No extra segment here (num_segments=0). The calibrated parameters are then used to
# simulate over the full range d.x[minD:].
d1 = Data(source=source, region=region, state=state, county="", cutoff_positive=cutoff_positive, cutoff_death=cutoff_death, truncate=d.minD+14)
p1, y1 = calibrate_SEIRF_pieces(d1, num_segments=0, border_0=d1.minD, n_window=1, constants=constants.copy())
y1 = SEIRF(d.x[d.minD:], p1)
b1 = piecewiselin(d.x[d.minD:], p1)
print(p1)
print(p1['beta0']/p1['gamma_infec'])

#--------------------------------
# Calibrate on the full data set with 1 and then 2 additional piecewise-linear segments.
s = [1,2]
for segments in s:
    #p, y = calibrate_SEIRF_pieces(d, num_segments=segments, border_0 = d.minD, n_window=7, interv='piecewise constant')
    #b = piecewiseconst(d.x[d.minD:], p)
    #rpt.append(format_calib_pieces(d.state, 'constant segments:{}'.format(segments+1), p, y, b))

    p, y = calibrate_SEIRF_pieces(d, num_segments=segments, border_0=d.minD, n_window=7, constants=constants.copy())
    b = piecewiselin(d.x[d.minD:], p)
    rpt.append(format_calib_pieces(d.state, 'linear segments:{}'.format(segments+1), p, y, b))

#--------------------------------
#Report
# The array-valued columns ('predict', 'beta(t)') are dropped before rendering the table as HTML.
prpt = pd.DataFrame(rpt)
formatters={'death_rate': '{:.1%}'.format, 'detection_rate': '{:.1%}'.format}
display(HTML(prpt.drop(columns=['predict','beta(t)']).to_html(index=False, formatters=formatters, float_format='{:,.2f}'.format)))


#--------------------------------
# Observed data with the calibrated models overlaid: top row cumulative, bottom row daily.
fig, axs = plt.subplots(2,3,figsize=(18,12))

axs[0][0].set_title('{} {} - cumul counts - log scale'.format(d.region, d.state))
axs[0][0].plot(d.x, d.fatalities,'r+:', label='fatalities')
axs[0][0].plot(d.x, d.positives,'b+:', label='positives')

axs[0][1].set_title('{} {} - fatalities - linear scale'.format(d.region, d.state))
axs[0][1].plot(d.x, d.fatalities,'r+:', label='fatalities')

axs[0][2].set_title('{} {} - positives - linear scale'.format(d.region, d.state))
axs[0][2].plot(d.x, d.positives,'b+:', label='positives')

axs[1][0].set_title('{} {} - daily counts - log scale'.format(d.region, d.state))
axs[1][0].plot(d.x[d.minD+1:], d.dfatalities,'r+:', label='fatalities')
axs[1][0].plot(d.x[d.minP+1:], d.dpositives,'b+:', label='positives')

axs[1][1].set_title('{} {} - daily fatalities - linear scale'.format(d.region, d.state))
axs[1][1].plot(d.x[d.minD+1:], d.dfatalities,'r+:', label='fatalities')

axs[1][2].set_title('{} {} - daily positives - linear scale'.format(d.region, d.state))
axs[1][2].plot(d.x[d.minP+1:], d.dpositives,'b+:', label='positives')

#calibration on 14 days
# The log-scale panels show the whole simulated range; the linear panels show only the first 14 points.
axs[0][0].plot(d.x[d.minD:], y1[:,cF],'r-', label='init')
axs[0][1].plot(d.x[d.minD:d.minD+14], y1[:14,cF],'r-', label='init')
axs[1][0].plot(d.x[d.minD+1:], np.diff(y1[:,cF]),'r-', label='init')
axs[1][1].plot(d.x[d.minD+1:d.minD+14], np.diff(y1[:14,cF]),'r-', label='init')

# One colour per report row; daily curves are the first differences of the cumulative prediction.
colors = plt.cm.jet(np.linspace(0,1,len(rpt)))
for idx, r in enumerate(rpt):
    
    y = r['predict']
    
    axs[0][0].plot(d.x[d.minD:], y[:,cF],'-', color=colors[idx], label=r['model'])
    axs[0][0].plot(d.x[d.minD:], y[:,cP],'-', color=colors[idx], )

    axs[0][1].plot(d.x[d.minD:], y[:,cF],'-', color=colors[idx], label=r['model'])
    axs[0][2].plot(d.x[d.minD:], y[:,cP],'-', color=colors[idx], label=r['model'])

    axs[1][0].plot(d.x[d.minD+1:], np.diff(y[:,cF]),'-', color=colors[idx], label=r['model'])
    axs[1][0].plot(d.x[d.minD+1:], np.diff(y[:,cP]),'-', color=colors[idx])
    axs[1][1].plot(d.x[d.minD+1:], np.diff(y[:,cF]),'-', color=colors[idx], label=r['model'])
    axs[1][2].plot(d.x[d.minD+1:], np.diff(y[:,cP]),'-', color=colors[idx], label=r['model'])

# The first column uses a log y-axis; every axis gets a legend and a grid.
for ax in axs:
    ax[0].set_yscale('log')
    for a in ax:
        a.legend()
        a.grid()
plt.show()


#--------------------------------
# Calibration errors (observed minus predicted) and the fitted beta(t) curves.
# Row 0: errors relative to the observed value; row 1: absolute errors; row 2: beta(t).
# Left column: fatalities; right column: positives.
fig, axs = plt.subplots(3,2,figsize=(12,12))
axs[0][0].set_title('{} {} - relative fatalities errors counts'.format(d.region, d.state))
axs[0][1].set_title('{} {} - relative positives errors counts'.format(d.region, d.state))
axs[1][0].set_title('{} {} - absolute fatalities errors counts'.format(d.region, d.state))
axs[1][1].set_title('{} {} - absolute positives errors counts'.format(d.region, d.state))

axs[2][0].set_title('{} {}- beta'.format(d.region, d.state))
axs[2][1].set_title('{} {}- beta'.format(d.region, d.state))


# Initial 14-day calibration, shown over its first 14 points only.
axs[0][0].plot(d.x[d.minD:d.minD+14], -(y1[:14,cF] - d.fatalities[d.minD:d.minD+14])/d.fatalities[d.minD:d.minD+14],'r:', label='init')
axs[0][1].plot(d.x[d.minD:d.minD+14], -(y1[:14,cP] - d.positives[d.minD:d.minD+14])/d.positives[d.minD:d.minD+14],'r:', label='init')
axs[1][0].plot(d.x[d.minD:d.minD+14], -(y1[:14,cF] - d.fatalities[d.minD:d.minD+14]),'r:', label='init')
axs[1][1].plot(d.x[d.minD:d.minD+14], -(y1[:14,cP] - d.positives[d.minD:d.minD+14]),'r:', label='init')
    
# The same beta curve is drawn in both bottom panels.
axs[2][0].plot(d.x[d.minD:d.minD+14], b1[:14],'r:', label='init')
axs[2][1].plot(d.x[d.minD:d.minD+14], b1[:14],'r:', label='init')

# Each calibrated model from the report, coloured with the `colors` array built for the previous figure.
for idx, r in enumerate(rpt):
    
    y = r['predict']
    b = r['beta(t)']
    
    axs[0][0].plot(d.x[d.minD:], -(y[:,cF] - d.fatalities[d.minD:])/d.fatalities[d.minD:],'+-', color=colors[idx], label=r['model'])
    axs[0][1].plot(d.x[d.minD:], -(y[:,cP] - d.positives[d.minD:])/d.positives[d.minD:],'+-', color=colors[idx], label=r['model'])
    axs[1][0].plot(d.x[d.minD:], -(y[:,cF] - d.fatalities[d.minD:]),'+-', color=colors[idx], label=r['model'])
    axs[1][1].plot(d.x[d.minD:], -(y[:,cP] - d.positives[d.minD:]),'+-', color=colors[idx], label=r['model'])
    
    axs[2][0].plot(d.x[d.minD:], b,'-', color=colors[idx], label=r['model'])
    axs[2][1].plot(d.x[d.minD:], b,'-', color=colors[idx], label=r['model'])


for ax in axs:
    for a in ax:
        a.legend()
        a.grid()
#axs[1][0].set_yscale('symlog',linscale=10)
#axs[1][1].set_yscale('symlog',linscale=10)
plt.show()


In [None]:
#---------------------------------------------------------------
#LMFIT Parameters structure does not support string values; these get passed in a separate dict
#this function brings all of the SEIRF params into a single dict
#---------------------------------------------------------------
def merge_params(params, constants):
    """Combine an LMFIT Parameters instance and a plain dict into one dict.

    The result starts from params.valuesdict(); entries of `constants` are added only for
    keys that are not already present, so values held in `params` take precedence.
    """
    merged = params.valuesdict()
    for key, value in constants.items():
        merged.setdefault(key, value)
    return merged


#---------------------------------------------------------------
#function called by LMFIT Minimizer
#if data=None, the function returns the full SEIRF result array; otherwise it returns the residuals against the given data of the fatalities column followed by the scaled positives column (this is used by LMFIT)
#params should be a LMFIT Parameters instance; constants a dictionary
#---------------------------------------------------------------
def lmfit_SEIRF_both(params, x, constants, data=None):
    """Objective for LMFIT that calibrates on fatalities and positives together.

    With data=None the full SEIRF output is returned. Otherwise the fatalities column
    followed by the positives column multiplied by parvals['scaleP'] is compared to `data`
    and the residual vector is returned. The scaling keeps the larger positives counts
    from dominating the fit. Debug information is printed when a residual is not finite.
    """
    parvals = merge_params(params, constants)
    model = init_SEIRF(parvals['i0'], parvals['beta0'], parvals)
    y = SEIRF(x, model)

    # Read before the early return, so a missing 'scaleP' is reported in either mode.
    scaleP = parvals['scaleP']

    if data is None:
        return y

    stacked = np.append(y[:,cF], scaleP * y[:,cP])
    res = stacked - data

    # Print debug info when NaN or infinite residuals appear.
    if not np.isfinite(res).all():
        print('#######error')
        print(parvals)
        print(y[:,cF])

    return res
        
        
#---------------------------------------------------------------
#function called by LMFIT Minimizer
#the ret parameter is used to select a column from the SEIRF result array; all columns are returned if ret==0
#if data=None, the function returns the values from SEIRF, otherwise it compares the results of SEIRF to the given data and returns the residuals (this is used by LMFIT)
#params should be a LMFIT Parameters instance; constants a dictionary
#---------------------------------------------------------------
def lmfit_SEIRF(params, x, constants, data=None, ret=0):
    """Run SEIRF for LMFIT and return either model values or residuals.

    ret == 0 returns the whole SEIRF result array, whatever `data` is. Any other `ret` is
    used as a column index (cF, cP, ...): the column itself is returned when data is None,
    otherwise the column minus `data`.
    """
    parvals = merge_params(params, constants)
    model = init_SEIRF(parvals['i0'], parvals['beta0'], parvals)
    y = SEIRF(x, model)

    if ret == 0:
        return y

    if data is not None:
        return y[:,ret] - data
    return y[:,ret]


#---------------------------------------------------------------
#
#---------------------------------------------------------------
def calibrate_positives(d, constants, startx):
    """Fit detection_rate on the positives series from index `startx` onwards.

    testing_segments (0) and gamma_pos (1/14) are held fixed; every other model value
    comes from `constants`. Returns (p, y, b): the merged parameter dict, the full SEIRF
    output for the fitted parameters, and the contact rate over the same x range.
    """
    free = Parameters()
    free.add_many(
        ('testing_segments', 0,    False),
        ('gamma_pos',        1/14, False),
        ('detection_rate',   3e-2, True, 1e-2, 30e-2),
    )

    # Residuals are taken on column cP against the confirmed positives.
    minimizer = Minimizer(lmfit_SEIRF, free, fcn_args=(d.x[startx:], constants, d.positives[startx:], cP))
    outcome = minimizer.minimize()

    fitted = outcome.params
    y = lmfit_SEIRF(fitted, d.x[startx:], constants)
    p = merge_params(fitted, constants)
    b = contact_rate(d.x[startx:], p)

    return p, y, b


#---------------------------------------------------------------
#
#---------------------------------------------------------------
def calibrate_fatalities(d, constants, startx):
    """Fit death_rate on the fatalities series from index `startx` onwards.

    Every other model value comes from `constants`. Returns (p, y, b): the merged
    parameter dict, the full SEIRF output for the fitted parameters, and the contact rate
    over the same x range.
    """
    free = Parameters()
    free.add_many(
        ('death_rate',   0.5e-2, True, 0.1e-2, 5e-2),
    )

    # Residuals are taken on column cF against the recorded fatalities.
    minimizer = Minimizer(lmfit_SEIRF, free, fcn_args=(d.x[startx:], constants, d.fatalities[startx:], cF))
    outcome = minimizer.minimize()

    fitted = outcome.params
    y = lmfit_SEIRF(fitted, d.x[startx:], constants)
    p = merge_params(fitted, constants)
    b = contact_rate(d.x[startx:], p)

    return p, y, b

#---------------------------------------------------------------
#Print a summary of the calibrated parameters
#---------------------------------------------------------------
def report_calib(label, d, p):
    """Print a human-readable summary of a calibrated parameter dict.

    label : title printed in the header.
    d     : data object providing region, state and population.
    p     : merged parameter dict. Segments are numbered 1..p['segments'] and testing
            segments 1..p['testing_segments']; a segment is skipped when one of its keys
            is missing from p.
    """

    print('=====================')
    print(label, ': ', d.region, ' ', d.state)
    print('=====================')

    print("Population:\t{:,.0f}".format(d.population))
    print("P0:\t\t{:,.0f}".format(p['p0']))
    print("F0:\t\t{:,.0f}".format(p['f0']))
    print("I0:\t\t{:,.0f}".format(p['i0']))

    # Rates are displayed as durations (1/gamma).
    print("Incub:\t\t{:.1f}".format(1/p['gamma_incub']))
    print("Infec:\t\t{:.1f}".format(1/p['gamma_infec']))
    print("Testing:\t{:.1f}".format(1/p['gamma_pos']))
    print("Crit:\t\t{:.1f}".format(1/p['gamma_crit']))

    print("R0\t\t{:.1f}".format(p['beta0']/p['gamma_infec']))
    # The upper bound is inclusive so that the last segment is reported as well.
    # int() is needed because the count may arrive as a float.
    for i in range(1, int(p['segments']) + 1):
        t_key = 't{}'.format(i)
        beta_key = 'beta{}'.format(i)
        if t_key in p and beta_key in p:
            print("{:.0f}\t\t{:.1f}".format(p[t_key], p[beta_key]/p['gamma_infec']))

    print("IFR:\t\t{:.2%}".format(p['death_rate']))

    print("Detect:\t\t{:.2%}".format(p['detection_rate']))
    for i in range(1, int(p['testing_segments']) + 1):
        time_key = 'testing_time{}'.format(i)
        rate_key = 'detection_rate{}'.format(i)
        if time_key in p and rate_key in p:
            # Same percentage format as the base detection rate above.
            print("{:.0f}\t\t{:.2%}".format(p[time_key], p[rate_key]))
    

#---------------------------------------------------------------
#Piece-wise linear contact rate with a single varying time point
#---------------------------------------------------------------
def calibrate1(d, label, border0, border1, startx):
    """Calibrate a piecewise-linear contact rate with a single free break time t1.

    i0, beta0, beta1 and t1 (plus the seed_* entries) are fitted on the fatalities series
    from index `startx` onwards; t1 is bounded to [startx + border0, d.x[-1] - border1].
    The detection rate is then fitted on the positives by calibrate_positives, and the
    result is printed with report_calib.

    Returns a dict with keys 'label', 'p', 'y', 'b' and 'startx'.
    """
    first_day = startx
    last_day = d.x[-1]

    # Each entry is (name, value, vary, min, max, expr)
    specs = [
        ('gamma_incub',    1/4, False),
        ('gamma_infec',    1/7, False),
        ('gamma_pos',      1/14, False),
        ('gamma_crit',     1/14, False),
        ('death_rate',     0.5e-2, False),
        ('detection_rate', 3e-2, False, 1e-2, 30e-2),
        ('mixing',         1, False),

        ('testing_segments', 0, False),

        ('population',     d.population, False),
        ('f0',             d.fatalities[startx], False),
        ('p0',             d.positives[startx], False),

        ('seed_init',      10, True, 0, 100),   # number of exposed cases seeded every day
        ('seed_halflife',  10, True, 1, 100),   # days to halve the number of daily seeded exposed cases

        ('segments',       1, False),

        ('i0',             10, True, 1e-3, 1e6),
        ('beta0',          2/7, True, 0.01/7, 15/7),
        ('beta1',          0.7/7, True, 0.01/7, 15/7),
        ('t1',             first_day+border0, True, first_day+border0, last_day-border1),
    ]
    params = Parameters()
    params.add_many(*specs)

    # lmfit Parameters cannot hold string values, so these travel in a separate dict.
    constants = dict([('interv', 'piecewise linear'), ('init_beta', ''), ('seed', False)])

    # Step 1: fit on fatalities (column cF).
    minimizer = Minimizer(lmfit_SEIRF, params, fcn_args=(d.x[startx:],constants, d.fatalities[startx:],cF))
    outcome = minimizer.minimize()
    fatalities_fit = merge_params(outcome.params, constants)

    # Step 2: fit the detection rate on positives, keeping the values found in step 1.
    p, y, b = calibrate_positives(d, fatalities_fit, startx)

    report_calib(label, d, p)

    return {'label':label, 'p':p, 'y':y, 'b':b, 'startx':startx}


#---------------------------------------------------------------
#Smooth step function 
#---------------------------------------------------------------
def calibrate2(d, label, border0, border1, window, startx):
    """Calibrate a 'smooth step' contact rate between beta0 and beta2.

    i0, beta0, beta2 and t1 (plus the seed_* entries) are fitted on the fatalities series
    from index `startx` onwards; t2 is tied to t1 through the expression 't1+<window>'.
    The detection rate is then fitted on the positives by calibrate_positives, and the
    result is printed with report_calib.

    Returns a dict with keys 'label', 'p', 'y', 'b' and 'startx'.
    """
    first_day = startx
    last_day = d.x[-1]

    # Each entry is (name, value, vary, min, max, expr)
    specs = [
        ('gamma_incub',    1/4, False),
        ('gamma_infec',    1/7, False),
        ('gamma_pos',      1/14, False),
        ('gamma_crit',     1/14, False),
        ('death_rate',     0.5e-2, False),
        ('detection_rate', 3e-2, False, 1e-2, 30e-2),
        ('mixing',         1, False),

        ('testing_segments', 0, False),

        ('population',     d.population, False),
        ('f0',             d.fatalities[startx], False),
        ('p0',             d.positives[startx], False),

        ('seed_init',      10, True, 0, 100),   # number of exposed cases seeded every day
        ('seed_halflife',  10, True, 1, 100),   # days to halve the number of daily seeded exposed cases

        ('segments',       2, False),

        ('i0',             10, True, 1e-3, 1e6),
        ('beta0',          2/7, True, 0.01/7, 15/7),
        ('beta2',          0.7/7, True, 0.01/7, 15/7),
        ('t1',             first_day, True, first_day+border0, last_day-border1-window),
        ('t2',             first_day, True, first_day+border0+window, last_day-border1, 't1+{}'.format(window)),
    ]
    params = Parameters()
    params.add_many(*specs)

    # lmfit Parameters cannot hold string values, so these travel in a separate dict.
    constants = dict([('interv', 'smooth step'), ('init_beta', ''), ('seed', False)])

    # Step 1: fit on fatalities (column cF).
    minimizer = Minimizer(lmfit_SEIRF, params, fcn_args=(d.x[startx:],constants, d.fatalities[startx:],cF))
    outcome = minimizer.minimize()
    fatalities_fit = merge_params(outcome.params, constants)

    # Step 2: fit the detection rate on positives, keeping the values found in step 1.
    p, y, b = calibrate_positives(d, fatalities_fit, startx)

    report_calib(label, d, p)

    return {'label':label, 'p':p, 'y':y, 'b':b, 'startx':startx}

 
#---------------------------------------------------
#Piece-wise linear contact rate on regular time grid 
#---------------------------------------------------
def calibrate3(d, label, window, startx):
    """Fit a contact rate whose knot times sit on a fixed, regular grid.

    The knot times ``t1 .. t<n-1>`` are frozen (``vary=False``); only ``i0`` and
    the ``beta*`` values are left free. The fit runs ``lmfit_SEIRF`` against
    ``d.fatalities[startx:]`` with the selector ``cF``; the merged parameters are
    then passed through ``calibrate_positives``.

    Parameters
    ----------
    d : data object; ``x``, ``population``, ``fatalities`` and ``positives`` are read.
    label : name stored in the result and passed to ``report_calib``.
    window : distance between consecutive knots.
    startx : index of the first observation used; also the starting time of the grid.

    Returns
    -------
    dict with keys ``'label'``, ``'p'``, ``'y'``, ``'b'``, ``'startx'``.
    """
    t_first = startx
    t_last = d.x[-1]

    span = t_last - t_first
    n_windows = int(span // window)   # number of whole windows in the fitted span
    leftover = span % window          # days that do not fill a whole window

    # Each entry is (name, value, vary, min, max, expr); trailing fields are optional.
    table = [
        ('gamma_incub',      1/4,                  False),
        ('gamma_infec',      1/7,                  False),
        ('gamma_pos',        1/14,                 False),
        ('gamma_crit',       1/14,                 False),
        ('death_rate',       0.5e-2,               False),
        ('detection_rate',   3e-2,                 False),
        ('mixing',           1,                    False),
        ('testing_segments', 0,                    False),
        ('population',       d.population,         False),
        ('f0',               d.fatalities[startx], False),
        ('p0',               d.positives[startx],  False),
        ('seed_init',        10,                   False, 0, 100),   # number of exposed cases seeded every day
        ('seed_halflife',    10,                   False, 1, 100),   # days to halve the number of daily seeded exposed cases
        ('segments',         n_windows - 1,        False),
        ('i0',               10,                   True, 1e-3, 1e6),
        ('beta0',            2/7,                  True, 0.01/7, 15/7),
    ]
    params = Parameters()
    params.add_many(*table)

    # Knots stop one window short of the end, so no knot is placed for the last window.
    # The leftover days are absorbed by the window in the middle of the grid.
    middle = n_windows // 2
    knot = startx
    for k in range(1, n_windows):
        bump = leftover if k == middle else 0
        knot = knot + window + bump
        params.add(f't{k}', value=knot, vary=False)
        params.add(f'beta{k}', value=2/7, vary=True, min=0.01/7, max=15/7)

    # lmfit Parameters cannot hold strings, so these travel in a separate dict.
    constants = {
        'interv': 'piecewise linear',
        'init_beta': '',   # original note: first window constant
        'seed': False,
    }

    minimizer = Minimizer(
        lmfit_SEIRF,
        params,
        fcn_args=(d.x[startx:], constants, d.fatalities[startx:], cF),
    )
    fit = minimizer.minimize()

    merged = merge_params(fit.params, constants)
    merged, model, rate = calibrate_positives(d, merged, startx)

    report_calib(label, d, merged)

    return {'label': label, 'p': merged, 'y': model, 'b': rate, 'startx': startx}

    

In [None]:
#---------------------------------------------------
#Piece-wise linear: solves both the breakpoint times and the betas for a given number of segments (calibrated on fatalities)
#---------------------------------------------------
def calibrate4(d, label, segments, window, startx):
    """Fit breakpoint times and contact rates for a given number of segments.

    Breakpoint ``t<i>`` is not varied directly. It is tied through an lmfit
    expression to a free fraction ``aux<i-1>`` in [0, 1]:
    ``t<i> = aux*(latest - t<i-1> - window) + t<i-1> + window`` with
    ``latest = d.x[-1] - (segments - i)*window``. The fit runs ``lmfit_SEIRF``
    against ``d.fatalities[startx:]`` with the selector ``cF``; the merged
    parameters are then passed through ``calibrate_positives``.

    Parameters
    ----------
    d : data object; ``x``, ``population``, ``fatalities`` and ``positives`` are read.
    label : name stored in the result and passed to ``report_calib``.
    segments : number of breakpoints ``t1 .. t<segments>`` (each with its own beta).
    window : spacing term used in the breakpoint expression above.
    startx : index of the first observation used; also the value of ``t0``.

    Returns
    -------
    dict with keys ``'label'``, ``'p'``, ``'y'``, ``'b'``, ``'startx'``.
    """
    t_start = startx
    t_end = d.x[-1]

    # Each entry is (name, value, vary, min, max, expr); trailing fields are optional.
    table = [
        ('gamma_incub',      1/4,                  False),
        ('gamma_infec',      1/7,                  False),
        ('gamma_pos',        1/14,                 False),
        ('gamma_crit',       1/14,                 False),
        ('death_rate',       0.5e-2,               False),
        ('detection_rate',   3e-2,                 False),
        ('mixing',           1,                    False),
        ('testing_segments', 0,                    False),
        ('population',       d.population,         False),
        ('f0',               d.fatalities[startx], False),
        ('p0',               d.positives[startx],  False),
        ('seed_init',        10,                   False, 0, 100),   # number of exposed cases seeded every day
        ('seed_halflife',    10,                   False, 1, 100),   # days to halve the number of daily seeded exposed cases
        ('segments',         segments,             False),
        ('i0',               10,                   True, 1e-3, 1e6),
        ('beta0',            2/7,                  True, 0.01/7, 15/7),
    ]
    params = Parameters()
    params.add_many(*table)

    params.add('t0', value=t_start, vary=False)
    for seg in range(1, segments + 1):
        latest = t_end - (segments - seg) * window
        prev_name = f't{seg - 1}'
        frac_name = f'aux{seg - 1}'

        # The free fraction places this breakpoint relative to the previous one.
        params.add(frac_name, value=0.1, vary=True, min=0, max=1)
        knot_expr = f"{frac_name}*({latest}-{prev_name}-{window})+{prev_name}+{window}"
        params.add(f't{seg}', vary=True, expr=knot_expr)

        params.add(f'beta{seg}', value=2/7, vary=True, min=0.01/7, max=15/7)

    # lmfit Parameters cannot hold strings, so these travel in a separate dict.
    constants = {
        'interv': 'piecewise linear',
        'init_beta': '',
        'seed': False,
    }

    minimizer = Minimizer(
        lmfit_SEIRF,
        params,
        fcn_args=(d.x[startx:], constants, d.fatalities[startx:], cF),
    )
    fit = minimizer.minimize()

    merged = merge_params(fit.params, constants)
    merged, model, rate = calibrate_positives(d, merged, startx)

    report_calib(label, d, merged)

    return {'label': label, 'p': merged, 'y': model, 'b': rate, 'startx': startx}



In [None]:
#---------------------------------------------------
#Piece-wise linear: solves both the breakpoint times and the betas for a given number of segments (calibrated on positives)
#---------------------------------------------------
def calibrate4_pos(d, label, segments, window, startx):
    """Fit breakpoint times and contact rates against the positives series.

    Breakpoint ``t<i>`` is tied through an lmfit expression to a free fraction
    ``aux<i-1>`` in [0, 1]:
    ``t<i> = aux*(latest - t<i-1> - window) + t<i-1> + window`` with
    ``latest = d.x[-1] - (segments - i)*window``. The fit runs ``lmfit_SEIRF``
    against ``d.positives[startx:]`` with the selector ``cP``; the merged
    parameters are then passed through ``calibrate_fatalities``.

    Parameters
    ----------
    d : data object; ``x``, ``population``, ``fatalities`` and ``positives`` are read.
    label : name stored in the result and passed to ``report_calib``.
    segments : number of breakpoints ``t1 .. t<segments>`` (each with its own beta).
    window : spacing term used in the breakpoint expression above.
    startx : index of the first observation used; also the value of ``t0``.

    Returns
    -------
    dict with keys ``'label'``, ``'p'``, ``'y'``, ``'b'``, ``'startx'``.
    """
    t_start = startx
    t_end = d.x[-1]

    # Each entry is (name, value, vary, min, max, expr); trailing fields are optional.
    table = [
        ('gamma_incub',      1/4,                  False),
        ('gamma_infec',      1/7,                  False),
        ('gamma_pos',        1/14,                 False),
        ('gamma_crit',       1/14,                 False),
        ('death_rate',       0.5e-2,               False),
        ('detection_rate',   5e-2,                 False),
        ('mixing',           1,                    False),
        ('testing_segments', 0,                    False),
        ('population',       d.population,         False),
        ('f0',               d.fatalities[startx], False),
        ('p0',               d.positives[startx],  False),
        ('seed_init',        10,                   False, 0, 100),   # number of exposed cases seeded every day
        ('seed_halflife',    10,                   False, 1, 100),   # days to halve the number of daily seeded exposed cases
        ('segments',         segments,             False),
        ('i0',               10,                   True, 1e-3, 1e6),
        ('beta0',            2/7,                  True, 0.01/7, 15/7),
    ]
    params = Parameters()
    params.add_many(*table)

    params.add('t0', value=t_start, vary=False)
    for seg in range(1, segments + 1):
        latest = t_end - (segments - seg) * window
        prev_name = f't{seg - 1}'
        frac_name = f'aux{seg - 1}'

        # The free fraction places this breakpoint relative to the previous one.
        params.add(frac_name, value=0.1, vary=True, min=0, max=1)
        knot_expr = f"{frac_name}*({latest}-{prev_name}-{window})+{prev_name}+{window}"
        params.add(f't{seg}', vary=True, expr=knot_expr)

        params.add(f'beta{seg}', value=2/7, vary=True, min=0.01/7, max=15/7)

    # lmfit Parameters cannot hold strings, so these travel in a separate dict.
    constants = {
        'interv': 'piecewise linear',
        'init_beta': '',
        'seed': False,
    }

    minimizer = Minimizer(
        lmfit_SEIRF,
        params,
        fcn_args=(d.x[startx:], constants, d.positives[startx:], cP),
    )
    fit = minimizer.minimize()

    merged = merge_params(fit.params, constants)
    merged, model, rate = calibrate_fatalities(d, merged, startx)

    report_calib(label, d, merged)

    return {'label': label, 'p': merged, 'y': model, 'b': rate, 'startx': startx}

In [None]:

#---------------------------------------------------
#Brute force timing for multiple segments 
#---------------------------------------------------
def calibrate5(d, label, segments, border0, border1, window, step, startx):
    """Brute-force the breakpoint times of a 'piecewise constant' contact rate.

    Candidate breakpoints ``t1 .. t<segments>`` are enumerated on a grid of
    spacing ``step``. For each candidate the breakpoints are frozen and
    ``i0`` / ``beta*`` are fitted with ``lmfit_SEIRF`` against
    ``d.fatalities[startx:]`` (selector ``cF``). The candidate with the lowest
    reduced chi-square (``result.redchi``) is kept, refitted once more, and the
    merged parameters are passed through ``calibrate_positives``.

    Parameters
    ----------
    d : data object; ``x``, ``population``, ``fatalities`` and ``positives`` are read.
    label : name stored in the result and passed to ``report_calib``.
    segments : number of breakpoints; with 0 a single fit is performed.
    border0 : value of the first candidate for ``t1``.
    border1 : enumeration stops once the last breakpoint exceeds ``d.x[-1] - border1``.
    window : minimum spacing between consecutive breakpoints.
    step : increment applied to a breakpoint between candidates.
    startx : index of the first observation used.

    Returns
    -------
    dict with keys ``'label'``, ``'p'``, ``'y'``, ``'b'``, ``'startx'``.

    Raises
    ------
    RuntimeError
        If no candidate could be fitted at all.
    """
    tmax = d.x[-1]
    last_allowed = tmax - border1

    #lmfit Parameters cannot accept string values so they get passed in a separate argument
    constants = { 'interv':'piecewise constant',
                  'init_beta':'',
                  'seed': False
                }

    params = Parameters()
    #(name, value, vary, min, max, expr)
    params.add_many(
                     ('gamma_incub',    1/4, False),
                     ('gamma_infec',    1/7, False),
                     ('gamma_pos',      1/14, False),
                     ('gamma_crit',     1/14, False),
                     ('death_rate',     0.5e-2, False),
                     ('detection_rate', 3e-2, False),
                     ('mixing',         1, False),

                     ('testing_segments', 0, False),

                     ('population',     d.population, False),
                     ('f0',             d.fatalities[startx], False),
                     ('p0',             d.positives[startx], False),

                     ('seed_init',      10        , False, 0, 100),#number of exposed cases seeded every day
                     ('seed_halflife',  10        , False, 1, 100),#days to halve the number of daily seeded exposed cases

                     ('segments',       segments         , False),

                     ('i0',             10        , True, 1e-3, 1e6),
                     ('beta0',          2/7       , True, 0.01/7, 15/7),
                   )

    #helper structure to iterate through all candidates:
    #boundaries maps 't1', 't2', ... 'tn' to the breakpoint values currently being tried
    # NOTE(review): the first candidate is border0, not startx + border0 as in the
    # smooth-step calibrations; confirm that breakpoints before the fitted span are intended.
    boundaries = {}
    for i in range(1, segments+1):
        ti = 't{}'.format(i)
        boundaries[ti] = border0 + (i-1)*window

        params.add(ti, value=boundaries[ti], vary=False)
        params.add('beta{}'.format(i), value=2/7, vary=True, min=0.01/7, max=15/7)

    def reset_val(c, i):
        """Set ti and every later tj to its minimum given its predecessor: tj = tj-1 + window."""
        if i <= segments:
            c['t{}'.format(i)] = c['t{}'.format(i-1)] + window
            reset_val(c, i+1)

    def next_val(c, i):
        """Advance ti by one step, reset the later breakpoints, and carry into ti-1 on overflow."""
        c['t{}'.format(i)] = c['t{}'.format(i)] + step
        reset_val(c, i+1)
        if i > 1 and c['t{}'.format(segments)] > last_allowed:
            next_val(c, i-1)

    #iterate through all candidates of t1,...tn, fit the contact rates for each, and keep the minimum
    min_params = None
    min_mle = None
    rpt = []   # per-candidate trace, kept for debugging; it is not returned
    while True:
        try:
            # Push EVERY candidate breakpoint into the parameter set, keyed by its own name.
            # The loop variable `i` of the setup loop above must not be reused here: it still
            # holds `segments`, so only the last breakpoint would ever be updated.
            for name, value in boundaries.items():
                params[name].set(value=value)

            #find the optimal beta for these boundary values
            fitter = Minimizer(lmfit_SEIRF, params, fcn_args=(d.x[startx:], constants, d.fatalities[startx:], cF))
            result = fitter.minimize()
            mle = result.redchi

            if min_params is None or mle <= min_mle:
                min_params = result.params.copy()
                min_mle = mle

            # Store a snapshot: `boundaries` is mutated in place by next_val below.
            rpt.append({'boundaries': dict(boundaries), 'mle': mle})
        except Exception as err:
            # Best effort: a failed candidate is reported and skipped. Catching Exception
            # (not a bare except) keeps KeyboardInterrupt able to stop the search.
            print('######### error', boundaries, repr(err))

        if segments == 0:
            break
        next_val(boundaries, segments)
        if boundaries['t{}'.format(segments)] > last_allowed:
            break

    if min_params is None:
        raise RuntimeError('calibrate5: no breakpoint configuration could be fitted')

    # Refit starting from the best candidate; its breakpoints are still frozen (vary=False).
    fitter = Minimizer(lmfit_SEIRF, min_params, fcn_args=(d.x[startx:], constants, d.fatalities[startx:], cF))
    result = fitter.minimize()

    p = merge_params(result.params, constants)

    p, y, b = calibrate_positives(d, p, startx)

    report_calib(label, d, p)

    return {'label':label, 'p':p, 'y':y, 'b':b, 'startx':startx}

In [None]:
#---------------------------------------------------------------
#Smooth step function solving on fatalities and positives
#---------------------------------------------------------------
def calibrate6(d, label, border0, border1, window, startx):
    """Fit the 'smooth step' contact-rate model on fatalities and positives together.

    The target vector is ``d.fatalities[startx:]`` followed by
    ``scaleP * d.positives[startx:]``, where ``scaleP`` is the ratio of the two
    series' maxima; it is minimised through ``lmfit_SEIRF_both``. Besides
    ``i0``, ``beta0``, ``beta2`` and ``t1``, the rates ``gamma_pos``,
    ``gamma_crit`` and the three ``detection_rate*`` entries are marked as varying.
    ``t2`` is tied to ``t1`` by the expression ``t1+window``.

    Parameters
    ----------
    d : data object; ``x``, ``population``, ``fatalities`` and ``positives`` are read.
    label : name stored in the result and passed to ``report_calib``.
    border0 : offset added to ``startx`` for the lower bound of ``t1``.
    border1 : offset subtracted from ``d.x[-1]`` for the upper bounds of ``t1``/``t2``.
    window : distance between ``t1`` and ``t2``.
    startx : index of the first observation used.

    Returns
    -------
    dict with keys ``'label'``, ``'p'``, ``'y'``, ``'b'``, ``'startx'``.
    """
    t_lo = startx
    t_hi = d.x[-1]

    f_start = d.fatalities[startx]
    p_start = d.positives[startx]

    # Each entry is (name, value, vary, min, max, expr); trailing fields are optional.
    table = [
        ('gamma_incub',      1/4,            False),
        ('gamma_infec',      1/7,            False),
        ('gamma_pos',        1/7,            True, 1/100, 1/2),
        ('gamma_crit',       1/21,           True, 1/42, 1/2),
        ('death_rate',       0.5e-2,         False, 0.01e-2, 10e-2),

        ('mixing',           1,              False),

        ('testing_segments', 0,              False),
        ('testing_time1',    (t_lo+t_hi)/2,  False, t_lo+21, t_hi-21),
        ('testing_time2',    t_hi,           False),
        ('detection_rate',   3e-2,           True, 1e-2, 20e-2),
        ('detection_rate1',  3e-2,           True, 1e-2, 20e-2),
        ('detection_rate2',  3e-2,           True, 1e-2, 20e-2),

        ('population',       d.population,   False),
        ('f0',               f_start,        False, 0, max(10, 10*f_start)),
        ('p0',               p_start,        False, 0, max(10, 10*p_start)),

        ('seed_init',        10,             False, 0, 100),   # number of exposed cases seeded every day
        ('seed_halflife',    10,             False, 1, 100),   # days to halve the number of daily seeded exposed cases

        ('segments',         2,              False),

        ('i0',               10,             True, 1e-3, 1e6),
        ('beta0',            2/7,            True, 0.01/7, 15/7),
        ('beta2',            0.7/7,          True, 0.01/7, 15/7),
        ('t1',               t_lo,           True, t_lo+border0, t_hi-border1-window),
        ('t2',               t_lo,           True, t_lo+border0+window, t_hi-border1, f't1+{window}'),
    ]
    params = Parameters()
    params.add_many(*table)

    # Positives are rescaled so their larger counts do not dominate the joint fit.
    scaleP = d.fatalities.max()/d.positives.max()
    target = np.append(d.fatalities[startx:], scaleP * d.positives[startx:])

    # lmfit Parameters cannot hold strings, so these travel in a separate dict.
    constants = {
        'interv': 'smooth step',
        'init_beta': '',
        'seed': False,
        'scaleP': scaleP,
    }

    minimizer = Minimizer(lmfit_SEIRF_both, params, fcn_args=(d.x[startx:], constants, target))
    fit = minimizer.minimize()

    # Evaluate the model at the fitted parameters, then derive the contact rate.
    model = lmfit_SEIRF(fit.params, d.x[startx:], constants)
    merged = merge_params(fit.params, constants)
    rate = contact_rate(d.x[startx:], merged)

    report_calib(label, d, merged)

    return {'label': label, 'p': merged, 'y': model, 'b': rate, 'startx': startx}

    

In [None]:
#---------------------------------------------------
#Piece-wise linear: solves both the breakpoint times and the betas for a given number of segments (calibrated on fatalities and positives together)
#---------------------------------------------------
def calibrate7(d, label, segments, window, startx):
    """Fit breakpoint times and contact rates on fatalities and positives together.

    Breakpoint ``t<i>`` is tied through an lmfit expression to a free fraction
    ``aux<i-1>`` in [0, 1]:
    ``t<i> = aux*(latest - t<i-1> - window) + t<i-1> + window`` with
    ``latest = d.x[-1] - (segments - i)*window``. The target vector is
    ``d.fatalities[startx:]`` followed by ``scaleP * d.positives[startx:]``,
    where ``scaleP`` is the ratio of the two series' maxima; it is minimised
    through ``lmfit_SEIRF_both``. ``detection_rate`` is varied as well.

    Parameters
    ----------
    d : data object; ``x``, ``population``, ``fatalities`` and ``positives`` are read.
    label : name stored in the result and passed to ``report_calib``.
    segments : number of breakpoints ``t1 .. t<segments>`` (each with its own beta).
    window : spacing term used in the breakpoint expression above.
    startx : index of the first observation used; also the value of ``t0``.

    Returns
    -------
    dict with keys ``'label'``, ``'p'``, ``'y'``, ``'b'``, ``'startx'``.
    """
    t_start = startx
    t_end = d.x[-1]

    # Each entry is (name, value, vary, min, max, expr); trailing fields are optional.
    table = [
        ('gamma_incub',      1/4,                  False),
        ('gamma_infec',      1/7,                  False),
        ('gamma_pos',        1/21,                 False, 1/100, 1/2),
        ('gamma_crit',       1/28,                 False, 1/42, 1/2),
        ('death_rate',       0.5e-2,               False),
        ('mixing',           1,                    False),

        ('testing_segments', 0,                    False),
        ('detection_rate',   3e-2,                 True, 1e-2, 20e-2),

        ('population',       d.population,         False),
        ('f0',               d.fatalities[startx], False),
        ('p0',               d.positives[startx],  False),

        ('seed_init',        10,                   False, 0, 100),   # number of exposed cases seeded every day
        ('seed_halflife',    10,                   False, 1, 100),   # days to halve the number of daily seeded exposed cases

        ('segments',         segments,             False),

        ('i0',               10,                   True, 1e-3, 1e6),
        ('beta0',            2/7,                  True, 0.01/7, 15/7),
    ]
    params = Parameters()
    params.add_many(*table)

    params.add('t0', value=t_start, vary=False)
    for seg in range(1, segments + 1):
        latest = t_end - (segments - seg) * window
        prev_name = f't{seg - 1}'
        frac_name = f'aux{seg - 1}'

        # The free fraction places this breakpoint relative to the previous one.
        params.add(frac_name, value=0.1, vary=True, min=0, max=1)
        knot_expr = f"{frac_name}*({latest}-{prev_name}-{window})+{prev_name}+{window}"
        params.add(f't{seg}', vary=True, expr=knot_expr)

        params.add(f'beta{seg}', value=2/7, vary=True, min=0.01/7, max=15/7)

    # Positives are rescaled so their larger counts do not dominate the joint fit.
    scaleP = d.fatalities.max()/d.positives.max()
    target = np.append(d.fatalities[startx:], scaleP * d.positives[startx:])

    # lmfit Parameters cannot hold strings, so these travel in a separate dict.
    constants = {
        'interv': 'piecewise linear',
        'init_beta': '',
        'seed': False,
        'scaleP': scaleP,
    }

    minimizer = Minimizer(lmfit_SEIRF_both, params, fcn_args=(d.x[startx:], constants, target))
    fit = minimizer.minimize()

    # Evaluate the model at the fitted parameters, then derive the contact rate.
    model = lmfit_SEIRF(fit.params, d.x[startx:], constants)
    merged = merge_params(fit.params, constants)
    rate = contact_rate(d.x[startx:], merged)

    report_calib(label, d, merged)

    return {'label': label, 'p': merged, 'y': model, 'b': rate, 'startx': startx}



In [None]:
# Run every calibration variant on the same data object `d` and collect the
# result dicts (keys: 'label', 'p', 'y', 'b', 'startx') for the comparison plots below.
# All active runs start at d.minP; the d.minD variants are left commented out.
rpt=[]

#rpt.append(calibrate1(d, label='linear minD', border0=7, border1=7, startx=d.minD))
rpt.append(calibrate1(d, label='linear minP', border0=7, border1=7, startx=d.minP))

#rpt.append(calibrate2(d, label='step minD', border0=7, border1=0, window=7, startx=d.minD))
rpt.append(calibrate2(d, label='step minP', border0=7, border1=0, window=7, startx=d.minP))

#rpt.append(calibrate3(d, label='grid minD', window=7, startx=d.minD))
rpt.append(calibrate3(d, label='grid minP', window=7, startx=d.minP))

#rpt.append(calibrate4(d, label='segments minD', segments=4, window=7, startx=d.minD))
rpt.append(calibrate4(d, label='segments fatalities', segments=5, window=7, startx=d.minP))

rpt.append(calibrate4_pos(d, label='segments positives', segments=5, window=7, startx=d.minP))

#rpt.append(calibrate5(d, label='brute minD', segments=2, border0=0, border1=7, window=7, step=7, startx=d.minD))
rpt.append(calibrate5(d, label='brute minP', segments=2, border0=0, border1=7, window=7, step=7, startx=d.minP))

#rpt.append(calibrate6(d, label='step minD', border0=0, border1=0, window=7, startx=d.minD))
rpt.append(calibrate6(d, label='step minP', border0=0, border1=0, window=7, startx=d.minP))

rpt.append(calibrate7(d, label='segments both', segments = 5, window=7, startx=d.minP))

# 7x2 grid, left column cumulative / right column daily (np.diff of the cumulative):
#   rows 0-1 fatalities, row 2 fatalities residuals (data - model),
#   rows 3-4 positives,  row 5 positives residuals (data - model),
#   row 6   b / gamma_infec (titled 'R0' below), same curve in both columns.
fig,axs = plt.subplots(7,2,figsize=(24,42))

# Observed fatalities (rows 0 and 1).
# NOTE(review): assumes d.dfatalities lines up with d.xd[d.minD+1:] — confirm lengths match.
axs[0][0].plot(d.xd, d.fatalities, '+:')
axs[1][0].plot(d.xd, d.fatalities, '+:')
axs[0][1].plot(d.xd[d.minD+1:], d.dfatalities, '+:')
axs[1][1].plot(d.xd[d.minD+1:], d.dfatalities, '+:')

# Observed positives (rows 3 and 4); row 3 additionally shows the fatalities series,
# although its titles below only mention positives.
axs[3][0].plot(d.xd, d.fatalities, '+:')
axs[3][0].plot(d.xd, d.positives, '+:')
axs[4][0].plot(d.xd, d.positives, '+:')
axs[3][1].plot(d.xd[d.minD+1:], d.dfatalities, '+:')
axs[3][1].plot(d.xd[d.minP+1:], d.dpositives, '+:')
axs[4][1].plot(d.xd[d.minP+1:], d.dpositives, '+:')

# Overlay each calibration on the panels. y is indexed as y[:, cF] / y[:, cP],
# i.e. cF and cP select the fatalities and positives columns of the model output.
for r in rpt:
    label = r['label']
    p = r['p']
    y = r['y']
    b = r['b']
    startx = r['startx']
    
    # Model fatalities: cumulative (left) and daily increments (right).
    axs[0][0].plot(d.xd[startx:], y[:,cF], '-', label=label)
    axs[1][0].plot(d.xd[startx:], y[:,cF], '-', label=label)
    axs[0][1].plot(d.xd[startx+1:], np.diff(y[:,cF]), '-', label=label)
    axs[1][1].plot(d.xd[startx+1:], np.diff(y[:,cF]), '-', label=label)

    # Fatalities residuals. The zero baseline is re-plotted on every iteration,
    # so it also takes a slot in that axes' colour cycle each time.
    axs[2][0].plot(d.xd, np.zeros_like(d.x), '-')
    axs[2][0].plot(d.xd[startx:], d.fatalities[startx:] - y[:,cF], ':', label=label)
    axs[2][1].plot(d.xd[startx+1:], np.diff(d.fatalities[startx:]) - np.diff(y[:,cF]), ':', label=label)

    # Model positives: cumulative (left) and daily increments (right).
    axs[3][0].plot(d.xd[startx:], y[:,cP], '-', label=label)
    axs[4][0].plot(d.xd[startx:], y[:,cP], '-', label=label)
    axs[3][1].plot(d.xd[startx+1:], np.diff(y[:,cP]), '-', label=label)
    axs[4][1].plot(d.xd[startx+1:], np.diff(y[:,cP]), '-', label=label)

    # Row 3 also gets the model fatalities, under the same label as the positives curve.
    axs[3][0].plot(d.xd[startx:], y[:,cF], '-', label=label)
    axs[3][1].plot(d.xd[startx+1:], np.diff(y[:,cF]), '-', label=label)
    
    
    # Positives residuals, with the zero baseline re-plotted per iteration as above.
    axs[5][0].plot(d.xd, np.zeros_like(d.x), '-')
    axs[5][0].plot(d.xd[startx:], d.positives[startx:] - y[:,cP], ':', label=label)
    axs[5][1].plot(d.xd[startx+1:], np.diff(d.positives[startx:]) - np.diff(y[:,cP]), ':', label=label)
    
    # Contact rate divided by gamma_infec; both columns show the identical curve.
    axs[6][0].plot(d.xd, np.zeros_like(d.x), '-')
    axs[6][1].plot(d.xd, np.zeros_like(d.x), '-')
    axs[6][0].plot(d.xd[startx:], b/p['gamma_infec'], '-', label=label)
    axs[6][1].plot(d.xd[startx:], b/p['gamma_infec'], '-', label=label)

# Titles are consumed in row-major order, so the list must hold 7*2 = 14 entries.
titles = ['Cumul Fatalities (log)', 'Daily Fatalities (log)', 'Cumul Fatalities (linear)', 'Daily Fatalities (linear)', 'Cumulative Fatalities - Model', 'Daily Fatalities - Model', 
          'Cumul Positives (log)', 'Daily Positives (log)', 'Cumul Positives (linear)', 'Daily Positives (linear)', 'Cumulative Positives - Model', 'Daily Positives - Model', 'R0','R0']
i=0
for row,ax in enumerate(axs):
    for a in ax:
        # Rows 0 and 3 are passed 'log', every other row 'linear'.
        # format_plot is defined elsewhere — presumably it applies the scale and title; confirm.
        scale = 'log' if row==0 or row==3 else 'linear'
        format_plot(a,scale,titles[i])
        i=i+1
#axs[0][0].set_yscale('log')
#axs[0][1].set_yscale('log')

# Horizontal zero lines on the residual panels.
axs[2][0].axhline(linewidth=1)
axs[2][1].axhline(linewidth=1)

axs[5][0].axhline(linewidth=1)
axs[5][1].axhline(linewidth=1)

fig.autofmt_xdate() 