In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np

%matplotlib inline
mpl.rcParams['figure.figsize'] = (16,10)
pd.set_option('display.max_rows', 500)

import plotly.graph_objects as go

# Data load

In [3]:
# Read the semicolon-separated timeline table, then preview the five rows
# with the smallest 'date' values (ascending order is the pandas default).
df_analyse = pd.read_csv(
    '../data/processed/COVID_small_sync_timeline_table.csv',
    sep=';',
)
df_analyse.sort_values(by='date').head()


Unnamed: 0,doubling every two days,doubling every 4 days,doubling every 10 days,Italy,US,Spain,Germany,"Korea, South",date
0,100.0,100.0,100.0,155.0,104.0,120.0,130.0,104.0,0
1,141.421356,118.920712,107.177346,229.0,174.0,165.0,159.0,204.0,1
2,200.0,141.421356,114.869835,322.0,222.0,222.0,196.0,433.0,2
3,282.842712,168.179283,123.114441,453.0,337.0,259.0,262.0,602.0,3
4,400.0,200.0,131.950791,655.0,451.0,400.0,482.0,833.0,4


In [4]:
# Every column label of df_analyse except the first one.
# NOTE(review): judging by the table displayed for df_analyse, this slice
# presumably still holds 'doubling every ...' columns and 'date', not only
# country names — confirm the intended range before relying on it.
countr_list = df_analyse.columns[1:]

# Helper functions

In [5]:
def quick_plot(x_in, df_input, y_scale='log', slider=False,
               renderer='notebook_connected'):
    """ Quick basic plot for quick static evaluation of a time series

        Every column of df_input is drawn as one line trace against x_in.
        You can push selective columns of your data frame by .iloc[:,[0,6,7,8]]

        Parameters:
        ----------
        x_in : array
            array of date time object, or array of numbers
            (shared x values for all traces)
        df_input : pandas dataframe
            the plotting matrix where each column is plotted
            the name of the column will be used for the legend
        y_scale : str
            y-axis scale as 'log' or 'linear'
        slider : bool
            any truthy value adds a range slider below the x-axis
        renderer : str
            name of the plotly renderer passed on to fig.show();
            defaults to 'notebook_connected' (the previously fixed value)

        Returns:
        ----------
        None
            the figure is displayed as a side effect and not returned

    """
    fig = go.Figure()

    # one trace per column; the column label becomes the legend entry
    for each in df_input.columns:
        fig.add_trace(go.Scatter(
                        x=x_in,
                        y=df_input[each],
                        name=each,
                        opacity=0.8))

    fig.update_layout(autosize=True,
        width=1024,
        height=768,
        font=dict(
            family="PT Sans, monospace",
            size=18,
            color="#7f7f7f"
            )
        )
    fig.update_yaxes(type=y_scale)
    fig.update_xaxes(tickangle=-45,
                 nticks=20,
                 tickfont=dict(size=14,color="#7f7f7f")
                )
    if slider:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show(renderer=renderer)
      
    

In [11]:
# Plot the columns from position 3 up to (not including) the last one
# against the 'date' column, on a log y-axis with the range slider enabled.
quick_plot(
    df_analyse['date'],
    df_analyse.iloc[:, 3:-1],
    y_scale='log',
    slider=True,
)

# Fitting a polynomial curve

## Recommended reading: the section "Hyperparameters and Model Validation" by Jake VanderPlas

*The `PolynomialRegression` function used below is taken from the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook).*

See also the scikit-learn example on polynomial interpolation: https://scikit-learn.org/stable/auto_examples/linear_model/plot_polynomial_interpolation.html#sphx-glr-auto-examples-linear-model-plot-polynomial-interpolation-py


In [14]:
# Check that all data are there: take the first 27 rows of the columns from
# position 3 up to (not including) the last one; reset_index() keeps the old
# row labels as an extra 'index' column.
df_poly_check = df_analyse.iloc[:27, 3:-1].reset_index()
df_poly_check.head()

Unnamed: 0,index,Italy,US,Spain,Germany,"Korea, South"
0,0,155.0,104.0,120.0,130.0,104.0
1,1,229.0,174.0,165.0,159.0,204.0
2,2,322.0,222.0,222.0,196.0,433.0
3,3,453.0,337.0,259.0,262.0,602.0
4,4,655.0,451.0,400.0,482.0,833.0
5,5,888.0,519.0,500.0,670.0,977.0
6,6,1128.0,711.0,673.0,799.0,1261.0
7,7,1694.0,1109.0,1073.0,1040.0,1766.0
8,8,2036.0,1561.0,1695.0,1176.0,2337.0
9,9,2502.0,2157.0,2277.0,1457.0,3150.0


In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline


def PolynomialRegression(degree=2, **kwargs):
    """Build a pipeline of polynomial feature expansion plus linear regression.

    Parameters
    ----------
    degree : int
        degree handed to PolynomialFeatures (default 2)
    **kwargs
        forwarded unchanged to LinearRegression

    Returns
    -------
    the object returned by make_pipeline for the two steps, in that order
    """
    feature_expansion = PolynomialFeatures(degree)
    regressor = LinearRegression(**kwargs)
    return make_pipeline(feature_expansion, regressor)