In [1]:
import numpy as np
import pandas as pd
import sapdi
from pmdarima.arima import auto_arima
np.random.seed(20)
ws = sapdi.get_workspace(name='covid_connection')
dc = ws.get_datacollection(name='datasets')
with dc.open('GCP_IN_data.csv').get_reader() as reader:
    df_all_states = pd.read_csv(reader)

In [2]:
# pip install pmdarima

In [3]:
df_all_states.Date = [x[2:] for x in df_all_states.Date]
df_all_states.Date = pd.to_datetime(df_all_states.Date,dayfirst=True)
kerala = df_all_states[df_all_states['State/UnionTerritory']=='Kerala']
karnataka = df_all_states[df_all_states['State/UnionTerritory']=='Karnataka']
Maharashtra = df_all_states[df_all_states['State/UnionTerritory']=='Maharashtra']

In [4]:
# Maharashtra.set_index('Date',inplace=True)

# Maharashtra_deaths = Maharashtra[['Deaths']]

# mh_deaths_log = np.log(Maharashtra_deaths.Deaths + 1)

# from pmdarima.arima import auto_arima
# stepwise_model = auto_arima(mh_deaths_log, start_p=0, start_q=0, max_p=4, max_q=4, trace=True,
#                            error_action='ignore',
#                             seasonal=False,
#                            suppress_warnings=True,stepwise=False, out_of_sample_size =3,
#                            information_criterion='oob')
# stepwise_model.fit(mh_deaths_log)
# future_forecast = stepwise_model.predict(n_periods=10)



# np.expm1(future_forecast)

# np.expm1(5.993961)

In [5]:
def generate_future_timestamps(df,num_prediction):
    last_date = df.index[-1]
    prediction_dates = pd.date_range(last_date,freq='D', periods=num_prediction+1).tolist()
    return prediction_dates

In [6]:
final_forecasts = pd.DataFrame()
forecast_dates = generate_future_timestamps(df_all_states.set_index('Date'),10)
for state in [karnataka,Maharashtra]:
    state_name = state.iloc[1]['State/UnionTerritory']
    print(f'processing for state : {state_name}')
    
    df_forecast = pd.DataFrame()
    df_forecast['Date'] = forecast_dates[1:]
    df_forecast['State/UnionTerritory'] = state_name
    
    for column in ['Cured','Deaths','Confirmed']:
        
        print(f'Forecasting for {state_name} -- {column}:')
        temp_cases_df = state[['Date',column]]
        temp_cases_df = temp_cases_df.set_index("Date")
        temp_cases_log = np.log(temp_cases_df[column] + 1)
        
        
        stepwise_model = auto_arima(temp_cases_log, start_p=0, start_q=0, max_p=4, max_q=4, trace=True,
                           error_action='ignore',
                            seasonal=False,
                           suppress_warnings=True,stepwise=False, out_of_sample_size =3,
                           information_criterion='oob')
        stepwise_model.fit(temp_cases_log)
        df_forecast[column] = np.expm1(stepwise_model.predict(n_periods=10))
        
#     print( df_forecast.head())   
    final_forecasts = pd.concat([final_forecasts,df_forecast])

processing for state : Karnataka
Forecasting for Karnataka -- Cured:
Fit ARIMA(0,1,0)x(0,0,0,0) [intercept=True]; AIC=-34.084, BIC=-30.221, Time=0.092 seconds
Fit ARIMA(0,1,1)x(0,0,0,0) [intercept=True]; AIC=-34.611, BIC=-28.816, Time=0.393 seconds
Fit ARIMA(0,1,2)x(0,0,0,0) [intercept=True]; AIC=-33.799, BIC=-26.072, Time=0.409 seconds
Fit ARIMA(0,1,3)x(0,0,0,0) [intercept=True]; AIC=-31.798, BIC=-22.139, Time=1.091 seconds
Fit ARIMA(0,1,4)x(0,0,0,0) [intercept=True]; AIC=-29.816, BIC=-18.225, Time=1.902 seconds
Fit ARIMA(1,1,0)x(0,0,0,0) [intercept=True]; AIC=-35.216, BIC=-29.420, Time=0.221 seconds
Fit ARIMA(1,1,1)x(0,0,0,0) [intercept=True]; AIC=-33.511, BIC=-25.784, Time=0.394 seconds
Fit ARIMA(1,1,2)x(0,0,0,0) [intercept=True]; AIC=-31.798, BIC=-22.139, Time=0.793 seconds
Fit ARIMA(1,1,3)x(0,0,0,0) [intercept=True]; AIC=-30.156, BIC=-18.565, Time=2.193 seconds
Fit ARIMA(1,1,4)x(0,0,0,0) [intercept=True]; AIC=-28.202, BIC=-14.679, Time=2.996 seconds
Fit ARIMA(2,1,0)x(0,0,0,0) [int

In [19]:
final_forecasts['active_cases'] = final_forecasts.Confirmed - (final_forecasts.Cured + final_forecasts.Deaths)

In [17]:
final_forecasts

Unnamed: 0,Date,State/UnionTerritory,Cured,Deaths,Confirmed,active_cases
0,2020-04-30,Karnataka,239.425085,21.29169,536.344735,275.62796
1,2020-05-01,Karnataka,252.765549,22.66283,539.742943,264.314563
2,2020-05-02,Karnataka,291.017168,24.118308,543.6505,228.515023
3,2020-05-03,Karnataka,329.44842,25.663311,543.919349,188.807618
4,2020-05-04,Karnataka,350.124731,27.303346,542.046879,164.618802
5,2020-05-05,Karnataka,395.353063,29.044258,539.220609,114.823288
6,2020-05-06,Karnataka,451.21089,30.892251,534.124603,52.021462
7,2020-05-07,Karnataka,484.701867,32.853913,526.979022,9.423242
8,2020-05-08,Karnataka,539.11364,34.936235,518.403401,-55.646474
9,2020-05-09,Karnataka,616.195281,37.146639,508.117356,-145.224564


In [23]:
final_forecasts['total_beds'] = np.where(final_forecasts['State/UnionTerritory'] == 'Karnataka', 1000 ,20000)
final_forecasts['available_beds'] = final_forecasts.total_beds - final_forecasts.active_cases

In [24]:
final_forecasts

Unnamed: 0,Date,State/UnionTerritory,Cured,Deaths,Confirmed,active_cases,total_beds,available_beds
0,2020-04-30,Karnataka,239.425085,21.29169,536.344735,275.62796,1000,724.37204
1,2020-05-01,Karnataka,252.765549,22.66283,539.742943,264.314563,1000,735.685437
2,2020-05-02,Karnataka,291.017168,24.118308,543.6505,228.515023,1000,771.484977
3,2020-05-03,Karnataka,329.44842,25.663311,543.919349,188.807618,1000,811.192382
4,2020-05-04,Karnataka,350.124731,27.303346,542.046879,164.618802,1000,835.381198
5,2020-05-05,Karnataka,395.353063,29.044258,539.220609,114.823288,1000,885.176712
6,2020-05-06,Karnataka,451.21089,30.892251,534.124603,52.021462,1000,947.978538
7,2020-05-07,Karnataka,484.701867,32.853913,526.979022,9.423242,1000,990.576758
8,2020-05-08,Karnataka,539.11364,34.936235,518.403401,-55.646474,1000,1055.646474
9,2020-05-09,Karnataka,616.195281,37.146639,508.117356,-145.224564,1000,1145.224564
