# Filling NaN values using ARIMA

In [1]:
import pandas as pd
import numpy as np
import pmdarima as pm

S = pd.read_csv('../Data/States.csv', header = [0, 1], index_col = 0)
D = pd.read_csv('../Data/Districts.csv', header = [0, 1], index_col = 0)

Based on observations, We can subset ourselves to analysis of the years from 2015 to 2019.<br>
Which reduces the amount of NaN values for us to fill up.

In [2]:
S = S[[str(i) for i in range(2005, 2020)]]

In [3]:
D = D[[str(i) for i in range(2005, 2020)]]

In [4]:
S.loc['Nagaland']

Year  Monsoon
2005  Pre        3.795926
      Post       3.795926
2006  Pre        3.795926
      Post       3.795926
2007  Pre        4.228889
      Post       3.485714
2008  Pre        3.983333
      Post       2.810313
2009  Pre        3.174375
      Post       3.215000
2010  Pre        3.215000
      Post       4.205694
2011  Pre        6.006389
      Post       6.006389
2012  Pre        6.006389
      Post       4.340556
2013  Pre        4.977500
      Post       4.950000
2014  Pre        4.362500
      Post       4.362500
2015  Pre        4.553889
      Post       3.728611
2016  Pre        4.733611
      Post       3.149583
2017  Pre        3.377361
      Post       3.193472
2018  Pre        3.193472
      Post       3.347500
2019  Pre        3.405625
      Post       4.398333
Name: Nagaland, dtype: float64

In [5]:
S.loc['Nagaland']['2006']['Post'] = S.loc['Nagaland']['2005']['Post']

In [6]:
S.to_csv('../Data/States.csv')

In [7]:
D.isna().sum(axis = 1).sort_values().tail(15)

Jharkhand_Latehar                0
Jharkhand_Lohardaga              0
Jharkhand_Pakur                  0
Jharkhand_Palamu                 0
Jharkhand_Pashchimi Singhbhum    0
Karnataka_Koppal                 0
Jharkhand_Purbi Singhbhum        0
Jharkhand_Ranchi                 0
Jharkhand_Sahibganj              0
Jharkhand_Saraikela-kharsawan    0
Jharkhand_Simdega                0
Karnataka_Bagalkot               0
Karnataka_Bangalore              0
Jharkhand_Ramgarh                0
Nct Of Delhi                     0
dtype: int64

In [8]:
ds = list(D.isna().sum(axis = 1).sort_values().tail(14).index)

As we can see, there are only 14 Districts with NaN values. <br>We need to devise suitable filler techniques to fill in the NaN values.

In [9]:
D.loc[ds].T.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Jharkhand_Lohardaga,Jharkhand_Pakur,Jharkhand_Palamu,Jharkhand_Pashchimi Singhbhum,Karnataka_Koppal,Jharkhand_Purbi Singhbhum,Jharkhand_Ranchi,Jharkhand_Sahibganj,Jharkhand_Saraikela-kharsawan,Jharkhand_Simdega,Karnataka_Bagalkot,Karnataka_Bangalore,Jharkhand_Ramgarh,Nct Of Delhi
Year,Monsoon,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2005,Pre,6.756667,7.42,8.670769,6.7275,9.02037,10.533846,7.923333,7.617143,8.483333,7.154286,11.433333,11.949565,5.792,10.916882
2005,Post,4.846667,4.741429,7.010769,4.1675,8.352143,5.499231,5.081333,6.975714,5.12,4.428571,10.492069,10.447727,4.316,10.448077
2006,Pre,7.0,6.281429,7.587692,6.9375,9.514815,9.24,6.262,7.232857,8.066667,6.46,10.425517,11.364762,5.718,11.204151
2006,Post,3.273333,3.021429,5.496923,3.8625,8.512593,4.602308,3.429333,2.915714,3.438333,3.685714,9.46069,12.038636,3.222,10.736681
2007,Pre,7.828333,7.407143,7.134615,5.87875,9.693333,9.862308,6.74,8.674286,8.333333,6.4,9.896552,12.760455,6.75,11.564304
2007,Post,3.145,5.04,4.421538,3.60625,7.531852,3.915385,3.081333,2.915714,5.406667,3.722857,7.579667,12.239545,5.842,11.373361
2008,Pre,6.465,6.848571,7.953077,6.0575,8.180357,8.760769,7.03,6.461429,7.866667,5.947143,9.824333,13.635455,7.54,11.83215
2008,Post,4.075,4.532857,4.979231,3.93375,7.506429,3.9,4.200667,4.805,4.323333,3.437143,8.145667,11.008261,5.2,11.172885
2009,Pre,7.598333,6.535714,7.484615,6.3125,8.898929,10.234615,6.715333,5.91125,7.171667,7.007143,9.561667,12.911667,6.632,12.007952
2009,Post,5.391667,5.151429,6.992308,3.8575,6.646786,5.213077,4.066667,4.515,4.321667,4.605714,5.721333,11.812083,4.812,11.320349


As we can see above, the NaN values at the bottom can be filled using ARIMA forecasting.<br>
Which can be done only after the values at the start of the period are filled using mean for no previous values and ffill for the other ones as we see many static values through these periods.

In [10]:
def arima(ser, no_of_p = 1):
    arima_series = pm.auto_arima(
        y = ser,
        start_p = 0,
        max_p = 2,
        d = 1,
        start_q = 0,
        max_q = 2,
        trace = False,
        seasonal = False,
        error_action = 'ignore',
        suppress_warnings = True
    )
    return list(arima_series.predict(n_periods = no_of_p))

The above function uses ARIMA to predict a number of values based on the given values.

In [11]:
D.loc[ds[0]]

Year  Monsoon
2005  Pre        6.756667
      Post       4.846667
2006  Pre        7.000000
      Post       3.273333
2007  Pre        7.828333
      Post       3.145000
2008  Pre        6.465000
      Post       4.075000
2009  Pre        7.598333
      Post       5.391667
2010  Pre        7.723333
      Post       5.745000
2011  Pre        8.112000
      Post       3.936000
2012  Pre        7.306000
      Post       4.998000
2013  Pre        6.696667
      Post       3.581667
2014  Pre        5.571667
      Post       6.273333
2015  Pre        3.055000
      Post       5.245000
2016  Pre        6.851667
      Post       3.120000
2017  Pre        7.710000
      Post       3.360000
2018  Pre        7.410000
      Post       3.780000
2019  Pre        7.820000
      Post       4.660000
Name: Jharkhand_Lohardaga, dtype: float64

In [12]:
list(D.loc[ds[0]].iloc[:-8:2])

[6.756666666666668,
 7.0,
 7.828333333333334,
 6.465,
 7.598333333333333,
 7.723333333333333,
 8.112,
 7.306,
 6.696666666666667,
 5.571666666666667,
 3.055]

In [13]:
pred = arima(list(D.loc[ds[0]].iloc[:-8:2]))
print(pred)
D.loc[ds[0]]['2016']['Pre'] = pred[0]
D.loc[ds[0]]['2016']['Pre']

[3.055]


3.055

In [14]:
D.loc[ds[0]]

Year  Monsoon
2005  Pre        6.756667
      Post       4.846667
2006  Pre        7.000000
      Post       3.273333
2007  Pre        7.828333
      Post       3.145000
2008  Pre        6.465000
      Post       4.075000
2009  Pre        7.598333
      Post       5.391667
2010  Pre        7.723333
      Post       5.745000
2011  Pre        8.112000
      Post       3.936000
2012  Pre        7.306000
      Post       4.998000
2013  Pre        6.696667
      Post       3.581667
2014  Pre        5.571667
      Post       6.273333
2015  Pre        3.055000
      Post       5.245000
2016  Pre        3.055000
      Post       3.120000
2017  Pre        7.710000
      Post       3.360000
2018  Pre        7.410000
      Post       3.780000
2019  Pre        7.820000
      Post       4.660000
Name: Jharkhand_Lohardaga, dtype: float64

In [15]:
D.loc[ds[1]]

Year  Monsoon
2005  Pre        7.420000
      Post       4.741429
2006  Pre        6.281429
      Post       3.021429
2007  Pre        7.407143
      Post       5.040000
2008  Pre        6.848571
      Post       4.532857
2009  Pre        6.535714
      Post       5.151429
2010  Pre        7.572857
      Post       4.080000
2011  Pre        7.638571
      Post       4.531429
2012  Pre        7.372857
      Post       7.372857
2013  Pre        7.651429
      Post       3.454286
2014  Pre        5.551429
      Post       4.868571
2015  Pre        6.191429
      Post       5.110000
2016  Pre        7.827143
      Post       4.851429
2017  Pre        6.608571
      Post       4.578571
2018  Pre        6.070000
      Post       3.574286
2019  Pre        7.080000
      Post       3.615714
Name: Jharkhand_Pakur, dtype: float64

In [16]:
D.loc[ds[1]]['2008']['Post'] = D.loc[ds[1]]['2007']['Post']
D.loc[ds[1]]['2008']['Post']

5.04

We assigned the previous value to the nan value present in 2008 because the values have been static until this Post Monsoon season.

In [17]:
D.loc[ds[2]]

Year  Monsoon
2005  Pre         8.670769
      Post        7.010769
2006  Pre         7.587692
      Post        5.496923
2007  Pre         7.134615
      Post        4.421538
2008  Pre         7.953077
      Post        4.979231
2009  Pre         7.484615
      Post        6.992308
2010  Pre         9.879231
      Post        8.724615
2011  Pre        10.540000
      Post        6.622308
2012  Pre         9.047692
      Post        5.505385
2013  Pre         6.313077
      Post        6.266154
2014  Pre         9.252308
      Post        6.982308
2015  Pre         7.071538
      Post        6.765385
2016  Pre         8.312308
      Post        4.626923
2017  Pre         8.859231
      Post        5.257692
2018  Pre         8.274615
      Post        4.840000
2019  Pre         9.868462
      Post        6.037692
Name: Jharkhand_Palamu, dtype: float64

In [18]:
D.loc[ds[2]]['2008']['Pre'] = D.loc[ds[2]]['2007']['Pre']
D.loc[ds[2]]['2008']['Pre']

7.134615384615384

In [19]:
D.loc[ds[3]]

Year  Monsoon
2005  Pre        6.727500
      Post       4.167500
2006  Pre        6.937500
      Post       3.862500
2007  Pre        5.878750
      Post       3.606250
2008  Pre        6.057500
      Post       3.933750
2009  Pre        6.312500
      Post       3.857500
2010  Pre        7.512500
      Post       6.220000
2011  Pre        7.715000
      Post       3.226250
2012  Pre        7.806250
      Post       3.432500
2013  Pre        7.231250
      Post       2.060000
2014  Pre        6.982500
      Post       4.093750
2015  Pre        7.416250
      Post       5.326250
2016  Pre        7.350000
      Post       3.686250
2017  Pre        7.993750
      Post       4.116250
2018  Pre        6.566250
      Post       2.787143
2019  Pre        6.398571
      Post       3.762500
Name: Jharkhand_Pashchimi Singhbhum, dtype: float64

In [20]:
pred = arima(list(D.loc[ds[3]].iloc[:-8:2]))
print(pred)
D.loc[ds[3]]['2016']['Pre'] = pred[0]
D.loc[ds[3]]['2016']['Pre']

[7.41625]


7.41625

In [21]:
D.loc[ds[4]]

Year  Monsoon
2005  Pre         9.020370
      Post        8.352143
2006  Pre         9.514815
      Post        8.512593
2007  Pre         9.693333
      Post        7.531852
2008  Pre         8.180357
      Post        7.506429
2009  Pre         8.898929
      Post        6.646786
2010  Pre         6.785000
      Post        5.380357
2011  Pre         7.638571
      Post        7.435926
2012  Pre         8.764074
      Post        8.216296
2013  Pre        11.579643
      Post       10.166786
2014  Pre        11.083571
      Post        8.683929
2015  Pre         9.061786
      Post        8.929630
2016  Pre         9.882222
      Post        7.194783
2017  Pre         8.899565
      Post        6.823478
2018  Pre         7.546522
      Post        8.273913
2019  Pre         8.495500
      Post        6.504000
Name: Karnataka_Koppal, dtype: float64

In [22]:
D.loc[ds[4]]['2006']['Post'] = D.loc[ds[4]]['2005']['Post']

D.loc[ds[4]]['2006']['Post'] 

8.352142857142857

In [23]:
D.loc[ds[5]]

Year  Monsoon
2005  Pre        10.533846
      Post        5.499231
2006  Pre         9.240000
      Post        4.602308
2007  Pre         9.862308
      Post        3.915385
2008  Pre         8.760769
      Post        3.900000
2009  Pre        10.234615
      Post        5.213077
2010  Pre         9.460000
      Post        7.428462
2011  Pre         9.593077
      Post        4.099231
2012  Pre         9.076923
      Post        5.950000
2013  Pre         9.925385
      Post        1.992308
2014  Pre         7.660000
      Post        4.650769
2015  Pre         7.131538
      Post        5.234615
2016  Pre         8.573846
      Post        5.658462
2017  Pre         9.241667
      Post        5.790000
2018  Pre         8.199000
      Post        3.701000
2019  Pre         7.076667
      Post        2.916667
Name: Jharkhand_Purbi Singhbhum, dtype: float64

In [24]:
D.loc[ds[5]]['2006']['Post'] = D.loc[ds[5]]['2005']['Post']
D.loc[ds[5]]['2006']['Post'] 

5.499230769230769

In [25]:
pred = arima(list(D.loc[ds[5]].iloc[1:-2:2]))
print(pred)

D.loc[ds[5]]['2019']['Post'] = pred[0]

D.loc[ds[5]]['2019']['Post']

[4.690283620991382]


4.690283620991382

In [26]:
D.loc[ds[6]]

Year  Monsoon
2005  Pre        7.923333
      Post       5.081333
2006  Pre        6.262000
      Post       3.429333
2007  Pre        6.740000
      Post       3.081333
2008  Pre        7.030000
      Post       4.200667
2009  Pre        6.715333
      Post       4.066667
2010  Pre        7.544667
      Post       6.119286
2011  Pre        8.059286
      Post       3.941429
2012  Pre        7.563571
      Post       4.150667
2013  Pre        7.007333
      Post       3.287333
2014  Pre        6.450000
      Post       4.362000
2015  Pre        6.295333
      Post       5.043333
2016  Pre        6.636667
      Post       3.494286
2017  Pre        5.576429
      Post       4.127143
2018  Pre        5.213571
      Post       4.802857
2019  Pre        8.310000
      Post       3.008333
Name: Jharkhand_Ranchi, dtype: float64

In [27]:
D.loc[ds[6]].iloc[:-3:2]

Year  Monsoon
2005  Pre        7.923333
2006  Pre        6.262000
2007  Pre        6.740000
2008  Pre        7.030000
2009  Pre        6.715333
2010  Pre        7.544667
2011  Pre        8.059286
2012  Pre        7.563571
2013  Pre        7.007333
2014  Pre        6.450000
2015  Pre        6.295333
2016  Pre        6.636667
2017  Pre        5.576429
2018  Pre        5.213571
Name: Jharkhand_Ranchi, dtype: float64

In [28]:
pred = arima(list(D.loc[ds[6]].iloc[1:-3:2]))
print(pred)

D.loc[ds[6]]['2019']['Pre'] = pred[0]
D.loc[ds[6]]['2019']['Pre'] 

[4.160481224536654]


4.160481224536654

In [29]:
pred = arima(list(D.loc[ds[6]].iloc[1:-3:2]), no_of_p = 2)
print(pred)

D.loc[ds[6]].iloc[1:-3:2]

D.loc[ds[6]]['2018']['Post'] = pred[0]
D.loc[ds[6]]['2019']['Post'] = pred[1]
D.loc[ds[6]][['2018', '2019']]

[4.160481224536654, 4.1572005084152535]


Year  Monsoon
2018  Pre        5.213571
      Post       4.160481
2019  Pre        4.160481
      Post       4.157201
Name: Jharkhand_Ranchi, dtype: float64

In [30]:
D.loc[ds[7]]

Year  Monsoon
2005  Pre        7.617143
      Post       6.975714
2006  Pre        7.232857
      Post       2.915714
2007  Pre        8.674286
      Post       2.915714
2008  Pre        6.461429
      Post       4.805000
2009  Pre        5.911250
      Post       4.515000
2010  Pre        6.393750
      Post       3.618750
2011  Pre        5.920000
      Post       3.321250
2012  Pre        7.381250
      Post       8.200000
2013  Pre        7.532857
      Post       4.018571
2014  Pre        5.094286
      Post       4.300000
2015  Pre        4.887500
      Post       3.487500
2016  Pre        6.675000
      Post       4.583750
2017  Pre        6.960000
      Post       3.913750
2018  Pre        6.116250
      Post       3.721250
2019  Pre        6.303750
      Post       2.544286
Name: Jharkhand_Sahibganj, dtype: float64

In [31]:
pred = arima(list(D.loc[ds[7]].iloc[1:-3:2]))
print(pred)

D.loc[ds[7]]['2019']['Pre'] = pred[0]
pred = arima(list(D.loc[ds[7]].iloc[1:-3:2]), no_of_p = 2)
print(pred)

D.loc[ds[7]].iloc[1:-3:2]

D.loc[ds[7]]['2018']['Post'] = pred[0]
D.loc[ds[7]]['2019']['Post'] = pred[1]
D.loc[ds[7]][['2018', '2019']]

[4.210256589689471]
[4.210256589689471, 4.1790767071377015]


Year  Monsoon
2018  Pre        6.116250
      Post       4.210257
2019  Pre        4.210257
      Post       4.179077
Name: Jharkhand_Sahibganj, dtype: float64

In [32]:
D.loc[ds[8]]

Year  Monsoon
2005  Pre        8.483333
      Post       5.120000
2006  Pre        8.066667
      Post       3.438333
2007  Pre        8.333333
      Post       5.406667
2008  Pre        7.866667
      Post       4.323333
2009  Pre        7.171667
      Post       4.321667
2010  Pre        8.420000
      Post       5.866667
2011  Pre        8.258333
      Post       3.883333
2012  Pre        8.371667
      Post       5.731667
2013  Pre        7.408333
      Post       1.615000
2014  Pre        7.130000
      Post       4.141667
2015  Pre        7.000000
      Post       5.255000
2016  Pre        7.135000
      Post       4.828333
2017  Pre        7.770000
      Post       4.766667
2018  Pre        6.530000
      Post       3.436667
2019  Pre        7.828333
      Post       4.375000
Name: Jharkhand_Saraikela-kharsawan, dtype: float64

In [33]:
D.loc[ds[8]]['2005']['Pre'] = np.nanmean(D.loc[ds[8]].iloc[::2])
D.loc[ds[8]]['2005']['Post'] = np.nanmean(D.loc[ds[8]].iloc[1::2])
D.loc[ds[8]]['2005']

Monsoon
Pre     7.718222
Post    4.434000
Name: Jharkhand_Saraikela-kharsawan, dtype: float64

In [34]:
pred = arima(list(D.loc[ds[8]].iloc[:-2:2]))
print(pred)

D.loc[ds[8]]['2019']['Pre'] = pred[0]
pred = arima(list(D.loc[ds[8]].iloc[1:-2:2]))
print(pred)

D.loc[ds[8]]['2019']['Post'] = pred[0]
D.loc[ds[8]]['2019']

[7.0178866493303484]
[4.2422586409086875]


Monsoon
Pre     7.017887
Post    4.242259
Name: Jharkhand_Saraikela-kharsawan, dtype: float64

In [35]:
D.loc[ds[9]]

Year  Monsoon
2005  Pre        7.154286
      Post       4.428571
2006  Pre        6.460000
      Post       3.685714
2007  Pre        6.400000
      Post       3.722857
2008  Pre        5.947143
      Post       3.437143
2009  Pre        7.007143
      Post       4.605714
2010  Pre        7.131429
      Post       5.172857
2011  Pre        7.090000
      Post       4.330000
2012  Pre        6.652857
      Post       4.784286
2013  Pre        6.412857
      Post       3.434286
2014  Pre        5.271429
      Post       3.968571
2015  Pre        3.057143
      Post       4.785714
2016  Pre        6.462857
      Post       3.144286
2017  Pre        7.114286
      Post       3.100000
2018  Pre        6.385714
      Post       3.592857
2019  Pre        7.081429
      Post       2.311429
Name: Jharkhand_Simdega, dtype: float64

In [36]:
pred = arima(list(D.loc[ds[9]].iloc[:-14:2]), no_of_p = 2)
print(pred)

D.loc[ds[9]]['2013']['Pre'] = pred[0]
D.loc[ds[9]]['2014']['Pre'] = pred[1]
pred = arima(list(D.loc[ds[9]].iloc[1:-14:2]), no_of_p = 2)
print(pred)

D.loc[ds[9]]['2013']['Post'] = pred[0]
D.loc[ds[9]]['2014']['Post'] = pred[1]
D.loc[ds[9]][['2013', '2014']]

[6.652857142857143, 6.652857142857143]
[4.784285714285714, 4.784285714285714]


Year  Monsoon
2013  Pre        6.652857
      Post       4.784286
2014  Pre        6.652857
      Post       4.784286
Name: Jharkhand_Simdega, dtype: float64

In [37]:
D.loc[ds[10]]

Year  Monsoon
2005  Pre        11.433333
      Post       10.492069
2006  Pre        10.425517
      Post        9.460690
2007  Pre         9.896552
      Post        7.579667
2008  Pre         9.824333
      Post        8.145667
2009  Pre         9.561667
      Post        5.721333
2010  Pre         7.837000
      Post        6.438333
2011  Pre         9.518333
      Post        8.855172
2012  Pre        11.233704
      Post       10.542222
2013  Pre        11.351034
      Post        8.561379
2014  Pre        10.259655
      Post        9.719655
2015  Pre        11.103571
      Post       10.412593
2016  Pre        12.247778
      Post       11.349565
2017  Pre        11.502727
      Post        7.777000
2018  Pre         9.080526
      Post        8.325238
2019  Pre         9.310500
      Post        6.906500
Name: Karnataka_Bagalkot, dtype: float64

In [38]:
avgr = np.nanmean(D.loc[ds[10]].iloc[::2])
avgo = np.nanmean(D.loc[ds[10]].iloc[1::2])
D.loc[ds[10]]['2005']['Pre'] = avgr
D.loc[ds[10]]['2005']['Post'] = avgo
D.loc[ds[10]]['2006']['Pre'] = avgr
D.loc[ds[10]]['2006']['Post'] = avgo
D.loc[ds[10]][['2005', '2006']]

Year  Monsoon
2005  Pre        10.305749
      Post        8.685806
2006  Pre        10.305749
      Post        8.685806
Name: Karnataka_Bagalkot, dtype: float64

In [39]:
pred = arima(list(D.loc[ds[10]].iloc[1:-2:2]))
print(pred)
D.loc[ds[10]]['2019']['Post'] = pred[0]
D.loc[ds[10]]['2019']['Post']

[8.325238095238094]


8.325238095238094

In [40]:
D.loc[ds[11]]

Year  Monsoon
2005  Pre        11.949565
      Post       10.447727
2006  Pre        11.364762
      Post       12.038636
2007  Pre        12.760455
      Post       12.239545
2008  Pre        13.635455
      Post       11.008261
2009  Pre        12.911667
      Post       11.812083
2010  Pre        13.343333
      Post       12.487500
2011  Pre        13.325833
      Post       12.627391
2012  Pre        13.920909
      Post       16.080455
2013  Pre        18.034783
      Post       15.829091
2014  Pre        16.877727
      Post       14.057826
2015  Pre        14.832174
      Post       14.244783
2016  Pre        14.428696
      Post       16.062632
2017  Pre        16.167895
      Post       11.162632
2018  Pre        15.957368
      Post       14.930526
2019  Pre        18.962222
      Post       15.413889
Name: Karnataka_Bangalore, dtype: float64

In [41]:
D.loc[ds[11]]['2007']['Pre'] = D.loc[ds[11]]['2005']['Pre']
D.loc[ds[11]]['2006']['Post'] = D.loc[ds[11]]['2005']['Post']
D.loc[ds[11]]['2008']['Pre'] = D.loc[ds[11]]['2005']['Pre']
D.loc[ds[11]]['2007']['Post'] = D.loc[ds[11]]['2005']['Pre']
D.loc[ds[11]].head(8)

Year  Monsoon
2005  Pre        11.949565
      Post       10.447727
2006  Pre        11.364762
      Post       10.447727
2007  Pre        11.949565
      Post       11.949565
2008  Pre        11.949565
      Post       11.008261
Name: Karnataka_Bangalore, dtype: float64

In [42]:
pred = arima(list(D.loc[ds[11]].iloc[1:-2:2]))
print(pred)
D.loc[ds[11]]['2019']['Post'] = pred[0]
D.loc[ds[11]]['2019']['Post']

[15.563105713609705]


15.563105713609705

In [43]:
D.loc[ds[12]].iloc[1:-10:2]

Year  Monsoon
2005  Post       4.316
2006  Post       3.222
2007  Post       5.842
2008  Post       5.200
2009  Post       4.812
2010  Post       7.030
2011  Post       3.910
2012  Post       4.880
2013  Post       4.236
2014  Post       4.194
Name: Jharkhand_Ramgarh, dtype: float64

In [44]:
pred = arima(list(D.loc[ds[12]].iloc[:-10:2]), no_of_p = 5)
print(pred)
for i in range(5):
    D.loc[ds[12]][str(2015 + i)]['Pre'] = pred[i]
pred = arima(list(D.loc[ds[12]].iloc[1:-10:2]), no_of_p = 5)
print(pred)
for i in range(5):
    D.loc[ds[12]][str(2015 + i)]['Post'] = pred[i]
D.loc[ds[12]].tail(10)

[5.064, 5.064, 5.064, 5.064, 5.064]
[4.748722315551895, 4.745909026177488, 4.743095736803081, 4.740282447428674, 4.737469158054267]


Year  Monsoon
2015  Pre        5.064000
      Post       4.748722
2016  Pre        5.064000
      Post       4.745909
2017  Pre        5.064000
      Post       4.743096
2018  Pre        5.064000
      Post       4.740282
2019  Pre        5.064000
      Post       4.737469
Name: Jharkhand_Ramgarh, dtype: float64

In [45]:
D.loc[ds[13]] = D.loc[ds[13]].fillna(method = 'ffill')
D.loc[ds[13]]

Year  Monsoon
2005  Pre        10.916882
      Post       10.448077
2006  Pre        11.204151
      Post       10.736681
2007  Pre        11.564304
      Post       11.373361
2008  Pre        11.832150
      Post       11.172885
2009  Pre        12.007952
      Post       11.320349
2010  Pre        12.866799
      Post       10.627150
2011  Pre        11.591267
      Post       11.549803
2012  Pre        12.123784
      Post       11.950637
2013  Pre        12.660098
      Post       11.635926
2014  Pre        12.629065
      Post       12.981733
2015  Pre        12.795311
      Post       12.331303
2016  Pre        13.107287
      Post       12.442290
2017  Pre        21.711944
      Post       20.796031
2018  Pre        18.087802
      Post       17.719895
2019  Pre        17.643864
      Post       17.458293
Name: Nct Of Delhi, dtype: float64

In [46]:
D.loc[ds].T.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Jharkhand_Lohardaga,Jharkhand_Pakur,Jharkhand_Palamu,Jharkhand_Pashchimi Singhbhum,Karnataka_Koppal,Jharkhand_Purbi Singhbhum,Jharkhand_Ranchi,Jharkhand_Sahibganj,Jharkhand_Saraikela-kharsawan,Jharkhand_Simdega,Karnataka_Bagalkot,Karnataka_Bangalore,Jharkhand_Ramgarh,Nct Of Delhi
Year,Monsoon,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2005,Pre,6.756667,7.42,8.670769,6.7275,9.02037,10.533846,7.923333,7.617143,7.718222,7.154286,10.305749,11.949565,5.792,10.916882
2005,Post,4.846667,4.741429,7.010769,4.1675,8.352143,5.499231,5.081333,6.975714,4.434,4.428571,8.685806,10.447727,4.316,10.448077
2006,Pre,7.0,6.281429,7.587692,6.9375,9.514815,9.24,6.262,7.232857,8.066667,6.46,10.305749,11.364762,5.718,11.204151
2006,Post,3.273333,3.021429,5.496923,3.8625,8.352143,5.499231,3.429333,2.915714,3.438333,3.685714,8.685806,10.447727,3.222,10.736681
2007,Pre,7.828333,7.407143,7.134615,5.87875,9.693333,9.862308,6.74,8.674286,8.333333,6.4,9.896552,11.949565,6.75,11.564304
2007,Post,3.145,5.04,4.421538,3.60625,7.531852,3.915385,3.081333,2.915714,5.406667,3.722857,7.579667,11.949565,5.842,11.373361
2008,Pre,6.465,6.848571,7.134615,6.0575,8.180357,8.760769,7.03,6.461429,7.866667,5.947143,9.824333,11.949565,7.54,11.83215
2008,Post,4.075,5.04,4.979231,3.93375,7.506429,3.9,4.200667,4.805,4.323333,3.437143,8.145667,11.008261,5.2,11.172885
2009,Pre,7.598333,6.535714,7.484615,6.3125,8.898929,10.234615,6.715333,5.91125,7.171667,7.007143,9.561667,12.911667,6.632,12.007952
2009,Post,5.391667,5.151429,6.992308,3.8575,6.646786,5.213077,4.066667,4.515,4.321667,4.605714,5.721333,11.812083,4.812,11.320349


In [47]:
D.to_csv('../Data/Districts.csv')