## Generate predictions broken down by:
 - district
 - province
 - age group
 - profession
 

In [18]:
import numpy as np 
import pandas as pd 
import sklearn
import pickle

_General test-data pre-processing_

In [19]:
# Load the pickled scaler object whose transform / inverse_transform methods
# are used by the pre-processing helpers in this notebook.
# The context manager guarantees the file handle is closed after unpickling.
# NOTE(review): pickle.load can execute arbitrary code -- only load files that
# come from a trusted source.
with open('../../../data/models/minMax_lsmt_1.pkl', 'rb') as scaler_file:
    minmax_scaler = pickle.load(scaler_file)

In [20]:
# Read the test set and order it by the 'index' column, then by 'formal_date',
# so that rows sharing an 'index' value are contiguous.
test_df = (
    pd.read_csv('../../../data/main_data/final_test.csv')
    .sort_values(by=['index', 'formal_date'])
)
# Distinct 'index' values in order of first appearance, and how many there are.
uniq_dist = test_df['index'].unique().tolist()
num_dists = len(uniq_dist)


In [21]:
# One sub-frame per distinct 'index' value, in the same order as `uniq_dist`.
dist_df_list = [
    test_df.loc[test_df['index'] == dist_id, :]
    for dist_id in uniq_dist
]

In [22]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a table of lagged input and output columns.

    Parameters
    ----------
    data : list, array-like or DataFrame
        Observations, one row per time step and one column per variable.
        A flat list or 1-D array is treated as a single variable.
    n_in : int
        Number of lagged steps (t-n_in ... t-1) to include.
    n_out : int
        Number of forward steps (t ... t+n_out-1) to include.
    dropnan : bool
        When true, drop every row containing NaN (shifting introduces NaN at
        the edges of the series).

    Returns
    -------
    pandas.DataFrame
        Shifted copies of the input placed side by side, with columns named
        ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``.
    """
    df = pd.DataFrame(data)
    # Count the variables on the constructed frame rather than on the raw
    # input: a list of rows has more than one column, and a 1-D array has no
    # shape[1], so inspecting `data` directly mis-sized the column names.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [23]:
def preProcess(df):
    """Convert one raw test frame into the 3-D input array fed to the model.

    Steps: drop the 'formal_date' column, cast the remaining values to
    float32, scale them with the module-level ``minmax_scaler``, build a
    one-step-lag table with ``series_to_supervised`` and keep every column
    except the last one as features.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'formal_date' column. It is not modified.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_rows, 1, n_features).
    """
    # Drop on a copy: dropping in place altered the caller's frame, so calling
    # this function a second time on the same frame raised KeyError.
    features = df.drop('formal_date', axis=1)
    # astype returns a new array; the result must be kept for the cast to
    # take effect (it was previously discarded).
    values = features.values.astype('float32')
    scaled = minmax_scaler.transform(values)
    reframed = series_to_supervised(scaled, 1, 1).values
    # Everything except the final column is used as model input.
    test_X = reframed[:, :-1]
    return test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))



In [24]:
# Run every per-district frame through preProcess, preserving list order.
pre_processed_data = [preProcess(district_df) for district_df in dist_df_list]

Predicting with the saved model

In [25]:
# Load the saved Keras model from disk.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top so all dependencies are visible in one place.
from tensorflow.keras.models import load_model
model = load_model('../../../data/models/LSTM_1.h5')

In [26]:
# One model.predict call per pre-processed array; results keep the same order
# as `pre_processed_data`.
yhat_list = [model.predict(district_X) for district_X in pre_processed_data]

In [27]:
def inversePreProcess(yhat, test_x):
    """Map scaled model outputs back through the scaler's inverse transform.

    Parameters
    ----------
    yhat : numpy.ndarray
        Model predictions; joined column-wise to the features, so it must be
        2-D with one row per row of ``test_x``.
    test_x : numpy.ndarray
        3-D model input; it is flattened to (shape[0], shape[2]).

    Returns
    -------
    numpy.ndarray
        Last column of the inverse-transformed matrix, one value per row.
    """
    n_rows, n_cols = test_x.shape[0], test_x.shape[2]
    flat_X = test_x.reshape((n_rows, n_cols))
    # Rebuild a matrix of the first 14 feature columns plus the prediction.
    # NOTE(review): presumably the scaler was fitted on 15 columns with the
    # target last -- confirm before changing the 14.
    stacked = np.concatenate((flat_X[:, :14], yhat), axis=1)
    restored = minmax_scaler.inverse_transform(stacked)
    return restored[:, -1]


In [28]:
res_list = []
for i in range(0,len(yhat_list)):
    res_list.append(inversePreProcess(yhat_list[i], pre_processed_data[i]))


In [29]:
# Keep only the last element of each inverse-transformed prediction array.
dist_result = [district_preds[-1] for district_preds in res_list]
dist_result

[39.372662865209705,
 35.70161458505167,
 34.26633295205763,
 20.34711299431779,
 16.386532321212318,
 11.357818235309963,
 13.049751060968497,
 7.136516415386911,
 6.216467538646918,
 4.044180359650236,
 4.123657788966169,
 2.123007753749371,
 2.055596353434402,
 2.2039295712449314,
 2.0650322157283947,
 1.196969569477281,
 1.0987561372120342,
 1.0993579857988673,
 1.010162112293614,
 0.12572280701648184,
 0.06246096565158948,
 0.05117872608715787,
 0.05822260043749344,
 -0.0047718535598629585,
 0.036300468457582905]

In [30]:
# District id (1-based) -> district name; the ids follow the list order below.
dist_map = dict(enumerate([
    'Colombo', 'Gampaha', 'Kalutara',
    'Galle', 'Matara', 'Hambantota',
    'Ratnapura', 'Kegalle',
    'Kandy', 'NuwaraEliya', 'Matale',
    'Badulla', 'Moneragala',
    'Batticaloa', 'Ampara', 'Trincomalee',
    'Kurunegala', 'Puttalam',
    'Anuradhapura', 'Polonnaruwa',
    'Jaffna', 'Vavuniya', 'Kilinochchi', 'Mannar', 'Mulativu',
], start=1))


In [31]:
# Pair each district name with its predicted count. Both sequences are matched
# purely by position.
dist_count_df = pd.DataFrame({
    'District': list(dist_map.values()),
    'Infected Count': dist_result,
})
dist_count_df

Unnamed: 0,District,Infected Count
0,Colombo,39.372663
1,Gampaha,35.701615
2,Kalutara,34.266333
3,Galle,20.347113
4,Matara,16.386532


Province-wise prediction

In [32]:
# Province id -> 1-based district ids belonging to that province.
mapping = {
    1: [1, 2, 3],
    2: [4, 5, 6],
    3: [7, 8],
    4: [9, 10, 11],
    5: [12, 13],
    6: [14, 15, 16],
    7: [17, 18],
    8: [19, 20],
    9: [21, 22, 23, 24, 25],
}
# Province id -> display name ('Nothern' was a typo for 'Northern').
prov = {
    1: 'Western',
    2: 'Southern',
    3: 'Sabaragamuwa',
    4: 'Central',
    5: 'Uva',
    6: 'Eastern',
    7: 'NorthWestern',
    8: 'NorthCentral',
    9: 'Northern',
}

In [33]:
# Total predicted count per province. Iterating over the keys of `prov`
# (instead of a hard-coded range(1, 10)) keeps the totals in the same order as
# prov.values() and avoids a magic number tied to the number of provinces.
prov_cnt = []
for prov_id in prov:
    # District ids are 1-based while dist_result is a 0-based list.
    prov_cnt.append(sum(dist_result[dist_id - 1] for dist_id in mapping[prov_id]))


In [34]:
# Pair each province name with its summed count (matched by position).
# NOTE(review): the first column holds province names although it is labelled
# 'District'; the label is kept so existing users of this frame are unaffected.
prov_count_df = pd.DataFrame({
    'District': list(prov.values()),
    'Infected Count': prov_cnt,
})
prov_count_df

Unnamed: 0,District,Infected Count
0,Western,109.34061
1,Southern,48.091464
2,Sabaragamuwa,20.186267
3,Central,14.384306
4,Uva,4.178604


Age-group-wise prediction

In [36]:
# Load the per-district age table.
age_df_1 = pd.read_csv('../../../data/main_data/Age_district_percentages.csv')
age_df_1.columns
# Columns 1..5 are the five age bands ('0<10' ... '60<' in the displayed
# column index); column 0 ('District') and the trailing 'psum' are left out.
age_df = age_df_1.iloc[:, 1:6]
age_value = age_df.values

# Position in `dist_result` of the next row to be scaled; mul_prediction
# advances it, so it must be reset to 0 before every apply_along_axis pass.
pointer = 0

def mul_prediction(row):
    """Scale one row by the prediction at the current `pointer` position.

    Reads the module-level `dist_result` and advances the global `pointer`
    by one on every call, so the result depends on call order.
    """
    global pointer
    x = row*dist_result[pointer]
    pointer+=1
    return x

# Apply mul_prediction to each row (axis 1) of the age matrix.
# NOTE(review): rows are matched to `dist_result` purely by position. The
# displayed age table lists Kandy and Matale at rows 3-4, whereas dist_map has
# Galle and Matara at ids 4-5 -- the two orderings look different; confirm
# that the CSV row order matches the district-id order before trusting this.
age_counts = np.apply_along_axis(mul_prediction, 1, age_value)

Index(['District', '0<10', '10<20', '20<40', '40<60', '60<', 'psum'], dtype='object')

In [67]:
# Wrap the scaled age matrix in a labelled frame and append the district names
# taken from the source table.
age_count_df = pd.DataFrame(
    age_counts,
    columns=['0<10', '10<20', '20<40', '40<60', '60<'],
).assign(District=age_df_1['District'])
age_count_df

Unnamed: 0,0<10,10<20,20<40,40<60,60<,District
0,0.005302,0.005314,0.011611,0.009139,0.004934,Colombo
1,0.005595,0.005495,0.011526,0.009025,0.00466,Gampaha
2,0.006039,0.005483,0.010835,0.008792,0.005153,Kalutara
3,0.006264,0.005901,0.01022,0.009006,0.00491,Kandy
4,0.006501,0.005734,0.010433,0.009201,0.004431,Matale


Profession-wise prediction

In [70]:
# Load the per-district employment table.
emp_df_1 = pd.read_csv('../../../data/main_data/emp_dist.csv')
emp_df_1.columns
# Every column after the leading 'District' column, as a plain array.
emp_df = emp_df_1.iloc[:, 1:]
emp_value = emp_df.values

Index(['District', 'employed_Total', 'unemployed_Total',
       'unemployed_housework', 'unemployed_studies', 'unemployed_rest',
       'employeed_service', 'employeed_agriculture', 'employeed_Industries'],
      dtype='object')

In [71]:
# Re-wrap the employment values in a labelled frame and append district names.
# NOTE(review): unlike the age section, these values are not multiplied by the
# district predictions here -- confirm whether that step is still to be added.
emp_count_df = pd.DataFrame(
    emp_value,
    columns=[
        'employed_Total',
        'unemployed_Total',
        'unemployed_housework',
        'unemployed_studies',
        'unemployed_rest',
        'employeed_service',
        'employeed_agriculture',
        'employeed_Industries',
    ],
).assign(District=emp_df_1['District'])
emp_count_df

Unnamed: 0,employed_Total,unemployed_Total,unemployed_housework,unemployed_studies,unemployed_rest,employeed_service,employeed_agriculture,employeed_Industries,District
0,52.6,47.4,22.3728,10.0014,15.0258,24.7746,13.3078,14.5176,Colombo
1,50.7,49.3,23.2696,10.4023,15.6281,23.8797,12.8271,13.9932,Gampaha
2,51.1,48.9,23.0808,10.3179,15.5013,24.0681,12.9283,14.1036,Kalutara
3,50.1,49.9,23.5528,10.5289,15.8183,23.5971,12.6753,13.8276,Kandy
4,54.9,45.1,21.2872,9.5161,14.2967,25.8579,13.8897,15.1524,Matale
