In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

# Gather the full path of every file found anywhere under the dataset directory.
paths = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('../input/global-hospital-beds-capacity-for-covid19')
    for name in names
]

print(paths)

In [None]:
# Loading data
# Only the `beds` and `population` columns are needed for the analysis.
# Previously every discovered file was parsed in a loop and all but the last
# frame were thrown away; read just that last file, and fail with a clear
# message (instead of a NameError on `df`) when no file was discovered.
if not paths:
    raise FileNotFoundError(
        "No input files were found; check the dataset directory used to build `paths`."
    )

# NOTE(review): `paths` is filled in os.walk order, which depends on the
# filesystem, so "the last file" may differ between machines -- confirm which
# file is intended and consider selecting it by name.
df = pd.read_csv(paths[-1], usecols=["beds", "population"])

print(df.head(10))
print("No. of data items:", len(df))

In [None]:
# Removing outliers: keep only the rows whose population AND beds values lie
# strictly between that column's 5th and 95th percentiles.
min_p, max_p = df['population'].quantile(0.05), df['population'].quantile(0.95)
min_b, max_b = df['beds'].quantile(0.05), df['beds'].quantile(0.95)

population_in_range = (df.population < max_p) & (df.population > min_p)
beds_in_range = (df.beds < max_b) & (df.beds > min_b)

df = df[population_in_range & beds_in_range]
print("No. of data items:", len(df.index))

In [None]:
# Index change: renumber the rows 0..n-1 after the outlier filter.
# reset_index returns a new frame, so the column assignment below never writes
# into a filtered slice of the original data.
df = df.reset_index(drop=True)

l = len(df)

# Total beds calculation: beds * population / 1000.
# (Presumably `beds` is a per-1000-inhabitants rate -- confirm against the
# dataset description.)
# The whole column is assigned at once. The former per-row chained assignment
# (`df.beds[i] = ...`) triggers SettingWithCopyWarning and silently leaves the
# frame unchanged when pandas Copy-on-Write is enabled.
# NOTE: this cell is not idempotent -- re-running it multiplies again.
df["beds"] = df["beds"] * df["population"] / 1000

print(df)

In [None]:
# Remove decimals: truncate the bed counts toward zero.
import math  # no longer used here; kept so cells relying on `math` being imported still run

l = len(df)

# np.trunc is the vectorised counterpart of math.trunc. Assigning the whole
# column replaces the per-row chained assignment (`df.beds[i] = ...`), which
# is a silent no-op when pandas Copy-on-Write is enabled.
df["beds"] = np.trunc(df["beds"])

# Make both columns float so the following arithmetic is done in floats.
df = df.astype(float)
print(df.dtypes)
print(df)

In [None]:
# normalization
def normalize(column):
    """Min-max scale ``column`` in place so that its values span [0, 1].

    Each element becomes ``(value - lowest) / (highest - lowest)``.

    Parameters
    ----------
    column : mutable sequence of numbers
        Read and written as ``column[0] .. column[len(column) - 1]``; a pandas
        Series must therefore carry a 0..n-1 integer index.

    Returns
    -------
    None
        ``column`` is modified in place.

    Notes
    -----
    * The extremes are taken from the data itself. They used to start at 0,
      so all-positive data was scaled as ``value / highest`` and all-negative
      data used a highest of 0.
    * A constant column (highest == lowest) is mapped to 0.0 instead of
      dividing by zero.
    * An empty column is left untouched.
    """
    size = len(column)
    if size == 0:
        return

    values = [column[i] for i in range(size)]
    lowest = min(values)
    highest = max(values)
    spread = highest - lowest

    for i in range(size):
        # Guard the degenerate case where every value is identical.
        column[i] = (values[i] - lowest) / spread if spread else 0.0
                
# Scale each column on an explicit copy and assign the result back.
# normalize() works in place; mutating the Series handed out by `df.beds`
# only reaches the frame when pandas returns a view, so with Copy-on-Write
# enabled the frame would silently stay unscaled.
for column_name in ("beds", "population"):
    scaled = df[column_name].copy()
    normalize(scaled)
    df[column_name] = scaled

print(df)

In [None]:
# Train-Test Split
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# 80/20 split with population as the feature and beds as the target.
# random_state pins the shuffle so the split -- and every metric computed from
# it further down -- is identical on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    df.population, df.beds, test_size=0.2, random_state=42
)

# Reshape everything to column vectors of shape (n_samples, 1): estimators
# need a 2-D feature matrix, and the cells below index the targets as 2-D too.
X_train = np.array(X_train).reshape((-1, 1))
y_train = np.array(y_train).reshape((-1, 1))
y_test = np.array(y_test).reshape((-1, 1))
X_test = np.array(X_test).reshape((-1, 1))

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
# Linear Regression: fit a least-squares line on the training split.
from sklearn import metrics

model = LinearRegression()
model.fit(X_train, y_train)

# Prediction on the held-out split
y_pred = model.predict(X_test)
print(y_pred)

# Listing of each predicted value next to the corresponding actual value
for predicted, actual in zip(y_pred, y_test):
    print(predicted, " ", actual)

# Evaluation Metrics: MAE, MSE and RMSE, in that order
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
print(mae)
print(mse)
print(np.sqrt(mse))

In [None]:
# Plotting actual values vs predicted values - Linear Regression
import matplotlib.pyplot as plt

# Explicit figure/axes, with a legend and title so the figure can be read on
# its own: red points are the test targets, the blue line is the model output.
fig, ax = plt.subplots()
ax.scatter(X_test, y_test, color="red", label="Actual")
ax.plot(X_test, y_pred, color="blue", label="Predicted")
ax.set_xlabel("POPULATION")
ax.set_ylabel("BEDS")
ax.set_title("Linear Regression: actual vs predicted")
ax.legend()
plt.show()  # also keeps the Text(...) repr of the last call out of the output

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestRegressor

# random_state pins the bootstrap sampling so the forest, and therefore the
# metrics below, are reproducible from run to run.
rf = RandomForestRegressor(n_estimators=128, random_state=42)

# y_train was reshaped to a column vector (n, 1) in the split cell; the forest
# expects a 1-D target and warns (DataConversionWarning) otherwise.
rf.fit(X_train, np.ravel(y_train))

yr_pred = rf.predict(X_test)

# Evaluation metrics: MAE, MSE and RMSE, in that order
rf_mse = metrics.mean_squared_error(y_test, yr_pred)
print(metrics.mean_absolute_error(y_test, yr_pred))
print(rf_mse)
print(np.sqrt(rf_mse))

# Prediction
print(yr_pred)

In [None]:
# Plotting actual values vs predicted values - Random Forest
import matplotlib.pyplot as plt

# Explicit figure/axes, with a legend and title so the two point clouds can be
# told apart: red points are the test targets, blue points the predictions.
fig, ax = plt.subplots()
ax.scatter(X_test, y_test, color="red", label="Actual")
ax.scatter(X_test, yr_pred, color="blue", label="Predicted")
ax.set_xlabel("POPULATION")
ax.set_ylabel("BEDS")
ax.set_title("Random Forest: actual vs predicted")
ax.legend()
plt.show()  # also keeps the Text(...) repr of the last call out of the output

In [None]:
# Lasso Regression
from sklearn.linear_model import Lasso
from sklearn.metrics import r2_score

# Build the L1-regularised linear model.
# NOTE(review): alpha=1.0 is a strong penalty for data scaled to [0, 1] and may
# shrink the coefficient to zero (constant predictions) -- consider tuning it.
modelL = Lasso(alpha=1.0)

# Fit model
modelL.fit(X_train, y_train)

# Predict with the Lasso model itself. This used to call `model.predict`,
# i.e. the LinearRegression fitted earlier, so every "Lasso" number and plot
# was really a copy of the linear-regression result.
y_predL = modelL.predict(X_test)

# Prediction
print(y_predL)

# Evaluation Metrics: MAE, MSE and RMSE, in that order
lasso_mse = metrics.mean_squared_error(y_test, y_predL)
print(metrics.mean_absolute_error(y_test, y_predL))
print(lasso_mse)
print(np.sqrt(lasso_mse))

In [None]:
# Plotting actual values vs predicted values - Lasso Regression
import matplotlib.pyplot as plt

# Explicit figure/axes, with a legend and title so the figure can be read on
# its own: red points are the test targets, the blue line is the model output.
fig, ax = plt.subplots()
ax.scatter(X_test, y_test, color="red", label="Actual")
ax.plot(X_test, y_predL, color="blue", label="Predicted")
ax.set_xlabel("POPULATION")
ax.set_ylabel("BEDS")
ax.set_title("Lasso Regression: actual vs predicted")
ax.legend()
plt.show()  # also keeps the Text(...) repr of the last call out of the output

In [None]:
# Per-sample squared errors of each model on the test split.
# Targets and predictions are flattened to 1-D so that every prediction is
# paired with its own target. The previous loops subtracted the *entire*
# prediction array from each target (`y_test[i] - y_predL`), producing all
# pairwise differences instead of one error per sample.
actual_flat = np.ravel(y_test)

listL = ((actual_flat - np.ravel(y_predL)) ** 2).tolist()  # Lasso regression
listr = ((actual_flat - np.ravel(yr_pred)) ** 2).tolist()  # Random forest

# NOTE: the name `list` shadows the builtin; it is retained because the
# comparison cell reads the linear-regression errors from it.
list = ((actual_flat - np.ravel(y_pred)) ** 2).tolist()  # Linear regression

In [None]:
# Comparison of Lasso Regression, Random Forest and Linear Regression evaluation metrics.
# For each metric a heading is printed, followed by one line per model.
model_labels = ("Lasso Regression ", "Random Forest    ", "Linear Regression")
model_predictions = (y_predL, yr_pred, y_pred)

scorers = (
    ("R2_score", r2_score),
    ("MSE", metrics.mean_squared_error),
    ("MAE", metrics.mean_absolute_error),
    ("RMSE", lambda actual, predicted: np.sqrt(metrics.mean_squared_error(actual, predicted))),
)

for heading, scorer in scorers:
    print(heading)
    for label, predicted in zip(model_labels, model_predictions):
        print(label, scorer(y_test, predicted))

# Variance of the squared-error collections built in the previous cell
print("VARIANCE")
for label, squared_errors in zip(model_labels, (listL, listr, list)):
    print(label, np.var(squared_errors))