# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Loading Dataset

In [None]:
# Read the 2019 World Happiness CSV and preview its first rows.
csv_path = "../input/world-happiness/2019.csv"
df = pd.read_csv(csv_path)
df.head(n=5)

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Remove the rank and country/region columns; they are not used as numeric
# features below. Reassigning (instead of inplace=True) together with
# errors='ignore' keeps this cell safe to re-run: a second execution is a
# no-op rather than a KeyError for the already-dropped columns.
df = df.drop(columns=['Overall rank', 'Country or region'], errors='ignore')

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Print column dtypes, non-null counts and memory usage.
df.info()

# Check Null Values

In [None]:
# Number of missing values in each column.
df.isna().sum()

# Exploratory Data Analysis

In [None]:
# Distribution of GDP per capita: density-normalised histogram with a KDE overlay.
# histplot replaces the deprecated sns.distplot.
sns.histplot(df['GDP per capita'], kde=True, stat='density');

In [None]:
# Distribution of social support: density-normalised histogram with a KDE overlay.
# histplot replaces the deprecated sns.distplot.
sns.histplot(df['Social support'], kde=True, stat='density');

In [None]:
# Distribution of healthy life expectancy: density-normalised histogram with a KDE overlay.
# histplot replaces the deprecated sns.distplot.
sns.histplot(df['Healthy life expectancy'], kde=True, stat='density');

In [None]:
# Distribution of freedom to make life choices: density-normalised histogram with a KDE overlay.
# histplot replaces the deprecated sns.distplot.
sns.histplot(df['Freedom to make life choices'], kde=True, stat='density');

In [None]:
# Distribution of generosity: density-normalised histogram with a KDE overlay.
# histplot replaces the deprecated sns.distplot.
sns.histplot(df['Generosity'], kde=True, stat='density');

In [None]:
# Distribution of perceptions of corruption: density-normalised histogram with a KDE overlay.
# histplot replaces the deprecated sns.distplot.
sns.histplot(df['Perceptions of corruption'], kde=True, stat='density');

# Correlation

In [None]:
# Pairwise correlations between the remaining columns, with each cell annotated.
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True)

# Separating Features and Target Before Scaling

In [None]:
# 'Score' is the regression target; every other column is a feature.
y = df['Score']
X = df.drop(columns=['Score'])

# Using MinMaxScaler as the Dataset Contains Some Skewness

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescales each feature to the [0, 1] interval (MinMaxScaler's default range, spelled out).
scaler = MinMaxScaler(feature_range=(0, 1))

In [None]:
# Fit the scaler and wrap the resulting array back into a DataFrame so the
# feature column names are preserved.
# NOTE(review): the scaler is fitted on every row before the train/test split,
# so test-set min/max values influence preprocessing — consider fitting on the
# training portion only.
scaled_values = scaler.fit_transform(X)
X = pd.DataFrame(scaled_values, columns=X.columns)

In [None]:
# Preview the first rows of the scaled feature frame.
X.head(n=5)

# Splitting Dataset

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for evaluation. A fixed random_state makes the split
# (and therefore every score and plot below) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Model Training Using Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression,Ridge

# The `normalize` argument was removed from LinearRegression in scikit-learn 1.2
# and now raises a TypeError. It is dropped here: for ordinary least squares it
# does not change predictions, and the features were already rescaled above.
LR = LinearRegression()
# Ridge regression with L2 penalty strength alpha=0.5.
R = Ridge(alpha=0.5)

In [None]:
# Fit the linear regression on the training split.
LR.fit(X=X_train, y=y_train)

In [None]:
# R^2 of the linear model on the held-out split.
r2_linear = LR.score(X_test, y_test)
print(r2_linear)

In [None]:
# Linear-regression predictions for the held-out rows.
y_pred = LR.predict(X=X_test)

In [None]:
from sklearn import metrics

# Compute each error once; RMSE is derived from the MSE already in hand.
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
print('MAE:', mae)
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

In [None]:
# Actual (x) against predicted (y) scores on the test split. Axis labels and a
# title are set so the figure can be read on its own.
plt.scatter(y_test, y_pred)
plt.xlabel("Actual score")
plt.ylabel("Predicted score")
plt.title("Linear Regression: actual vs predicted")
plt.show()

In [None]:
# Distribution of the linear-regression residuals on the test split.
# histplot replaces the deprecated sns.distplot; the x label is set explicitly
# because the residual series would otherwise inherit the name 'Score'.
residuals = y_test - y_pred
ax = sns.histplot(residuals, kde=True, stat='density')
ax.set_xlabel("Residual (actual - predicted)")
ax.set_title("Linear Regression residuals")
plt.show()

In [None]:
# This is For Linear Regression
ax1 = sns.distplot(df['Score'],hist=False, color='r', label='Actual value')
sns.distplot(y_pred,hist=False,color='b',label='Predicted value',ax=ax1)
plt.title("Actual vs Predicted")
plt.xlabel("Price")
plt.ylabel("Proportion of Price")
plt.show()

# Using Ridge Regression

In [None]:
# Fit the ridge model on the training split.
R.fit(X=X_train, y=y_train)

In [None]:
# R^2 of the ridge model on the held-out split.
r2_ridge = R.score(X_test, y_test)
print(r2_ridge)

In [None]:
# Ridge predictions for the held-out rows.
y_predR = R.predict(X=X_test)

In [None]:
# Actual (x) against predicted (y) scores on the test split. Axis labels and a
# title are set so the figure can be read on its own.
plt.scatter(y_test, y_predR)
plt.xlabel("Actual score")
plt.ylabel("Predicted score")
plt.title("Ridge Regression: actual vs predicted")
plt.show()

In [None]:
# Distribution of the ridge residuals on the test split.
# histplot replaces the deprecated sns.distplot; the x label is set explicitly
# because the residual series would otherwise inherit the name 'Score'.
residuals_ridge = y_test - y_predR
ax = sns.histplot(residuals_ridge, kde=True, stat='density')
ax.set_xlabel("Residual (actual - predicted)")
ax.set_title("Ridge Regression residuals")
plt.show()

In [None]:
# This is For Rigde Regression
ax1 = sns.distplot(df['Score'],hist=False, color='r', label='Actual value')
sns.distplot(y_predR,hist=False,color='b',label='Predicted value',ax=ax1)
plt.title("Actual vs Predicted")
plt.xlabel("Price")
plt.ylabel("Proportion of Price")
plt.show()

# Using Random Forest Regression

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forests bootstrap rows and subsample features at random; fixing
# random_state makes the fitted model and its scores reproducible.
RFR = RandomForestRegressor(random_state=42)

In [None]:
# Fit the random forest on the training split.
RFR.fit(X=X_train, y=y_train)

In [None]:
# R^2 of the random forest on the held-out split.
r2_forest = RFR.score(X_test, y_test)
print(r2_forest)

In [None]:
# Random-forest predictions for the held-out rows.
y_predRFR = RFR.predict(X=X_test)

In [None]:
# Actual (x) against predicted (y) scores on the test split. Axis labels and a
# title are set so the figure can be read on its own.
plt.scatter(y_test, y_predRFR)
plt.xlabel("Actual score")
plt.ylabel("Predicted score")
plt.title("Random Forest: actual vs predicted")
plt.show()

In [None]:
# Distribution of the random-forest residuals on the test split.
# histplot replaces the deprecated sns.distplot; the x label is set explicitly
# because the residual series would otherwise inherit the name 'Score'.
residuals_forest = y_test - y_predRFR
ax = sns.histplot(residuals_forest, kde=True, stat='density')
ax.set_xlabel("Residual (actual - predicted)")
ax.set_title("Random Forest residuals")
plt.show()

In [None]:
# Random Forest: density of the actual test-set scores against the density
# of the model's predictions for those same rows. y_test is used rather than the
# whole df['Score'] so both curves describe the same observations.
ax1 = sns.kdeplot(y_test, color='r', label='Actual value')
sns.kdeplot(y_predRFR, color='b', label='Predicted value', ax=ax1)
ax1.legend()  # without this the curve labels are never shown
plt.title("Actual vs Predicted")
plt.xlabel("Happiness score")
plt.ylabel("Density")
plt.show()