In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        full_path = os.path.join(root, fname)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%notebook inline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)


# Reading file and checking head and shape

In [None]:
# Load the 2015 World Happiness table, print its (rows, columns) shape,
# and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="/kaggle/input/world-happiness/2015.csv")
print(df.shape)
df.head(n=5)

# Checking for missing values

In [None]:
# Number of missing entries in each column.
df.isnull().sum()

# Counting the unique values in each text column (Country and Region)

In [None]:
# For every object-dtype column, print its name and how many distinct values it holds.
text_columns = df.select_dtypes(include=object).columns
for column_name in text_columns:
    distinct_count = df[column_name].nunique()
    print(column_name, distinct_count)

In [None]:
# How many rows fall in each region, most frequent first.
df.loc[:, "Region"].value_counts()

# Checking correlation between variables

In [None]:
# Heatmap of pairwise correlations between the numeric columns.
sns.set_style("darkgrid")
plt.figure(figsize=(9,6))
# Restrict to numeric columns explicitly: the frame also holds string columns
# (Country, Region), and DataFrame.corr() raises on those in pandas >= 2.0
# instead of silently dropping them as older versions did.
corr = df.select_dtypes(include="number").corr()
sns.heatmap(corr, annot=True)

# Dropping the columns not used as features (Happiness Rank and Country)

In [None]:
# Build a new frame without the rank and country columns; `df` itself is left untouched.
df1 = df.drop(columns=["Happiness Rank", "Country"])

# Generating dummies

In [None]:
# One-hot encode the remaining non-numeric columns and preview the result.
dummies = pd.get_dummies(data=df1)
dummies.head(n=5)

# Scaling and splitting data

In [None]:
# Separate the predictors from the target column.
X = dummies.drop("Happiness Score", axis=1)
y = dummies["Happiness Score"]

# Split FIRST so the held-out rows never influence preprocessing.
# Same test_size / random_state as before, so the row assignment is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101
)

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to the test rows. Fitting on the full data (as was done
# previously) leaks the test set's mean/variance into training.
sc = StandardScaler()
X_train = pd.DataFrame(
    sc.fit_transform(X_train_raw), columns=X.columns, index=X_train_raw.index
)
X_test = pd.DataFrame(
    sc.transform(X_test_raw), columns=X.columns, index=X_test_raw.index
)

# Initializing models

In [None]:
# Candidate regressors keyed by a display label. The labels are left-padded
# with spaces so they all have the same width when printed.
models = dict([
    ("                     Linear Regression", LinearRegression()),
    (" Linear Regression (L2 Regularization)", Ridge()),
    (" Linear Regression (L1 Regularization)", Lasso()),
    ("                   SVM (Linear Kernel)", SVR(kernel="linear")),
])

# Training models

In [None]:
# Fit every candidate model on the training split, reporting each one as it finishes.
for label in models:
    models[label].fit(X_train, y_train)
    print(f"{label} trained.")

# Testing

In [None]:
# Report each model's score on the test and training splits.
# For scikit-learn regressors, `score` returns the coefficient of
# determination (R^2), not a classification accuracy, so label it as such.
separator = "--------------------" * 3
for name, model in models.items():
    print(name)
    print(separator)
    print("Testing R^2: {:.5f}".format(model.score(X_test, y_test)))
    print("Training R^2: {:.5f}".format(model.score(X_train, y_train)))
    print(separator)
    print('\n')
                     

# Cross validation

In [None]:
# 10-fold cross validation for each candidate model.
# Run it on the TRAINING split: the test split must stay untouched for the
# final evaluation, and it is too small for ten meaningful folds.
# cross_val_score fits clones, so the already-trained models are not modified.
for name, model in models.items():
    print(name,"\n")
    print(cross_val_score(model, X_train, y_train, cv=10))
    print("-----------------------------------------------------")

Linear Regression gives us the best results, so we refit it below and plot its predictions against the actual happiness scores.

In [None]:
# Refit a plain linear regression and compare its test-set predictions
# with the actual scores.
lr = LinearRegression()
lr.fit(X_train, y_train)

sns.set_style('darkgrid')
plt.figure(figsize=(8,5))
# x/y must be passed by keyword: seaborn >= 0.12 rejects positional x/y.
# The "actual" points are plotted against themselves, giving the identity
# line that perfect predictions would fall on. Marker size uses y_test so the
# size vector matches the plotted points instead of relying on index
# alignment with the full-length column.
sns.scatterplot(x=y_test, y=y_test, color="black", size=y_test, sizes=(80,180), label="actual")
sns.scatterplot(x=lr.predict(X_test), y=y_test, color="yellow", label="predicted")
plt.ylabel("Actual Happiness Score")
plt.xlabel("predicted Happiness Score")
plt.legend()

In [None]:
# Overlay Dystopia Residual and Generosity against the happiness score,
# with marker size scaled by the happiness score.
plt.figure(figsize=(10,8))
happiness = dummies["Happiness Score"]
# x/y must be passed by keyword: seaborn >= 0.12 rejects positional x/y.
sns.scatterplot(x=happiness, y=dummies["Dystopia Residual"], label="Dystopian Residual",
                color="black", size=happiness)
sns.scatterplot(x=happiness, y=dummies["Generosity"], label="Generosity", size=happiness)
plt.ylabel("Generosity and Dystopian Residual")

In [None]:
# Generosity against happiness score.
# x/y must be passed by keyword: seaborn >= 0.12 rejects positional x/y.
sns.scatterplot(x=dummies["Happiness Score"], y=dummies["Generosity"])