In [None]:
import numpy as np
import pandas as pd 
import os
import seaborn as sns
import matplotlib.pyplot as plt 
from collections import Counter
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff

from mlxtend.classifier import EnsembleVoteClassifier
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.metrics import mean_squared_error, r2_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

import warnings        
# --- Notebook-wide runtime configuration -------------------------------------
init_notebook_mode(connected=True)      # plotly offline notebook mode
warnings.filterwarnings("ignore")       # silence every warning category
plt.style.use('ggplot')                 # matplotlib theme for static figures

# Print the full path of every file found below the Kaggle input directory.
input_files = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
]
for path in input_files:
    print(path)

In [None]:
def _load_happiness_year(csv_path, year, column_renames):
    """Read one yearly report and harmonise its column names.

    Args:
        csv_path: Path of the CSV file to read.
        year: Value written to the ``year`` column of every row.
        column_renames: Mapping ``{original column name: harmonised name}``.

    Returns:
        A new DataFrame with the ``year`` column added and columns renamed.
    """
    return pd.read_csv(csv_path).assign(year=year).rename(columns=column_renames)


# The 2015 and 2016 files share the same original headers.
_RENAMES_2015_2016 = {
    "Economy (GDP per Capita)": "Economy",
    "Family": "Social support",
    "Health (Life Expectancy)": "Health",
    "Happiness Score": "Score",
}

# The 2017 file uses dot-separated headers.
_RENAMES_2017 = {
    "Economy..GDP.per.Capita.": "Economy",
    "Health..Life.Expectancy.": "Health",
    "Family": "Social support",
    "Happiness.Rank": "Happiness Rank",
    "Happiness.Score": "Score",
}

# The 2018 and 2019 files share the same original headers.
# NOTE(review): the "Happiness.Score" entry is carried over unchanged; rename
# skips keys that are not present in the frame -- confirm whether these files
# actually contain that column.
_RENAMES_2018_2019 = {
    "Country or region": "Country",
    "GDP per capita": "Economy",
    "Healthy life expectancy": "Health",
    "Freedom to make life choices": "Freedom",
    "Overall rank": "Happiness Rank",
    "Happiness.Score": "Score",
}

data1 = _load_happiness_year("../input/world-happiness/2015.csv", 2015, _RENAMES_2015_2016)
data2 = _load_happiness_year("../input/world-happiness/2016.csv", 2016, _RENAMES_2015_2016)
data3 = _load_happiness_year("../input/world-happiness/2017.csv", 2017, _RENAMES_2017)
data4 = _load_happiness_year("../input/world-happiness/2018.csv", 2018, _RENAMES_2018_2019)
data5 = _load_happiness_year("../input/world-happiness/2019.csv", 2019, _RENAMES_2018_2019)


In [None]:
# Stack the five yearly frames into one table. join="inner" keeps only the
# columns that exist in every frame. ignore_index=True gives each row a unique
# label; without it every yearly frame restarts its index at 0, leaving
# duplicate labels that make label-based lookups (.loc) ambiguous.
happinessData = pd.concat(
    [data1, data2, data3, data4, data5],
    join="inner",
    ignore_index=True,
)
happinessData.head()

In [None]:
# Print a summary (columns, non-null counts, dtypes) of the combined frame.
happinessData.info()

In [None]:
# Line chart of Generosity and Score against Happiness Rank for the first
# 100 rows of the combined frame.
df = happinessData.head(100)

# Both traces share the x axis and the hover text (country name shown on hover).
shared_axes = {"x": df['Happiness Rank'], "text": df.Country}

trace1 = go.Scatter(
    y=df.Generosity,
    mode="lines",
    name="Generosity",
    marker={"color": 'rgba(16, 112, 2, 0.8)'},
    **shared_axes,
)
trace2 = go.Scatter(
    y=df.Score,
    mode="lines+markers",
    name="Happiness_Score",
    marker={"color": 'rgba(80, 26, 80, 0.8)'},   # marker colour with 0.8 opacity
    **shared_axes,
)

data = [trace1, trace2]
layout = {
    "title": "Generosity and Happiness Score vs 100 countries' happiness rank",
    # zeroline controls whether the line at zero is drawn
    "xaxis": {"title": 'Happiness_Rank', "ticklen": 5, "zeroline": False},
}
fig = {"data": data, "layout": layout}
iplot(fig)

In [None]:
# First 100 rows of each of the years 2015, 2016 and 2017.
df2015, df2016, df2017 = (
    happinessData.loc[happinessData.year == yr].head(100)
    for yr in (2015, 2016, 2017)
)

# One marker trace per year: Generosity against Happiness Rank, with the
# country name as hover text.
trace1, trace2, trace3 = (
    go.Scatter(
        x=frame['Happiness Rank'],
        y=frame.Generosity,
        mode="markers",
        name=label,
        marker={"color": rgba},
        text=frame.Country,
    )
    for frame, label, rgba in (
        (df2015, "2015", 'rgba(255, 128, 255, 0.8)'),
        (df2016, "2016", 'rgba(255, 128, 2, 0.8)'),
        (df2017, "2017", 'rgba(0, 255, 200, 0.8)'),
    )
)

data = [trace1, trace2, trace3]
layout = {
    "title": 'Generosity vs Happiness_Rank of 100 Countries with 2015, 2016 and 2017 years',
    "xaxis": {"title": 'Happiness_Rank', "ticklen": 5, "zeroline": False},
    "yaxis": {"title": 'Generosity', "ticklen": 5, "zeroline": False},
}
fig = {"data": data, "layout": layout}
iplot(fig)

In [None]:
# Grouped bar chart of Generosity and Happiness Rank per country.
# Reads `df2015` as it was bound by an earlier cell.
countries = df2015.Country
bar_outline = {"color": 'rgb(0,0,0)', "width": 1.5}   # black border shared by both series

trace1 = go.Bar(
    x=countries,
    y=df2015.Generosity,
    name="Generosity",
    marker={"color": 'rgba(255, 174, 255, 0.5)', "line": bar_outline},
    text=countries,
)
trace2 = go.Bar(
    x=countries,
    y=df2015['Happiness Rank'],
    name="Happiness_Rank",
    marker={"color": 'rgba(255, 255, 128, 0.5)', "line": bar_outline},
    text=countries,
)

data = [trace1, trace2]
layout = go.Layout(barmode="group")
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# Donut chart of the Freedom values for the first 7 rows of 2017.
df2017 = happinessData.loc[happinessData.year == 2017].head(7)
pie1, labels = df2017.Freedom, df2017.Country

freedom_donut = dict(
    values=pie1,
    labels=labels,
    domain=dict(x=[0, .5]),          # pie occupies the left half of the figure
    name="Freedom Of Countries",
    hoverinfo="label+percent+name",
    hole=.3,
    type="pie",
)
caption = dict(
    font=dict(size=20),
    showarrow=False,
    text="Freedom rate",
    x=0.135,
    y=1.1,
)
fig = dict(
    data=[freedom_donut],
    layout=dict(title="Countries rate of Freedom (2017)", annotations=[caption]),
)
iplot(fig)

In [None]:
# Donut chart of the Freedom values for the first 7 rows of 2015.
# The 7-row slice gets its own name instead of rebinding `df2015`: the grouped
# bar-chart cell reads `df2015` (100 rows), and overwriting it here made that
# cell plot only 7 countries when re-run after this one.
top7_2015 = happinessData[happinessData.year == 2015].iloc[:7, :]
pie1 = top7_2015.Freedom
labels = top7_2015.Country
fig = {
  "data": [
    {
      "values": pie1,
      "labels": labels,
      "domain": {"x": [0, .5]},      # pie occupies the left half of the figure
      "name": "Freedom Of Countries",
      "hoverinfo": "label+percent+name",
      "hole": .3,
      "type": "pie"
    },],
  "layout": {
        "title": "Countries rate of Freedom (2015)",
        "annotations": [
            { "font": { "size": 20},
              "showarrow": False,
              "text": "Freedom rate",
                "x": 0.135,
                "y": 1.1
            },
        ]
    }
}
iplot(fig)

In [None]:
# Build the regression design matrix and target.
# Non-feature columns are excluded by name rather than by position: a later
# cell adds a `Happy` column derived from `Score`, and a positional slice
# (iloc[:, 3:]) would silently include it as a feature if this cell were
# re-run afterwards, leaking the target. errors="ignore" lets the same line
# work whether or not `Happy` exists yet.
# NOTE(review): assumes the first three columns of the combined frame are
# Country, Happiness Rank and Score -- confirm against happinessData.columns.
x = happinessData.drop(
    columns=["Country", "Happiness Rank", "Score", "Happy"],
    errors="ignore",
)
y = happinessData["Score"]
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)
print(y_train.shape)
print(y_test.shape)

In [None]:
# Fit a seeded random-forest regressor on the training split, then predict the
# held-out rows.
model = RandomForestRegressor(random_state=45)
model.fit(X=x_train, y=y_train)
pred = model.predict(X=x_test)

In [None]:
# Regression metrics for the current `model` / `pred`.
# A regressor's .score() is the coefficient of determination (R^2), not an
# accuracy, so the last two lines are labelled as R^2.
print("R2 Score                : %0.3f" % r2_score(y_test, pred))
print("Root Mean Squared Error : %0.3f" % np.sqrt(mean_squared_error(y_test, pred)))
print("Train R2 Score          : %0.3f" % model.score(x_train, y_train))
print("Test R2 Score           : %0.3f" % model.score(x_test, y_test))

In [None]:
# Fit an AdaBoost regressor with 100 estimators and predict the held-out rows.
# random_state is fixed (same seed as the random-forest model) so that the
# reported metrics are reproducible across runs.
model = AdaBoostRegressor(n_estimators=100, random_state=45)
model.fit(x_train, y_train)
pred = model.predict(x_test)

In [None]:
# Regression metrics for the current `model` / `pred`.
# A regressor's .score() is the coefficient of determination (R^2), not an
# accuracy, so the last two lines are labelled as R^2.
print("R2 Score                : %0.3f" % r2_score(y_test, pred))
print("Root Mean Squared Error : %0.3f" % np.sqrt(mean_squared_error(y_test, pred)))
print("Train R2 Score          : %0.3f" % model.score(x_train, y_train))
print("Test R2 Score           : %0.3f" % model.score(x_test, y_test))

In [None]:
# Binary label: a row is "Happy" when its Score is strictly above the mean
# Score of the whole combined frame. A single vectorized comparison replaces
# the Python list of False values followed by a masked assignment.
mean = happinessData['Score'].mean()
happinessData['Happy'] = happinessData['Score'] > mean
happinessData.head()

In [None]:
# Build the classification design matrix and target.
# `Happy` is derived from `Score`, so `Score` (and the rank) must never be
# features. Excluding them by name keeps that guarantee even if the column
# order of the combined frame changes, which a positional iloc[:, 3:] does not.
# NOTE(review): assumes the first three columns of the combined frame are
# Country, Happiness Rank and Score -- confirm against happinessData.columns.
x = happinessData.drop(
    columns=["Country", "Happiness Rank", "Score", "Happy"],
    errors="ignore",
)
y = happinessData["Happy"]
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)
print(y_train.shape)
print(y_test.shape)

In [None]:
# Fit a seeded random-forest classifier on the training split, then predict the
# held-out rows.
model = RandomForestClassifier(random_state=45)
model.fit(X=x_train, y=y_train)
pred = model.predict(X=x_test)

In [None]:
# Classification metrics for the current `model` / `pred`, one line each.
for template, value in (
    ("Train Accuracy          : %0.3f", model.score(x_train, y_train)),
    ("Test Accuracy           : %0.3f", model.score(x_test, y_test)),
    ("Precision               : %0.3f", precision_score(y_test, pred)),
    ("Recall                  : %0.3f", recall_score(y_test, pred)),
    ("F1 Score                : %0.3f", f1_score(y_test, pred)),
):
    print(template % value)

In [None]:
# Confusion matrix of the held-out labels against the current predictions.
confusion_matrix(y_test, pred)

In [None]:
# Fit an AdaBoost classifier with 100 estimators and predict the held-out rows.
# random_state is fixed (same seed as the random-forest model) so that the
# reported metrics are reproducible across runs.
model = AdaBoostClassifier(n_estimators=100, random_state=45)
model.fit(x_train, y_train)
pred = model.predict(x_test)

In [None]:
# Classification metrics for the current `model` / `pred`, one line each.
for template, value in (
    ("Train Accuracy          : %0.3f", model.score(x_train, y_train)),
    ("Test Accuracy           : %0.3f", model.score(x_test, y_test)),
    ("Precision               : %0.3f", precision_score(y_test, pred)),
    ("Recall                  : %0.3f", recall_score(y_test, pred)),
    ("F1 Score                : %0.3f", f1_score(y_test, pred)),
):
    print(template % value)

In [None]:
# Confusion matrix of the held-out labels against the current predictions.
confusion_matrix(y_test, pred)

For the given dataset, Random Forest outperforms AdaBoost on both tasks: regressing the happiness scores and classifying countries as happy.

In [None]:
# Base classifiers and a soft-voting ensemble that weights them equally.
# Both base learners are seeded so cross-validation results are reproducible;
# the AdaBoost model previously had no random_state.
clf1 = RandomForestClassifier(random_state=4)
clf2 = AdaBoostClassifier(n_estimators=100, random_state=4)
eclf = EnsembleVoteClassifier(clfs=[clf1, clf2], weights=[1, 1], voting='soft')

In [None]:
# 5-fold cross-validated accuracy for each base classifier and for the voting
# ensemble. `eclf` was constructed but never evaluated, so it is now included
# in the comparison alongside its two members.
labels = ['Random Forest', 'Ada Boost', 'Ensemble']

for clf, label in zip([clf1, clf2, eclf], labels):
    scores = cross_val_score(clf, x, y, cv=5, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f) [%s]"
          % (scores.mean(), scores.std(), label))

In [None]:
!pip install nbconvert
!apt install pandoc 
!apt install texlive-xetex -y 
!jupyter nbconvert --execute --to pdf __notebook_source__.ipynb
!curl --upload-file __notebook_source__.pdf https://transfer.sh/notebook.pdf