In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
#importing dataset

In [None]:
# Load the 2017 happiness table from the notebook's input directory.
df = pd.read_csv(filepath_or_buffer='../input/2017.csv')

In [None]:
# Preview the first five rows to see which columns are available.
df.head(n=5)

In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

In [None]:
#The data is already clean and in proper formats, so we don't need to perform data cleaning and refining; anyway, it's a small dataset

In [None]:
#Our main aim is to identify the parameters that have the highest impact on the happiness score

In [None]:
#visualizing countries geographically based on their happiness score

In [None]:
# `plotly.plotly` is the online (Chart Studio) client. It was split out of plotly in
# version 4, where importing it raises ImportError. This notebook only plots offline,
# so the import is kept for older installs but must not stop the notebook on newer ones.
try:
    import plotly.plotly as py
except ImportError:
    py = None  # online client unavailable; offline plotting below does not need it
import plotly.graph_objs as go

In [None]:
from plotly.offline import download_plotlyjs, init_notebook_mode,plot,iplot

In [None]:
# Initialise plotly's offline notebook mode before any iplot() call below.
init_notebook_mode(connected=True)

In [None]:
# Choropleth trace specification: countries are matched by name and coloured by
# their happiness score; the country name is also used as the hover text.
data = {
    'type': 'choropleth',
    'locations': df['Country'],
    'locationmode': "country names",
    'z': df['Happiness.Score'],
    'text': df['Country'],
    'colorbar': dict(title='Happiness Score out of 10 for 2017'),
}

In [None]:
# Layout for the choropleth figure: title plus a frameless Mercator map.
# plotly's geo projection names are lowercase; the capitalised 'Mercator' is not an
# accepted value and is rejected when the figure is built on plotly >= 3.
layout = dict(
    title="Happiness Score for 2017",
    geo=dict(
        showframe=False,
        projection={'type': 'mercator'},
    ),
)

In [None]:
# Assemble the figure from the single choropleth trace and the layout.
choromap3 = go.Figure(layout=layout, data=[data])

In [None]:
# Render the choropleth figure inline in the notebook.
iplot(choromap3)

In [None]:
#from the visualization, we see that happier people are found in North America, South America, North Western Europe, Russia and Australia

In [None]:
#the plot is interactive, feel free to play around with it

In [None]:
#data is skewed because we couldn't get state-wise happiness scores, which could have helped us more

In [None]:
#Lets try to develop a correlation between happiness score and other attributes

In [None]:
# Look at the first five rows again before choosing attributes to correlate.
df.head(n=5)

In [None]:
#the dependent attributes comprise Health Life Expectancy and Freedom

In [None]:
#corr matrix
plt.figure(figsize=(12,8))
sns.heatmap(df.corr(),annot=True)

In [None]:
#We will not consider Happiness rank here as its obviously directly linked with Happiness Score. 

In [None]:
#Fields strongly correlated with Happiness Score can be found by looking at the Happiness Score row in the heatmap

In [None]:
#it shows that Whisker (confidence of estimates) mostly decides the happiness score

In [None]:
#also, the other factors, except generosity, affect happiness to a high extent

In [None]:
#we will get rid of generosity and happiness rank as they are not useful here now

In [None]:
# Working copy of the data without the rank and generosity columns.
df_new = df.drop(columns=['Happiness.Rank', 'Generosity'])

In [None]:
# Confirm the two columns are gone by previewing the first five rows.
df_new.head(n=5)

In [None]:
#checking linearity between whisker high, low and Happiness Score as correlation shows they are highly linked

In [None]:
# Scatter plot with a fitted regression line: upper whisker vs. happiness score.
sns.lmplot(data=df_new, x='Whisker.high', y='Happiness.Score')

In [None]:
#repeating it for whisker low score too

In [None]:
# Scatter plot with a fitted regression line: lower whisker vs. happiness score.
sns.lmplot(data=df_new, x='Whisker.low', y='Happiness.Score')

In [None]:
#The plots show that whisker high and low are directly linked with happiness score and hence won't be useful in the analysis, so we will remove them too.

In [None]:
# Remove the whisker columns from the working frame.
# Reassigning (instead of inplace=True) with errors='ignore' keeps this cell
# re-runnable: executing it a second time no longer raises KeyError because the
# columns are already gone.
df_new = df_new.drop(columns=['Whisker.low', 'Whisker.high'], errors='ignore')

In [None]:
# Preview the first five rows of the reduced frame.
df_new.head(n=5)

In [None]:
# Bivariate view of two variables: GDP per capita against happiness score as a scatter plot.
sns.jointplot(data=df_new, x='Economy..GDP.per.Capita.', y='Happiness.Score', kind='scatter')

In [None]:
#jointplot shows Happiness Score is highly linked with Per Capita GDP

In [None]:
# Pairwise relationships between every pair of remaining columns.
sns.pairplot(data=df_new)

In [None]:
#pairplot helped me understand that many countries with higher freedom still have a low Happiness Score

In [None]:
#Conversely, countries with low trust and more government corruption have a high Happiness Score.

In [None]:
#This shows that countries don't count government corruption and freedom as their main source of happiness; we need to look beyond that

In [None]:
#From this viz, I noticed that even GDP is strongly linked with a better family rating and better health and life expectancy, which is true.

In [None]:
# Happiness score against the dystopia residual, with a regression fit overlaid.
sns.jointplot(data=df_new, x='Happiness.Score', y='Dystopia.Residual', kind='reg')

In [None]:
#it's seen here that with increasing happiness score there is a slight increase in Dystopia.Residual, where people are slightly unhappy but not able to figure out the reasons

In [None]:
#this shows with increasing happiness score, countries couldn't explain the reason of being unhappy as they have all the other factors fulfilled

In [None]:
#take the top 5 countries

In [None]:
# First five rows of the working frame, numeric columns only (country name removed).
df_top = df_new.head().drop(columns=['Country'])
df_top

In [None]:
# Take the bottom 5 countries, numeric columns only, re-indexed from 0 so the rows
# line up positionally with the top-5 frame.
# drop=True matters: a plain reset_index() would insert the old row labels as an
# extra 'index' column, which then appears as an all-NaN column when the two
# frames are subtracted.
df_bottom = (
    df_new.tail()
    .drop(columns=['Country'])
    .reset_index(drop=True)
)
df_bottom

In [None]:
# Difference between the top-5 and bottom-5 values; rows are paired by index label.
df_top.sub(df_bottom, axis='index')

In [None]:
#It shows that the dystopia residual varies between the top and the lowest "happy" countries

In [None]:
#Economy- GDP, Family and Health Life Expectancy are major sources of happiness for any country

In [None]:
#Pay people better, they will take better care of their health and family and hence be more happy. That's the conclusion :D