## About datatset
#### The Better Life Index 2024 dataset provides comprehensive indicators across various dimensions of well-being for multiple countries. It encompasses factors such as economic prosperity, housing quality, education, health, safety, and overall life satisfaction.

In [36]:
# Library 

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler


In [37]:
df = pd.read_csv(r"C:\Users\Maftuna\Desktop\Machine Learning\1 Bo'lim\5 Homework\better-life-index-2024 (1).csv")
df.head()

Unnamed: 0,Country,GDP per capita (USD),Dwellings without basic facilities,Housing expenditure,Rooms per person,Household net adjusted disposable income,Household net wealth,Labour market insecurity,Employment rate,Long-term unemployment rate,Personal earnings,Quality of support network,Educational attainment,Student skills,Years in education,Air pollution,Water quality,Stakeholder engagement for developing regulations,Voter turnout,Life expectancy,Self-reported health,Feeling safe walking alone at night,Homicide rate,Employees working very long hours,Time devoted to leisure and personal care,Life satisfaction
0,Australia,66589,,19.4,,37433.0,528768.0,3.1,73,1.0,55206.0,93,84.0,499.0,20.0,6.7,92,2.7,92,83.0,85,67,0.9,12.5,14.36,7.1
1,Austria,59225,0.8,20.8,1.6,37001.0,309637.0,2.3,72,1.3,53132.0,92,86.0,491.0,17.0,12.2,92,1.3,76,82.0,71,86,0.5,5.3,14.51,7.2
2,Belgium,55536,0.7,20.0,2.1,34884.0,447607.0,2.4,65,2.3,54327.0,90,80.0,500.0,19.0,12.8,79,2.0,88,82.1,74,56,1.1,4.3,15.52,6.8
3,Canada,54866,0.2,22.9,2.6,34421.0,478240.0,3.8,70,0.5,55342.0,93,92.0,517.0,17.0,7.1,90,2.9,68,82.1,89,78,1.2,3.3,14.57,7.0
4,Chile,16616,9.4,18.4,1.9,,135787.0,7.0,56,,26729.0,88,67.0,438.0,17.0,23.4,62,1.3,47,80.6,60,41,2.4,7.7,,6.2


In [38]:
# Checking the data infprmation
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38 entries, 0 to 37
Data columns (total 26 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   Country                                              38 non-null     object 
 1   GDP per capita (USD)                                 38 non-null     int64  
 2     Dwellings without basic facilities                 35 non-null     float64
 3     Housing expenditure                                35 non-null     float64
 4     Rooms per person                                   37 non-null     float64
 5     Household net adjusted disposable income           33 non-null     float64
 6     Household net wealth                               29 non-null     float64
 7     Labour market insecurity                           34 non-null     float64
 8     Employment rate                                    38 non-null     int

##  Missing Values

In [39]:
# Checking the missing values
df.isnull().sum()

Country                                                 0
GDP per capita (USD)                                    0
  Dwellings without basic facilities                    3
  Housing expenditure                                   3
  Rooms per person                                      1
  Household net adjusted disposable income              5
  Household net wealth                                  9
  Labour market insecurity                              4
  Employment rate                                       0
  Long-term unemployment rate                           1
  Personal earnings                                     3
  Quality of support network                            0
  Educational attainment                                1
  Student skills                                        1
  Years in education                                    1
  Air pollution                                         0
  Water quality                                         0
  Stakeholder 

In [40]:
# only missing columns

df.columns[df.isna().any()]

Index(['  Dwellings without basic facilities', '  Housing expenditure',
       '  Rooms per person', '  Household net adjusted disposable income',
       '  Household net wealth', '  Labour market insecurity',
       '  Long-term unemployment rate', '  Personal earnings',
       '  Educational attainment', '  Student skills', '  Years in education',
       '  Employees working very long hours',
       '  Time devoted to leisure and personal care'],
      dtype='object')

In [41]:
# Filling the missing values with the mean of the columns
nomerical_columns = df.select_dtypes(include=[np.number]).columns.tolist()
df[nomerical_columns] = df[nomerical_columns].fillna(df[nomerical_columns].mean())

In [42]:
df.isnull().sum()

Country                                                0
GDP per capita (USD)                                   0
  Dwellings without basic facilities                   0
  Housing expenditure                                  0
  Rooms per person                                     0
  Household net adjusted disposable income             0
  Household net wealth                                 0
  Labour market insecurity                             0
  Employment rate                                      0
  Long-term unemployment rate                          0
  Personal earnings                                    0
  Quality of support network                           0
  Educational attainment                               0
  Student skills                                       0
  Years in education                                   0
  Air pollution                                        0
  Water quality                                        0
  Stakeholder engagement for de

In [43]:
df.head()

Unnamed: 0,Country,GDP per capita (USD),Dwellings without basic facilities,Housing expenditure,Rooms per person,Household net adjusted disposable income,Household net wealth,Labour market insecurity,Employment rate,Long-term unemployment rate,Personal earnings,Quality of support network,Educational attainment,Student skills,Years in education,Air pollution,Water quality,Stakeholder engagement for developing regulations,Voter turnout,Life expectancy,Self-reported health,Feeling safe walking alone at night,Homicide rate,Employees working very long hours,Time devoted to leisure and personal care,Life satisfaction
0,Australia,66589,3.051429,19.4,1.675676,37433.0,528768.0,3.1,73,1.0,55206.0,93,84.0,499.0,20.0,6.7,92,2.7,92,83.0,85,67,0.9,12.5,14.36,7.1
1,Austria,59225,0.8,20.8,1.6,37001.0,309637.0,2.3,72,1.3,53132.0,92,86.0,491.0,17.0,12.2,92,1.3,76,82.0,71,86,0.5,5.3,14.51,7.2
2,Belgium,55536,0.7,20.0,2.1,34884.0,447607.0,2.4,65,2.3,54327.0,90,80.0,500.0,19.0,12.8,79,2.0,88,82.1,74,56,1.1,4.3,15.52,6.8
3,Canada,54866,0.2,22.9,2.6,34421.0,478240.0,3.8,70,0.5,55342.0,93,92.0,517.0,17.0,7.1,90,2.9,68,82.1,89,78,1.2,3.3,14.57,7.0
4,Chile,16616,9.4,18.4,1.9,30490.151515,135787.0,7.0,56,1.737838,26729.0,88,67.0,438.0,17.0,23.4,62,1.3,47,80.6,60,41,2.4,7.7,15.069091,6.2


##  Encoding

In [44]:
# Category columns

categorical_column = df['Country']

In [45]:
df = pd.get_dummies(df, columns=['Country'], drop_first=True)

In [46]:
df.head()

Unnamed: 0,GDP per capita (USD),Dwellings without basic facilities,Housing expenditure,Rooms per person,Household net adjusted disposable income,Household net wealth,Labour market insecurity,Employment rate,Long-term unemployment rate,Personal earnings,Quality of support network,Educational attainment,Student skills,Years in education,Air pollution,Water quality,Stakeholder engagement for developing regulations,Voter turnout,Life expectancy,Self-reported health,Feeling safe walking alone at night,Homicide rate,Employees working very long hours,Time devoted to leisure and personal care,Life satisfaction,Country_Austria,Country_Belgium,Country_Canada,Country_Chile,Country_Colombia,Country_Costa Rica,Country_Czechia,Country_Denmark,Country_Estonia,Country_Finland,Country_France,Country_Germany,Country_Greece,Country_Hungary,Country_Iceland,Country_Ireland,Country_Israel,Country_Italy,Country_Japan,Country_Korea,Country_Latvia,Country_Lithuania,Country_Luxembourg,Country_Mexico,Country_Netherlands,Country_New Zealand,Country_Norway,Country_Poland,Country_Portugal,Country_Slovak Republic,Country_Slovenia,Country_Spain,Country_Sweden,Country_Switzerland,Country_Türkiye,Country_United Kingdom,Country_United States
0,66589,3.051429,19.4,1.675676,37433.0,528768.0,3.1,73,1.0,55206.0,93,84.0,499.0,20.0,6.7,92,2.7,92,83.0,85,67,0.9,12.5,14.36,7.1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,59225,0.8,20.8,1.6,37001.0,309637.0,2.3,72,1.3,53132.0,92,86.0,491.0,17.0,12.2,92,1.3,76,82.0,71,86,0.5,5.3,14.51,7.2,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,55536,0.7,20.0,2.1,34884.0,447607.0,2.4,65,2.3,54327.0,90,80.0,500.0,19.0,12.8,79,2.0,88,82.1,74,56,1.1,4.3,15.52,6.8,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,54866,0.2,22.9,2.6,34421.0,478240.0,3.8,70,0.5,55342.0,93,92.0,517.0,17.0,7.1,90,2.9,68,82.1,89,78,1.2,3.3,14.57,7.0,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,16616,9.4,18.4,1.9,30490.151515,135787.0,7.0,56,1.737838,26729.0,88,67.0,438.0,17.0,23.4,62,1.3,47,80.6,60,41,2.4,7.7,15.069091,6.2,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [47]:
# Convert boolean columns to integers
bool_columns = df.select_dtypes(include=['bool']).columns.tolist()
df[bool_columns] = df[bool_columns].astype(int)

In [48]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38 entries, 0 to 37
Data columns (total 62 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   GDP per capita (USD)                                 38 non-null     int64  
 1     Dwellings without basic facilities                 38 non-null     float64
 2     Housing expenditure                                38 non-null     float64
 3     Rooms per person                                   38 non-null     float64
 4     Household net adjusted disposable income           38 non-null     float64
 5     Household net wealth                               38 non-null     float64
 6     Labour market insecurity                           38 non-null     float64
 7     Employment rate                                    38 non-null     int64  
 8     Long-term unemployment rate                        38 non-null     flo

##

## Scaling

In [49]:
df.head(2)

Unnamed: 0,GDP per capita (USD),Dwellings without basic facilities,Housing expenditure,Rooms per person,Household net adjusted disposable income,Household net wealth,Labour market insecurity,Employment rate,Long-term unemployment rate,Personal earnings,Quality of support network,Educational attainment,Student skills,Years in education,Air pollution,Water quality,Stakeholder engagement for developing regulations,Voter turnout,Life expectancy,Self-reported health,Feeling safe walking alone at night,Homicide rate,Employees working very long hours,Time devoted to leisure and personal care,Life satisfaction,Country_Austria,Country_Belgium,Country_Canada,Country_Chile,Country_Colombia,Country_Costa Rica,Country_Czechia,Country_Denmark,Country_Estonia,Country_Finland,Country_France,Country_Germany,Country_Greece,Country_Hungary,Country_Iceland,Country_Ireland,Country_Israel,Country_Italy,Country_Japan,Country_Korea,Country_Latvia,Country_Lithuania,Country_Luxembourg,Country_Mexico,Country_Netherlands,Country_New Zealand,Country_Norway,Country_Poland,Country_Portugal,Country_Slovak Republic,Country_Slovenia,Country_Spain,Country_Sweden,Country_Switzerland,Country_Türkiye,Country_United Kingdom,Country_United States
0,66589,3.051429,19.4,1.675676,37433.0,528768.0,3.1,73,1.0,55206.0,93,84.0,499.0,20.0,6.7,92,2.7,92,83.0,85,67,0.9,12.5,14.36,7.1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,59225,0.8,20.8,1.6,37001.0,309637.0,2.3,72,1.3,53132.0,92,86.0,491.0,17.0,12.2,92,1.3,76,82.0,71,86,0.5,5.3,14.51,7.2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [60]:

# Standard Scaling (Z-score normalization)
standard_scaler = StandardScaler()
df_scaled = pd.DataFrame(standard_scaler.fit_transform(df), columns=df.columns)
df_scaled.head(2)

Unnamed: 0,GDP per capita (USD),Dwellings without basic facilities,Housing expenditure,Rooms per person,Household net adjusted disposable income,Household net wealth,Labour market insecurity,Employment rate,Long-term unemployment rate,Personal earnings,Quality of support network,Educational attainment,Student skills,Years in education,Air pollution,Water quality,Stakeholder engagement for developing regulations,Voter turnout,Life expectancy,Self-reported health,Feeling safe walking alone at night,Homicide rate,Employees working very long hours,Time devoted to leisure and personal care,Life satisfaction,Country_Austria,Country_Belgium,Country_Canada,Country_Chile,Country_Colombia,Country_Costa Rica,Country_Czechia,Country_Denmark,Country_Estonia,Country_Finland,Country_France,Country_Germany,Country_Greece,Country_Hungary,Country_Iceland,Country_Ireland,Country_Israel,Country_Italy,Country_Japan,Country_Korea,Country_Latvia,Country_Lithuania,Country_Luxembourg,Country_Mexico,Country_Netherlands,Country_New Zealand,Country_Norway,Country_Poland,Country_Portugal,Country_Slovak Republic,Country_Slovenia,Country_Spain,Country_Sweden,Country_Switzerland,Country_Türkiye,Country_United Kingdom,Country_United States
0,0.646065,-8.736284000000001e-17,-0.462269,5.511325e-16,0.924406,1.286025,-0.519628,0.579619,-0.395213,0.865446,0.358892,0.344357,0.439395,1.778507,-1.071781,0.846303,0.960689,1.854189,0.794315,1.266362,-0.534234,-0.297719,0.759313,-1.570543,0.628841,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399
1,0.388197,-0.4429092,0.129146,-0.1878331,0.866887,-0.089934,-0.719259,0.446131,-0.234522,0.708305,0.172072,0.482848,0.159091,-0.414985,-0.165989,0.846303,-1.484701,0.558388,0.400269,0.184974,0.949292,-0.369323,-0.272748,-1.238313,0.781044,6.082763,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399,-0.164399


In [64]:
# Min-Max Scaling
min_max_scaler = MinMaxScaler()
df_min_max_scaled = pd.DataFrame(min_max_scaler.fit_transform(df), columns = df.columns)

In [65]:
df_min_max_scaled.head(2)

Unnamed: 0,GDP per capita (USD),Dwellings without basic facilities,Housing expenditure,Rooms per person,Household net adjusted disposable income,Household net wealth,Labour market insecurity,Employment rate,Long-term unemployment rate,Personal earnings,Quality of support network,Educational attainment,Student skills,Years in education,Air pollution,Water quality,Stakeholder engagement for developing regulations,Voter turnout,Life expectancy,Self-reported health,Feeling safe walking alone at night,Homicide rate,Employees working very long hours,Time devoted to leisure and personal care,Life satisfaction,Country_Austria,Country_Belgium,Country_Canada,Country_Chile,Country_Colombia,Country_Costa Rica,Country_Czechia,Country_Denmark,Country_Estonia,Country_Finland,Country_France,Country_Germany,Country_Greece,Country_Hungary,Country_Iceland,Country_Ireland,Country_Israel,Country_Italy,Country_Japan,Country_Korea,Country_Latvia,Country_Lithuania,Country_Luxembourg,Country_Mexico,Country_Netherlands,Country_New Zealand,Country_Norway,Country_Poland,Country_Portugal,Country_Slovak Republic,Country_Slovenia,Country_Spain,Country_Sweden,Country_Switzerland,Country_Türkiye,Country_United Kingdom,Country_United States
0,0.4777,0.117816,0.370079,0.422297,0.606801,0.521539,0.101449,0.78125,0.092593,0.733155,0.761905,0.807692,0.775,1.0,0.055046,0.833333,0.75,1.0,0.849462,0.927273,0.5,0.026316,0.456929,0.109705,0.733333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.41834,0.030888,0.480315,0.375,0.594415,0.267302,0.062802,0.75,0.12037,0.694142,0.714286,0.846154,0.708333,0.5,0.307339,0.833333,0.05,0.659574,0.741935,0.672727,0.865385,0.011278,0.187266,0.172996,0.766667,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [67]:
# Robust Scaling (handles outliers better)
robust_scaler = RobustScaler()
df_robust_scaled = pd.DataFrame(robust_scaler.fit_transform(df), columns = df.columns)

In [68]:
df_robust_scaled.head(2)

Unnamed: 0,GDP per capita (USD),Dwellings without basic facilities,Housing expenditure,Rooms per person,Household net adjusted disposable income,Household net wealth,Labour market insecurity,Employment rate,Long-term unemployment rate,Personal earnings,Quality of support network,Educational attainment,Student skills,Years in education,Air pollution,Water quality,Stakeholder engagement for developing regulations,Voter turnout,Life expectancy,Self-reported health,Feeling safe walking alone at night,Homicide rate,Employees working very long hours,Time devoted to leisure and personal care,Life satisfaction,Country_Austria,Country_Belgium,Country_Canada,Country_Chile,Country_Colombia,Country_Costa Rica,Country_Czechia,Country_Denmark,Country_Estonia,Country_Finland,Country_France,Country_Germany,Country_Greece,Country_Hungary,Country_Iceland,Country_Ireland,Country_Israel,Country_Italy,Country_Japan,Country_Korea,Country_Latvia,Country_Lithuania,Country_Luxembourg,Country_Mexico,Country_Netherlands,Country_New Zealand,Country_Norway,Country_Poland,Country_Portugal,Country_Slovak Republic,Country_Slovenia,Country_Spain,Country_Sweden,Country_Switzerland,Country_Türkiye,Country_United Kingdom,Country_United States
0,0.711413,0.842184,-0.397922,-0.048649,0.708599,1.773691,-0.457912,0.342857,-0.153846,0.514721,0.0,0.076923,0.156863,2.0,-0.595041,0.651163,0.647059,1.548387,0.282051,1.038462,-0.754717,0.129032,1.118881,-3.904881,0.325581,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.484814,0.052605,0.111169,-0.2,0.664508,-0.059923,-0.763187,0.228571,0.076923,0.421262,-0.166667,0.230769,-0.156863,-1.0,0.011019,0.651163,-1.0,0.516129,0.025641,-0.038462,0.679245,-0.387097,0.111888,-3.078849,0.418605,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
