In [153]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import HuberRegressor
from sklearn.metrics import r2_score, mean_squared_error

gd = pd.read_csv('global-data-on-sustainable-energy (1).csv')
gd.head()

Unnamed: 0,Entity,Year,Access to electricity (% of population),Access to clean fuels for cooking,Renewable-electricity-generating-capacity-per-capita,Financial flows to developing countries (US $),Renewable energy share in the total final energy consumption (%),Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh),...,Primary energy consumption per capita (kWh/person),Energy intensity level of primary energy (MJ/$2017 PPP GDP),Value_co2_emissions_kt_by_country,Renewables (% equivalent primary energy),gdp_growth,gdp_per_capita,Density\n(P/Km2),Land Area(Km2),Latitude,Longitude
0,Afghanistan,2000,1.613591,6.2,9.22,20000.0,44.99,0.16,0.0,0.31,...,302.59482,1.64,760.0,,,,60,652230.0,33.93911,67.709953
1,Afghanistan,2001,4.074574,7.2,8.86,130000.0,45.6,0.09,0.0,0.5,...,236.89185,1.74,730.0,,,,60,652230.0,33.93911,67.709953
2,Afghanistan,2002,9.409158,8.2,8.47,3950000.0,37.83,0.13,0.0,0.56,...,210.86215,1.4,1029.999971,,,179.426579,60,652230.0,33.93911,67.709953
3,Afghanistan,2003,14.738506,9.5,8.09,25970000.0,36.66,0.31,0.0,0.63,...,229.96822,1.4,1220.000029,,8.832278,190.683814,60,652230.0,33.93911,67.709953
4,Afghanistan,2004,20.064968,10.9,7.75,,44.24,0.33,0.0,0.56,...,204.23125,1.2,1029.999971,,1.414118,211.382074,60,652230.0,33.93911,67.709953


## Feature Engineering

In [154]:
gd.columns

Index(['Entity', 'Year', 'Access to electricity (% of population)',
       'Access to clean fuels for cooking',
       'Renewable-electricity-generating-capacity-per-capita',
       'Financial flows to developing countries (US $)',
       'Renewable energy share in the total final energy consumption (%)',
       'Electricity from fossil fuels (TWh)', 'Electricity from nuclear (TWh)',
       'Electricity from renewables (TWh)',
       'Low-carbon electricity (% electricity)',
       'Primary energy consumption per capita (kWh/person)',
       'Energy intensity level of primary energy (MJ/$2017 PPP GDP)',
       'Value_co2_emissions_kt_by_country',
       'Renewables (% equivalent primary energy)', 'gdp_growth',
       'gdp_per_capita', 'Density\n(P/Km2)', 'Land Area(Km2)', 'Latitude',
       'Longitude'],
      dtype='object')

In [155]:
gd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3649 entries, 0 to 3648
Data columns (total 21 columns):
 #   Column                                                            Non-Null Count  Dtype  
---  ------                                                            --------------  -----  
 0   Entity                                                            3649 non-null   object 
 1   Year                                                              3649 non-null   int64  
 2   Access to electricity (% of population)                           3639 non-null   float64
 3   Access to clean fuels for cooking                                 3480 non-null   float64
 4   Renewable-electricity-generating-capacity-per-capita              2718 non-null   float64
 5   Financial flows to developing countries (US $)                    1560 non-null   float64
 6   Renewable energy share in the total final energy consumption (%)  3455 non-null   float64
 7   Electricity from fossil fuels (TW

In [156]:
gd['Density\\n(P/Km2)']

0       60
1       60
2       60
3       60
4       60
        ..
3644    38
3645    38
3646    38
3647    38
3648    38
Name: Density\n(P/Km2), Length: 3649, dtype: object

In [157]:
target_dtype = int
fil_values = gd[gd['Density\\n(P/Km2)'].apply(lambda x: isinstance(x, target_dtype))]['Density\\n(P/Km2)']
fil_values

Series([], Name: Density\n(P/Km2), dtype: object)

In [158]:
# Converting the Density\n(P/Km2) column datatype

# gd['Density\\n(P/Km2)'] = gd['Density\\n(P/Km2)'].astype(float)

gd['Density\\n(P/Km2)'] = gd['Density\\n(P/Km2)'].replace({',': ''}, regex=True).astype(float)

gd['Density\\n(P/Km2)']

0       60.0
1       60.0
2       60.0
3       60.0
4       60.0
        ... 
3644    38.0
3645    38.0
3646    38.0
3647    38.0
3648    38.0
Name: Density\n(P/Km2), Length: 3649, dtype: float64

In [159]:
gd['Entity'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bosnia and Herzegovina', 'Botswana',
       'Brazil', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia',
       'Cameroon', 'Canada', 'Cayman Islands', 'Central African Republic',
       'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo',
       'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'French Guiana', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana',
       'Greece', 'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau',
       'Guyana', 'Haiti', 'Honduras', 'Hungary', 'Icelan

In [160]:
gd.groupby('Entity').mean()

Unnamed: 0_level_0,Year,Access to electricity (% of population),Access to clean fuels for cooking,Renewable-electricity-generating-capacity-per-capita,Financial flows to developing countries (US $),Renewable energy share in the total final energy consumption (%),Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh),Low-carbon electricity (% electricity),Primary energy consumption per capita (kWh/person),Energy intensity level of primary energy (MJ/$2017 PPP GDP),Value_co2_emissions_kt_by_country,Renewables (% equivalent primary energy),gdp_growth,gdp_per_capita,Density\n(P/Km2),Land Area(Km2),Latitude,Longitude
Entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Afghanistan,2010.0,52.520488,19.802381,8.581429,3.296947e+07,25.6805,0.189524,0.0,0.740952,78.872636,697.268136,2.039000,4073.000025,,6.163893,439.055765,60.0,652230.0,33.939110,67.709953
Albania,2010.0,99.981429,63.592857,,,,0.048571,0.0,5.184286,98.855430,11353.520929,3.177500,4303.000004,,3.859305,3623.224775,105.0,28748.0,41.153332,20.168331
Algeria,2010.0,99.108146,98.904762,9.390000,5.056250e+05,0.2870,46.895714,0.0,0.365238,0.742125,12925.532952,4.498500,119921.000066,0.185239,2.885714,3787.817377,18.0,2381741.0,28.033886,1.659626
Angola,2010.0,35.138564,44.933333,43.951429,9.525000e+06,57.6970,2.040476,0.0,4.420952,68.980548,3102.008457,2.896500,21689.499863,,4.945895,2982.568601,26.0,1246700.0,-11.202692,17.873887
Antigua and Barbuda,2010.0,98.823761,100.000000,22.455238,9.700000e+06,0.2005,0.271429,0.0,0.002857,0.861557,32065.486190,3.624000,512.999998,,1.515946,13555.401528,223.0,443.0,17.060816,-61.796428
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Uzbekistan,2010.0,99.848840,84.976190,61.972381,5.868556e+07,1.2385,43.476667,0.0,6.438095,12.974661,19346.599286,15.948500,118427.000704,3.192610,6.253772,1438.250604,79.0,447400.0,41.377491,64.585262
Vanuatu,2010.0,41.269784,12.050000,19.339524,5.694000e+06,43.6560,0.046667,0.0,0.008095,11.904762,2471.556643,3.516500,122.999999,,2.441195,2458.898510,25.0,12189.0,-15.376706,166.959158
Yemen,2010.0,60.928522,59.290476,1.672381,1.059562e+07,1.3115,4.907619,0.0,0.091905,2.593720,2998.960281,3.067611,,,,,56.0,527968.0,15.552727,48.516388
Zambia,2010.0,27.753585,14.471429,146.577619,1.881594e+08,86.8955,0.492381,0.0,10.701429,96.479406,3027.719648,9.181500,3559.000016,,5.442444,1144.799404,25.0,752618.0,-13.133897,27.849332


#### Missing Values

In [161]:
gd.isna().sum()

Entity                                                                 0
Year                                                                   0
Access to electricity (% of population)                               10
Access to clean fuels for cooking                                    169
Renewable-electricity-generating-capacity-per-capita                 931
Financial flows to developing countries (US $)                      2089
Renewable energy share in the total final energy consumption (%)     194
Electricity from fossil fuels (TWh)                                   21
Electricity from nuclear (TWh)                                       126
Electricity from renewables (TWh)                                     21
Low-carbon electricity (% electricity)                                42
Primary energy consumption per capita (kWh/person)                     0
Energy intensity level of primary energy (MJ/$2017 PPP GDP)          207
Value_co2_emissions_kt_by_country                  

In [162]:
# Replacing missing values with the mean of each column

column_means = gd.mean()
gd.fillna(column_means, inplace=True)
gd.isna().sum()

  column_means = gd.mean()


Entity                                                              0
Year                                                                0
Access to electricity (% of population)                             0
Access to clean fuels for cooking                                   0
Renewable-electricity-generating-capacity-per-capita                0
Financial flows to developing countries (US $)                      0
Renewable energy share in the total final energy consumption (%)    0
Electricity from fossil fuels (TWh)                                 0
Electricity from nuclear (TWh)                                      0
Electricity from renewables (TWh)                                   0
Low-carbon electricity (% electricity)                              0
Primary energy consumption per capita (kWh/person)                  0
Energy intensity level of primary energy (MJ/$2017 PPP GDP)         0
Value_co2_emissions_kt_by_country                                   0
Renewables (% equiva

#### Outliers

In [163]:
# Checking for outliers in the CO2 emmissions column using z-score

mean_value = gd['Value_co2_emissions_kt_by_country'].mean()
std_dev = gd['Value_co2_emissions_kt_by_country'].std()
z_scores = (gd['Value_co2_emissions_kt_by_country'] - mean_value) / std_dev
outliers = gd[abs(z_scores) > 2]
outliers

# The outliers will be sorted by scaling and/or huber regression

Unnamed: 0,Entity,Year,Access to electricity (% of population),Access to clean fuels for cooking,Renewable-electricity-generating-capacity-per-capita,Financial flows to developing countries (US $),Renewable energy share in the total final energy consumption (%),Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh),...,Primary energy consumption per capita (kWh/person),Energy intensity level of primary energy (MJ/$2017 PPP GDP),Value_co2_emissions_kt_by_country,Renewables (% equivalent primary energy),gdp_growth,gdp_per_capita,Density\n(P/Km2),Land Area(Km2),Latitude,Longitude
714,China,2000,97.0218,42.0,58.78,243620000.0,29.63,1113.3,16.74,225.56,...,9334.936,10.85,3346530.0,5.656925,8.490093,959.372484,153.0,9596960.0,35.86166,104.195397
715,China,2001,97.27279,42.3,61.01,325050000.0,28.36,1182.59,17.47,280.73,...,9797.812,10.36,3529080.0,6.618624,8.335733,1053.108243,153.0,9596960.0,35.86166,104.195397
716,China,2002,97.516205,43.1,63.68,218270000.0,27.0,1337.46,25.13,291.41,...,10602.087,10.12,3810060.0,6.280488,9.133631,1148.50829,153.0,9596960.0,35.86166,104.195397
717,China,2003,97.75438,43.4,70.24,22510000.0,23.86,1579.96,43.34,287.28,...,12273.32,10.48,4415910.0,5.301109,10.03803,1288.643252,153.0,9596960.0,35.86166,104.195397
718,China,2004,97.98967,44.9,77.85,32680000.0,20.17,1795.41,50.47,357.43,...,14272.745,10.83,5124820.0,5.60094,10.113621,1508.668098,153.0,9596960.0,35.86166,104.195397
719,China,2005,98.22443,46.3,86.43,155060000.0,17.44,2042.8,53.09,404.37,...,16114.158,10.72,5824630.0,5.554481,11.394592,1753.417829,153.0,9596960.0,35.86166,104.195397
720,China,2006,98.46105,47.7,96.27,127720000.0,16.39,2364.16,54.84,446.72,...,17552.816,10.41,6437470.0,5.577982,12.720956,2099.229435,153.0,9596960.0,35.86166,104.195397
721,China,2007,98.7022,49.4,109.0,24770000.0,14.88,2718.7,62.13,500.71,...,18955.008,9.81,6993180.0,5.718535,14.230861,2693.970063,153.0,9596960.0,35.86166,104.195397
722,China,2008,98.95058,51.1,128.8,142770000.0,14.14,2762.29,68.39,665.08,...,19560.7,9.18,7199600.0,7.268667,9.650679,3468.304602,153.0,9596960.0,35.86166,104.195397
723,China,2009,99.2089,53.5,150.78,58540000.0,13.43,2980.2,70.05,664.39,...,20281.068,8.95,7719070.0,6.935532,9.398726,3832.236432,153.0,9596960.0,35.86166,104.195397


### Preprocessing

In [164]:
# Encoding the Entity Column

le = LabelEncoder()
gd['Entity'] = le.fit_transform(gd['Entity'])
gd.head()

Unnamed: 0,Entity,Year,Access to electricity (% of population),Access to clean fuels for cooking,Renewable-electricity-generating-capacity-per-capita,Financial flows to developing countries (US $),Renewable energy share in the total final energy consumption (%),Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh),...,Primary energy consumption per capita (kWh/person),Energy intensity level of primary energy (MJ/$2017 PPP GDP),Value_co2_emissions_kt_by_country,Renewables (% equivalent primary energy),gdp_growth,gdp_per_capita,Density\n(P/Km2),Land Area(Km2),Latitude,Longitude
0,0,2000,1.613591,6.2,9.22,20000.0,44.99,0.16,0.0,0.31,...,302.59482,1.64,760.0,11.986707,3.44161,13283.774348,60.0,652230.0,33.93911,67.709953
1,0,2001,4.074574,7.2,8.86,130000.0,45.6,0.09,0.0,0.5,...,236.89185,1.74,730.0,11.986707,3.44161,13283.774348,60.0,652230.0,33.93911,67.709953
2,0,2002,9.409158,8.2,8.47,3950000.0,37.83,0.13,0.0,0.56,...,210.86215,1.4,1029.999971,11.986707,3.44161,179.426579,60.0,652230.0,33.93911,67.709953
3,0,2003,14.738506,9.5,8.09,25970000.0,36.66,0.31,0.0,0.63,...,229.96822,1.4,1220.000029,11.986707,8.832278,190.683814,60.0,652230.0,33.93911,67.709953
4,0,2004,20.064968,10.9,7.75,94224000.0,44.24,0.33,0.0,0.56,...,204.23125,1.2,1029.999971,11.986707,1.414118,211.382074,60.0,652230.0,33.93911,67.709953


In [165]:
gd.corr()

Unnamed: 0,Entity,Year,Access to electricity (% of population),Access to clean fuels for cooking,Renewable-electricity-generating-capacity-per-capita,Financial flows to developing countries (US $),Renewable energy share in the total final energy consumption (%),Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh),...,Primary energy consumption per capita (kWh/person),Energy intensity level of primary energy (MJ/$2017 PPP GDP),Value_co2_emissions_kt_by_country,Renewables (% equivalent primary energy),gdp_growth,gdp_per_capita,Density\n(P/Km2),Land Area(Km2),Latitude,Longitude
Entity,1.0,0.006182,-0.016878,-0.010785,-0.055576,-0.003156,-0.00196,0.026273,0.084212,-0.048606,...,0.063304,0.143391,0.00769,-0.043752,0.003065,-0.006496,0.028586,-0.107311,-0.067975,0.115318
Year,0.006182,1.0,0.124581,0.071555,0.090168,0.084778,-0.027241,0.032521,-0.004736,0.073722,...,0.004196,-0.149249,0.0221,0.06746,-0.18339,0.129667,-0.001683,-0.001628,0.003505,0.000944
Access to electricity (% of population),-0.016878,0.124581,1.0,0.854515,0.24644,0.041712,-0.766297,0.123863,0.127744,0.137903,...,0.454571,-0.245652,0.12198,0.009838,-0.107104,0.403257,0.09979,0.054956,0.420443,-0.102838
Access to clean fuels for cooking,-0.010785,0.071555,0.854515,1.0,0.194777,0.026121,-0.75596,0.100569,0.161107,0.124203,...,0.547108,-0.184132,0.090569,0.023922,-0.121557,0.475801,0.097955,0.074291,0.426953,-0.127654
Renewable-electricity-generating-capacity-per-capita,-0.055576,0.090168,0.24644,0.194777,1.0,0.00304,0.046615,0.040453,0.014609,0.128832,...,0.000242,-0.051317,0.036642,0.094556,-0.011733,0.00403,-0.077792,0.047971,-0.027931,-0.023933
Financial flows to developing countries (US $),-0.003156,0.084778,0.041712,0.026121,0.00304,1.0,-0.003756,0.103063,0.020033,0.096624,...,0.001755,-0.002617,0.10108,-0.011312,0.002249,0.006942,0.004383,0.08844,0.011779,0.020154
Renewable energy share in the total final energy consumption (%),-0.00196,-0.027241,-0.766297,-0.75596,0.046615,-0.003756,1.0,-0.129112,-0.131473,-0.063157,...,-0.420561,0.245761,-0.128787,0.26775,0.067505,-0.346178,-0.156475,-0.044281,-0.32141,0.017843
Electricity from fossil fuels (TWh),0.026273,0.032521,0.123863,0.100569,0.040453,0.103063,-0.129112,1.0,0.645919,0.845067,...,0.105361,0.029734,0.948809,-0.086205,0.020893,0.107579,-0.010303,0.627389,0.110639,0.044613
Electricity from nuclear (TWh),0.084212,-0.004736,0.127744,0.161107,0.014609,0.020033,-0.131473,0.645919,1.0,0.491857,...,0.17898,-0.00728,0.582779,-0.050948,-0.048044,0.230436,-0.028876,0.468429,0.17346,-0.083877
Electricity from renewables (TWh),-0.048606,0.073722,0.137903,0.124203,0.128832,0.096624,-0.063157,0.845067,0.491857,1.0,...,0.119569,0.008651,0.786492,0.161226,-0.0025,0.139354,-0.036013,0.704554,0.112472,-0.016092


#### Selecting Features

In [166]:
columns_to_drop = ['Financial flows to developing countries (US $)', 
                   'Energy intensity level of primary energy (MJ/$2017 PPP GDP)',
                   'gdp_growth', 'gdp_per_capita', 'Land Area(Km2)', 'Latitude', 'Longitude']
gd.drop(columns=columns_to_drop, inplace=True)
gd.columns

Index(['Entity', 'Year', 'Access to electricity (% of population)',
       'Access to clean fuels for cooking',
       'Renewable-electricity-generating-capacity-per-capita',
       'Renewable energy share in the total final energy consumption (%)',
       'Electricity from fossil fuels (TWh)', 'Electricity from nuclear (TWh)',
       'Electricity from renewables (TWh)',
       'Low-carbon electricity (% electricity)',
       'Primary energy consumption per capita (kWh/person)',
       'Value_co2_emissions_kt_by_country',
       'Renewables (% equivalent primary energy)', 'Density\n(P/Km2)'],
      dtype='object')

#### Separating Features and target variable

In [167]:
X = gd.drop(columns='Value_co2_emissions_kt_by_country')
y = gd['Value_co2_emissions_kt_by_country']
X

Unnamed: 0,Entity,Year,Access to electricity (% of population),Access to clean fuels for cooking,Renewable-electricity-generating-capacity-per-capita,Renewable energy share in the total final energy consumption (%),Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh),Low-carbon electricity (% electricity),Primary energy consumption per capita (kWh/person),Renewables (% equivalent primary energy),Density\n(P/Km2)
0,0,2000,1.613591,6.2,9.22,44.99,0.16,0.0,0.31,65.957440,302.59482,11.986707,60.0
1,0,2001,4.074574,7.2,8.86,45.60,0.09,0.0,0.50,84.745766,236.89185,11.986707,60.0
2,0,2002,9.409158,8.2,8.47,37.83,0.13,0.0,0.56,81.159424,210.86215,11.986707,60.0
3,0,2003,14.738506,9.5,8.09,36.66,0.31,0.0,0.63,67.021280,229.96822,11.986707,60.0
4,0,2004,20.064968,10.9,7.75,44.24,0.33,0.0,0.56,62.921350,204.23125,11.986707,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3644,175,2016,42.561730,29.8,62.88,81.90,3.50,0.0,3.32,48.680350,3227.68020,11.986707,38.0
3645,175,2017,44.178635,29.8,62.33,82.46,3.05,0.0,4.30,58.503407,3068.01150,11.986707,38.0
3646,175,2018,45.572647,29.9,82.53,80.23,3.73,0.0,5.46,59.412407,3441.98580,11.986707,38.0
3647,175,2019,46.781475,30.1,81.40,81.50,3.66,0.0,4.58,55.582527,3003.65530,11.986707,38.0


In [168]:
y

0          760.000000
1          730.000000
2         1029.999971
3         1220.000029
4         1029.999971
            ...      
3644     11020.000460
3645     10340.000150
3646     12380.000110
3647     11760.000230
3648    159866.462686
Name: Value_co2_emissions_kt_by_country, Length: 3649, dtype: float64

### Feature Splitting

In [169]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

#### Robust Scaling

In [170]:
scl = RobustScaler()
X_train = scl.fit_transform(X_train)
X_test = scl.fit_transform(X_test)

### Model

In [171]:
# gb_regressor = GradientBoostingRegressor()
# gb_regressor.fit(X_train, y_train)
hb_regressor = GradientBoostingRegressor()
hb_regressor.fit(X_train, y_train)

GradientBoostingRegressor()

In [172]:
y_pred = hb_regressor.predict(X_test)
y_pred

array([ 8.90549683e+02,  9.26166428e+04,  5.33395163e+03,  4.39813848e+03,
        2.70319585e+05,  1.34264927e+05,  5.40283694e+04,  1.98804223e+05,
        1.18134609e+04,  1.69084184e+05,  9.29040478e+02,  4.69312265e+04,
        2.13360128e+05,  2.91645834e+04,  1.96081518e+05,  1.33718664e+04,
        9.74254286e+04,  1.50419102e+04, -8.46347518e+03,  5.07629660e+03,
        1.65806124e+04,  2.10385189e+04,  3.10715574e+04,  2.69592662e+04,
        1.57024000e+04,  5.46294350e+04,  7.10680359e+03,  7.88003440e+03,
        1.86533894e+04,  3.91518839e+05,  4.44842705e+03,  2.62241825e+04,
        1.36283410e+04,  3.18959761e+03,  3.00256107e+04,  2.27659597e+04,
        1.83695247e+04,  2.49439573e+05,  2.02901921e+04,  6.30904204e+03,
        2.72089519e+05,  1.01431908e+04,  3.05023723e+05,  3.01179257e+04,
        2.93704605e+05,  2.14753284e+04,  1.14661195e+03,  6.60178273e+04,
       -9.66043869e+03,  5.25658036e+04,  1.56002974e+04,  7.09994093e+04,
        8.59657640e+03,  

#### Model Evaluation

In [173]:
r2score = r2_score(y_test, y_pred)
r2score

0.943543171273237