In [19]:
import pandas as pd
import numpy as np
from sklearn.linear_model import ElasticNetCV, LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns


In [3]:
# Load the pre-built modelling table. The path is relative to the notebook's
# working directory, and the file is decoded as Latin-1.
df = pd.read_csv(
    '../data/model_data.csv',
    encoding='latin1',
)

# Structural sanity check: entry/column counts, dtypes and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 139364 entries, 0 to 139363
Columns: 156 entries, female to economy_Zimbabwe
dtypes: bool(138), int64(18)
memory usage: 37.5 MB


## Linear Regression

In [4]:
# Separate the response from the predictors: `financial_worry` is the target,
# and every other column of `df` is used as a feature.
y = df['financial_worry']
X = df.drop('financial_worry', axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Baseline model: scikit-learn LinearRegression with no arguments, i.e. all
# estimator settings left at the library defaults.
linear_regression = LinearRegression()

In [6]:
# Fit on the training split only; X_test / y_test stay untouched so they can
# serve as held-out data for the evaluation further down.
linear_regression.fit(X_train, y_train)

In [7]:
# Predict the target for the held-out test features; these predictions are
# what the MAE / R^2 / RMSE figures below are computed from.
linear_regression_predictions = linear_regression.predict(X_test)

In [9]:
# Report the fitted linear model: intercept, per-feature coefficients, and
# three error metrics computed on the held-out test split.
print('Linear Regression Model Performance:')
print('Intercept:', linear_regression.intercept_)

# One row per predictor, pairing each column name with its fitted coefficient.
coeff_df = pd.DataFrame(
    {'Feature': X.columns, 'Coefficient': linear_regression.coef_}
)
print(coeff_df)

# MAE is the mean of the absolute residuals on the test split.
linear_regression_abs_errors = (y_test - linear_regression_predictions).abs()
print('Mean Absolute Error:', linear_regression_abs_errors.mean())

print('R^2:', r2_score(y_test, linear_regression_predictions))

# RMSE is the square root of the mean squared error.
linear_regression_mse = mean_squared_error(y_test, linear_regression_predictions)
print('RMSE:', np.sqrt(linear_regression_mse))

Linear Regression Model Performance:
Intercept: 5.723055707304338
                        Feature  Coefficient
0                        female     0.489008
1                           age     0.006210
2                         inc_q    -0.483585
3                        emp_in     0.456957
4                       account     0.175449
..                          ...          ...
150             economy_Vietnam    -1.302297
151  economy_West Bank and Gaza    -0.169306
152         economy_Yemen, Rep.     0.570697
153              economy_Zambia     3.471293
154            economy_Zimbabwe     3.348230

[155 rows x 2 columns]
Mean Absolute Error: 2.775101898423328
R^2: 0.35753286375180615
RMSE: 3.405111176270377


## Elastic Net

In [11]:
# Elastic net whose regularisation is tuned by 5-fold cross-validation
# (cv=5). No alpha grid or l1_ratio is passed, so both are left at the
# scikit-learn defaults.
# NOTE(review): random_state presumably only has an effect when
# selection='random' is used — confirm against the ElasticNetCV docs.
elastic_net = ElasticNetCV(cv=5, random_state=42)

In [12]:
# Fit the cross-validated elastic net on the training split, then predict the
# held-out test split. `fit` returns the estimator itself, so the two steps
# can be chained while `elastic_net` remains the fitted model.
elastic_net_predictions = elastic_net.fit(X_train, y_train).predict(X_test)

In [13]:
# Report the fitted elastic net: intercept, per-feature coefficients, and
# three error metrics computed on the held-out test split.
print('Elastic Net Model Performance:')
print('Intercept:', elastic_net.intercept_)

# One row per predictor, pairing each column name with its fitted coefficient.
# Note: this rebinds `coeff_df`, replacing the linear-regression table that an
# earlier cell stored under the same name.
coeff_df = pd.DataFrame(
    {'Feature': X.columns, 'Coefficient': elastic_net.coef_}
)
print(coeff_df)

# MAE is the mean of the absolute residuals on the test split.
elastic_net_abs_errors = (y_test - elastic_net_predictions).abs()
print('Mean Absolute Error:', elastic_net_abs_errors.mean())

print('R^2:', r2_score(y_test, elastic_net_predictions))

# RMSE is the square root of the mean squared error.
elastic_net_mse = mean_squared_error(y_test, elastic_net_predictions)
print('RMSE:', np.sqrt(elastic_net_mse))

Elastic Net Model Performance:
Intercept: 7.180923558644167
                        Feature  Coefficient
0                        female     0.389007
1                           age    -0.013258
2                         inc_q    -0.345423
3                        emp_in     0.631162
4                       account     0.000000
..                          ...          ...
150             economy_Vietnam    -0.000000
151  economy_West Bank and Gaza    -0.000000
152         economy_Yemen, Rep.    -0.000000
153              economy_Zambia     0.278643
154            economy_Zimbabwe     0.319921

[155 rows x 2 columns]
Mean Absolute Error: 3.111520125397602
R^2: 0.2439525127519272
RMSE: 3.6938595175074758


In [15]:
# Show every row of the coefficient table (at this point `coeff_df` holds the
# elastic-net coefficients, since the elastic-net report cell rebound it).
#
# The row limit is lifted only inside this `with` block. Calling
# pd.set_option('display.max_rows', None) would change the option for the
# whole kernel session, so any later DataFrame display — including the
# ~139k-row `df` — would be rendered in full.
with pd.option_context('display.max_rows', None):
    # A bare trailing expression is not auto-rendered inside a `with` block,
    # so call Jupyter's built-in `display` explicitly for the rich output.
    display(coeff_df)

Unnamed: 0,Feature,Coefficient
0,female,0.389007
1,age,-0.013258
2,inc_q,-0.345423
3,emp_in,0.631162
4,account,0.0
5,borrowed,1.428872
6,saved,-1.041963
7,receive_wages,-0.209861
8,receive_transfers,0.0
9,receive_pension,-0.466039


## Random Forest Regressor