In [4]:
import pandas as pd
import os

In [7]:
# Locate the shared `artifacts` directory. When the kernel runs from
# `<project>/notebook/model`, that suffix is stripped so the path points at
# `<project>/artifacts`; otherwise `artifacts` is resolved under the current directory.
# Built with os.path so it works with both Windows and POSIX separators, and kept as a
# string ending in a separator because file names are appended to it with `+`.
cwd = os.path.normpath(os.getcwd())
notebook_subdir = os.path.join('notebook', 'model')
project_root = cwd[:-len(notebook_subdir)] if cwd.endswith(notebook_subdir) else cwd
artifacts_path = os.path.join(project_root, 'artifacts', '')


In [30]:
# Load the economic indicators dataset from the artifacts directory.
dataset_file = artifacts_path + 'eco-1990-2022.csv'
data = pd.read_csv(dataset_file)

In [31]:
# Impute missing values in each indicator with that column's overall mean,
# computed across every row of the dataset.
columns_to_impute = ['emp_pop_ratio', 'gdp_ppp', 'fr_ratio']
for column in columns_to_impute:
    column_mean = data[column].mean()
    data[column] = data[column].fillna(column_mean)

In [114]:
from sklearn.preprocessing import LabelEncoder

# Map each distinct 'Entity' label to an integer code and store it next to the original column.
# NOTE(review): integer codes impose an arbitrary ordering on the entities — confirm this is
# acceptable for the model that consumes 'Entity_encoded'.
label_encoder = LabelEncoder()
label_encoder.fit(data['Entity'])
data['Entity_encoded'] = label_encoder.transform(data['Entity'])

In [117]:
# Build the reverse lookup {integer code -> entity name} from the fitted encoder,
# then print it for reference.
class_codes = label_encoder.transform(label_encoder.classes_)
entity_mapper = {code: name for code, name in zip(class_codes, label_encoder.classes_)}
print(entity_mapper)

{np.int64(0): 'Germany', np.int64(1): 'India', np.int64(2): 'United States'}


In [64]:
# Model inputs: the calendar year and the integer-encoded entity.
feature_columns = ['Year', 'Entity_encoded']
# Model outputs: the three indicators that are predicted jointly.
target_columns = ['gdp_ppp', 'emp_pop_ratio', 'fr_ratio']

X = data[feature_columns]
Y = data[target_columns]

In [83]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, X_test, Y_train, Y_test = split_parts

In [84]:
# Preview the first three training target rows as plain text.
print(Y_train.head(3).to_string())

         gdp_ppp  emp_pop_ratio  fr_ratio
49   4024.547309         48.119     2.867
70  11161.593200         55.299     1.988
68  10441.989949         53.800     2.039


In [85]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression

# Wrap a plain linear regression so that one independent regressor is fitted per target column.
base_estimator = LinearRegression()
model = MultiOutputRegressor(estimator=base_estimator)
model.fit(X_train, Y_train)

In [86]:
# Predict every target for the held-out rows.
Y_pred = model.predict(X=X_test)

In [88]:
from sklearn.metrics import mean_squared_error, r2_score
# Names of the targets, in the same order as the columns of Y
target_names = ['gdp_ppp', 'emp_pop_ratio', 'fr_ratio']

# 'raw_values' returns one score per target instead of a single averaged number
mse = mean_squared_error(Y_test, Y_pred, multioutput='raw_values')
r2 = r2_score(Y_test, Y_pred, multioutput='raw_values')

# Report MSE and R² target by target
print("Performance Metrics for Each Target:")
for position, target in enumerate(target_names):
    print(f"- {target}: Mean Squared Error = {round(mse[position], 2)}, R-squared = {round(r2[position], 2)}")

Performance Metrics for Each Target:
- gdp_ppp: Mean Squared Error = 8111373.5, R-squared = 0.8
- emp_pop_ratio: Mean Squared Error = 74.03, R-squared = 0.1
- fr_ratio: Mean Squared Error = 0.41, R-squared = 0.17


In [100]:
# Forecast horizon: calendar years 2035 through 2040, inclusive.
first_pred_year, last_pred_year = 2035, 2040
pred_year_range = range(first_pred_year, last_pred_year + 1)

In [101]:
# Build one input record per (entity, year) pair: entities vary in the outer loop,
# forecast years in the inner loop.
encoded_entities = data['Entity_encoded'].unique().tolist()
test_data_prep = [
    {'Year': year, 'Entity_encoded': entity_code}
    for entity_code in encoded_entities
    for year in pred_year_range
]

In [97]:
# Turn the list of input records into a DataFrame (one row per record) for model.predict.
test_data_prep_df = pd.DataFrame.from_records(test_data_prep)

In [98]:
# Display the prediction input grid to check the Year / Entity_encoded combinations.
test_data_prep_df

Unnamed: 0,Year,Entity_encoded
0,2035,0
1,2036,0
2,2037,0
3,2038,0
4,2039,0
5,2040,0
6,2035,1
7,2036,1
8,2037,1
9,2038,1


In [108]:
# Print each input record together with its position in the list, as an (index, record) tuple.
for indexed_record in enumerate(test_data_prep):
    print(indexed_record)

(0, {'Year': 2035, 'Entity_encoded': 0})
(1, {'Year': 2036, 'Entity_encoded': 0})
(2, {'Year': 2037, 'Entity_encoded': 0})
(3, {'Year': 2038, 'Entity_encoded': 0})
(4, {'Year': 2039, 'Entity_encoded': 0})
(5, {'Year': 2040, 'Entity_encoded': 0})
(6, {'Year': 2035, 'Entity_encoded': 1})
(7, {'Year': 2036, 'Entity_encoded': 1})
(8, {'Year': 2037, 'Entity_encoded': 1})
(9, {'Year': 2038, 'Entity_encoded': 1})
(10, {'Year': 2039, 'Entity_encoded': 1})
(11, {'Year': 2040, 'Entity_encoded': 1})
(12, {'Year': 2035, 'Entity_encoded': 2})
(13, {'Year': 2036, 'Entity_encoded': 2})
(14, {'Year': 2037, 'Entity_encoded': 2})
(15, {'Year': 2038, 'Entity_encoded': 2})
(16, {'Year': 2039, 'Entity_encoded': 2})
(17, {'Year': 2040, 'Entity_encoded': 2})


In [118]:
# Predict all three indicators for every row of the forecast input frame.
predicted_values = model.predict(test_data_prep_df)

# Walk the input records and prediction rows in parallel; each prediction row holds
# the three values read out below as GDP, employment ratio and FR ratio.
for record, prediction in zip(test_data_prep, predicted_values):
    gdp_ppp, emp_pop_ratio, fr_ratio = prediction
    entity_name = entity_mapper[record['Entity_encoded']]
    print(f"Predicted values for {entity_name} @ {record['Year']}:")
    print(f"GDP (PPP): {gdp_ppp}")
    print(f"Emp-Pop Ratio: {emp_pop_ratio}")
    print(f"FR Ratio: {fr_ratio}")
    print('--------------------------------------')

Predicted values for Germany @ 2035:
GDP (PPP): 8442.036272551166
Emp-Pop Ratio: 44.206366246891236
FR Ratio: 1.1494311234849448
--------------------------------------
Predicted values for Germany @ 2036:
GDP (PPP): 8666.12005017756
Emp-Pop Ratio: 44.194347415584616
FR Ratio: 1.1251919951157134
--------------------------------------
Predicted values for Germany @ 2037:
GDP (PPP): 8890.203827804013
Emp-Pop Ratio: 44.182328584278
FR Ratio: 1.100952866746482
--------------------------------------
Predicted values for Germany @ 2038:
GDP (PPP): 9114.287605430407
Emp-Pop Ratio: 44.170309752971384
FR Ratio: 1.0767137383772507
--------------------------------------
Predicted values for Germany @ 2039:
GDP (PPP): 9338.371383056801
Emp-Pop Ratio: 44.158290921664765
FR Ratio: 1.0524746100080122
--------------------------------------
Predicted values for Germany @ 2040:
GDP (PPP): 9562.455160683254
Emp-Pop Ratio: 44.14627209035815
FR Ratio: 1.0282354816387809
-------------------------------------