# In [4]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# --- Configuration ---
# Which series to model: one indicator for one country, read from file_name.
target_country = 'Estonia'
indicator_name = 'Population, total'
file_name = 'Population-EstimatesCSV.csv'

# Year the final forecast is produced for.
target_year_prediction = 2030

# Year windows (all bounds inclusive).
# Years from START_YEAR through FULL_HISTORICAL_END_YEAR are loaded; the model
# is fitted on years up to TRAIN_END_YEAR and scored on
# TEST_START_YEAR..TEST_END_YEAR.
START_YEAR, TRAIN_END_YEAR = 2000, 2015
TEST_START_YEAR, TEST_END_YEAR = 2016, 2020
FULL_HISTORICAL_END_YEAR = 2020

# --- Helpers ---
def melt_year_columns(wide_df: pd.DataFrame, year_labels: list) -> pd.DataFrame:
    """Reshape wide per-year columns into a tidy numeric (Year, Population) table.

    Args:
        wide_df: Frame holding a 'Country Name' column plus one column for
            every label in *year_labels*.
        year_labels: Column names (year strings such as '2000') to unpivot.

    Returns:
        A new frame with columns 'Country Name', 'Year' and 'Population'.
        'Year' and 'Population' are numeric; rows whose population is missing
        or cannot be parsed as a number are dropped.
    """
    long_df = wide_df.melt(
        id_vars=['Country Name'],
        value_vars=year_labels,
        var_name='Year',
        value_name='Population'
    )
    long_df['Year'] = pd.to_numeric(long_df['Year'])
    # errors='coerce' maps unparseable cells to NaN so dropna can remove them.
    long_df['Population'] = pd.to_numeric(long_df['Population'], errors='coerce')
    return long_df.dropna(subset=['Population'])


def year_split_masks(years: np.ndarray, train_end: int, test_start: int, test_end: int) -> tuple:
    """Build boolean train/test masks over an array of years.

    Training selects every year <= *train_end*; testing selects years in
    *test_start*..*test_end* (inclusive).

    Args:
        years: 1-D array of observation years.
        train_end: Last year that belongs to the training subset.
        test_start: First year of the hold-out subset.
        test_end: Last year of the hold-out subset.

    Returns:
        ``(train_mask, test_mask)`` boolean arrays aligned with *years*.

    Raises:
        ValueError: If either subset would be empty, which would otherwise
            only surface as an opaque error when fitting or scoring.
    """
    fit_mask = years <= train_end
    eval_mask = (years >= test_start) & (years <= test_end)
    if not fit_mask.any():
        raise ValueError(f"No observations at or before {train_end} to train on.")
    if not eval_mask.any():
        raise ValueError(f"No observations in {test_start}-{test_end} to evaluate on.")
    return fit_mask, eval_mask


# The pipeline is guarded so the helpers above can be imported without reading
# the CSV. In a notebook cell or a direct script run __name__ is "__main__",
# so every name assigned below is still defined at module level as before.
if __name__ == "__main__":
    # --- Step 1: Prepare Data ---
    df = pd.read_csv(file_name)

    # Filter for the target indicator and country
    df_filtered = df[
        (df['Indicator Name'] == indicator_name) &
        (df['Country Name'] == target_country)
    ].copy()
    # Fail early with a readable message instead of an empty-array error later.
    if df_filtered.empty:
        raise ValueError(
            f"No rows in {file_name!r} for indicator {indicator_name!r} "
            f"and country {target_country!r}."
        )

    year_columns = [str(year) for year in range(START_YEAR, FULL_HISTORICAL_END_YEAR + 1)]
    df_ts = df_filtered[['Country Name'] + year_columns].copy()
    df_melted = melt_year_columns(df_ts, year_columns)

    X_1d = df_melted['Year'].values
    y = df_melted['Population'].values

    train_mask, test_mask = year_split_masks(
        X_1d, TRAIN_END_YEAR, TEST_START_YEAR, TEST_END_YEAR
    )

    # scikit-learn expects a 2-D feature matrix, hence the (-1, 1) reshapes.
    X_train = X_1d[train_mask].reshape(-1, 1)
    y_train = y[train_mask]

    X_test = X_1d[test_mask].reshape(-1, 1)
    y_test = y[test_mask]

    X_full = X_1d.reshape(-1, 1)

    # --- Step 2: Hold-out evaluation (fit on the training years only) ---
    model = LinearRegression()
    model.fit(X_train, y_train)

    y_pred_test = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred_test)

    # --- Step 3: Refit on all loaded years and forecast the target year ---
    model_full = LinearRegression()
    model_full.fit(X_full, y)

    X_future = np.array([[target_year_prediction]])
    # Name kept for compatibility; the value is for target_year_prediction.
    predicted_2030_pop = model_full.predict(X_future)[0]

    # --- Step 4: Report ---
    print(f"Prediction Country: {target_country}")
    print(f"MAE (Test {TEST_START_YEAR}-{TEST_END_YEAR}): {mae:,.0f}")
    # Label follows the configured year instead of a hard-coded 2030.
    print(f"Predicted {target_year_prediction} Population: {predicted_2030_pop:,.0f}")

# Output:
# Prediction Country: Estonia
# MAE (Test 2016-2020): 32,969
# Predicted 2030 Population: 1,268,702
