In [1]:
import pandas as pd
from typing import *
from src.preprocess.dataset import Dataset, DatasetConfig
from src.preprocess.result import ResultData


# Usage example: configure a "clean" dataset, build it, and fetch its data.

# NOTE(review): annotation only -- no value is bound to `dataset_names` here,
# so reading the name would raise NameError unless it is assigned elsewhere.
# Confirm whether it is still needed.
dataset_names: Optional[List[str]]

dataset_config = DatasetConfig(type="clean")
dataset = Dataset(dataset_config)

# Later cells read `result_data.datadict` and `result_data.ml_ready`.
result_data = dataset.get()


In [2]:
# `result_data.datadict` maps each feature name to its DataFrame, e.g.
#   {"feature1": dataframe}
#
# Each frame is laid out as:
#   year | country1 | country2 ...
#   2019 | value1   | value2

datadict: Dict[str, pd.DataFrame] = result_data.datadict

# Iterating a dict yields its keys in insertion order, so no intermediate
# list copy (and no f-string wrapper around a str key) is needed.
for key in datadict:
    print(key)

fdi_net_inflows_current_usd
area
education_years
services_value_added_percent_of_gdp
total_population
economic_activity
life_expectancy_at_birth_total_years
population_size
petroleum_energy_production
hydro_electric_energy_production
unemployment_rate_percent_of_total_labor_force
gdp_current_usd


In [3]:
# NOTE(review): mid-notebook import; consider moving it to the import cell at
# the top so a fresh-kernel reader sees every dependency in one place.
from src.clean.health_check import health_check_datadict, print_health_anomalies

# Run the health check on `datadict` and keep the resulting report.
report = health_check_datadict(datadict)
# Print the anomalies recorded in the report.
# NOTE(review): the saved run shows no output for this cell -- presumably that
# means no anomalies were found; confirm against `print_health_anomalies`.
print_health_anomalies(report)



In [4]:
# NOTE(review): mid-notebook import; consider moving it to the import cell at
# the top of the notebook.
from src.preprocess.model_data import ModelLoaders, FreezeModelLoader

# Build the loader/trainer helper from the `ml_ready` attribute of `result_data`.
ml = ModelLoaders(df=result_data.ml_ready)
# Path stem handed to `ml.save` / `ml.load` in later cells; their logged output
# shows the file on disk with a ".pt" suffix appended.
# NOTE(review): the name looks like it encodes window size 5 / horizon 15 -- confirm.
modelfile = "data/20--model/LSTMws5h15"



In [5]:

# Train with "gdp_current_usd" as the label column. The returned package
# (annotated as FreezeModelLoader) carries the model and the three loaders.
package: FreezeModelLoader = ml.train(label_col="gdp_current_usd")

# Expose the package contents as notebook-level names.
model, train_loader, val_loader, test_loader = (
    package.model,
    package.train_loader,
    package.val_loader,
    package.test_loader,
)

# Persist to `modelfile`; the logged output shows the saved path ending in ".pt".
ml.save(modelfile)


2025-06-04 01:41:28,961 - [train_forecast.] - [INFO]    - [train_forecast.log] - Epoch 1/10000 train=0.9957 val=1.6775
2025-06-04 01:41:29,101 - [train_forecast.] - [INFO]    - [train_forecast.log] - Epoch 2/10000 train=0.9832 val=1.6616
2025-06-04 01:41:29,260 - [train_forecast.] - [INFO]    - [train_forecast.log] - Epoch 3/10000 train=0.9649 val=1.6320
2025-06-04 01:41:29,422 - [train_forecast.] - [INFO]    - [train_forecast.log] - Epoch 4/10000 train=0.9285 val=1.5720
2025-06-04 01:41:29,558 - [train_forecast.] - [INFO]    - [train_forecast.log] - Epoch 5/10000 train=0.8596 val=1.4533
2025-06-04 01:41:29,743 - [train_forecast.] - [INFO]    - [train_forecast.log] - Epoch 6/10000 train=0.7508 val=1.3013
2025-06-04 01:41:29,907 - [train_forecast.] - [INFO]    - [train_forecast.log] - Epoch 7/10000 train=0.6531 val=1.1809
2025-06-04 01:41:30,099 - [train_forecast.] - [INFO]    - [train_forecast.log] - Epoch 8/10000 train=0.5864 val=1.1113
2025-06-04 01:41:30,255 - [train_forecast.] - [I

[ModelLoaders] package saved to PosixPath('data/20--model/LSTMws5h15.pt')


In [6]:

# Reload what was saved under `modelfile`.
# NOTE(review): this rebinds `model`, which the training cell set to
# `package.model`; the log line reports that a *package* was loaded, so
# confirm what `ml.load` actually returns before relying on `model`.
model = ml.load(modelfile)
# Run `ml.test` on `ml.package`; the cell output reports Test MSE, RMSE and R².
# presumably `ml.load` refreshes `ml.package` -- TODO confirm
y_true, y_pred = ml.test(ml.package)


[ModelLoaders] package loaded from PosixPath('data/20--model/LSTMws5h15.pt')
Test  MSE : 0.003743
Test  RMSE: 0.061184
Test  R²  : 0.9919


## 'data/20--model/LSTMws10h1.pt'
- Test  MSE : 0.016699
- Test  RMSE: 0.129226
- Test  R²  : 0.9932


## 'data/20--model/LSTMws5h1.pt'
- Test  MSE : 0.031407
- Test  RMSE: 0.177221
- Test  R²  : 0.9713


## 'data/20--model/LSTMws5h3.pt'
- Test  MSE : 0.009216
- Test  RMSE: 0.096001
- Test  R²  : 0.9903


## 'data/20--model/LSTMws5h15.pt'
- Test  MSE : 0.003743
- Test  RMSE: 0.061184
- Test  R²  : 0.9919