# Projeto Aplicado

In [371]:
import pandas as pd
import yfinance as yf
from curl_cffi import requests
import os

# curl_cffi HTTP session configured to impersonate a Chrome browser;
# download_idx passes it to yf.download via the `session` argument.
session = requests.Session(impersonate="chrome")

# Date range handed to yf.download as `start` / `end` for every index.
start = "1996-01-01"
end = "2023-12-31"

In [342]:
yf.__version__ # check your version

'0.2.55'

## Download de dados de índices de mercado

In [763]:
def download_idx(idx, idx_symbol):
    """Download one market index and save its last close of each year as CSV.

    The price history is fetched with ``yf.download`` using the module-level
    ``start``, ``end`` and ``session`` values. The result is written to
    ``files/{idx}.csv`` with two columns: ``year`` and ``idx``.

    Parameters
    ----------
    idx : str
        Short index name; used as the value column name and the CSV file name.
    idx_symbol : str
        Ticker symbol passed to ``yf.download`` (for example ``"^BVSP"``).

    Raises
    ------
    ValueError
        If the download returns no rows, so that an existing CSV is never
        overwritten with an empty one.
    """
    os.makedirs('files', exist_ok=True)
    df = yf.download(idx_symbol, start=start, end=end, multi_level_index=False, session=session)
    if df.empty:
        raise ValueError(f"No price data returned for {idx_symbol!r} ({idx})")

    # Group by calendar year instead of resample('Y'): the 'Y' alias is
    # deprecated in recent pandas while its replacement 'YE' is unknown to
    # older releases. Grouping also yields no NaN row for a year without data.
    close = df['Close']
    annual_close = close.groupby(close.index.year).last()

    df_csv = pd.DataFrame({
        'year': annual_close.index.to_numpy(),
        idx: annual_close.to_numpy()
    })
    df_csv.to_csv(f'files/{idx}.csv', index=False)

In [765]:
download_idx("ibov", "^BVSP")

[*********************100%***********************]  1 of 1 completed


In [529]:
download_idx("sp500", "^GSPC")

[*********************100%***********************]  1 of 1 completed


In [533]:
download_idx("dax", "^GDAXI")

[*********************100%***********************]  1 of 1 completed


In [535]:
download_idx("nikkei", "^N225")

[*********************100%***********************]  1 of 1 completed


In [537]:
download_idx("sti", "^STI")

[*********************100%***********************]  1 of 1 completed


In [539]:
download_idx("asx200", "^AXJO")

[*********************100%***********************]  1 of 1 completed


In [541]:
download_idx("mexbol", "^MXX")

[*********************100%***********************]  1 of 1 completed


In [545]:
download_idx("ssmi", "^SSMI")

[*********************100%***********************]  1 of 1 completed


In [547]:
# Convert the WGI spreadsheet to CSV; the merge cell below reads
# 'files/wgi.csv' with pd.read_csv.
df = pd.read_excel('wgi.xlsx')
df.to_csv('files/wgi.csv', index=False)

## Combina os dados de cada índice com os indicadores do WGI do respectivo país

In [767]:
import pandas as pd

# Mapping of indices to countries and their CSV file names
index_country_mapping = {
    'ibov': {'country': 'Brazil', 'csv': 'ibov.csv'},
    'sp500': {'country': 'United States', 'csv': 'sp500.csv'},
    'dax': {'country': 'Germany', 'csv': 'dax.csv'},
    'nikkei': {'country': 'Japan', 'csv': 'nikkei.csv'},
    'sti': {'country': 'Singapore', 'csv': 'sti.csv'},
    'asx200': {'country': 'Australia', 'csv': 'asx200.csv'},
    'mexbol': {'country': 'Mexico', 'csv': 'mexbol.csv'},
    'ssmi': {'country': 'Switzerland', 'csv': 'ssmi.csv'}
}

# Indicator codes used as regression features further down the notebook.
wgi_indicators = ['cc', 'ge', 'pv', 'rl', 'rq', 'va']

df_wgi = pd.read_csv('files/wgi.csv')

# Non-numeric percentile ranks become NaN instead of raising.
df_wgi['pctrank'] = pd.to_numeric(df_wgi['pctrank'], errors='coerce')

for index, info in index_country_mapping.items():
    country = info['country']
    index_csv = info['csv']

    df_country = df_wgi[df_wgi['countryname'] == country]
    if df_country.empty:
        # Nothing to pivot or merge for this country.
        print(f"Nenhum dado do WGI encontrado para {country}. Pulando {index}.")
        continue

    # Pivot WGI data to have indicators as columns
    df_pivoted = df_country.pivot_table(index='year', columns='indicator', values='pctrank')
    df_pivoted.columns.name = None
    # Turn 'year' back into a regular column so both sides of the merge join
    # on a column and the result carries no meaningless positional index.
    df_pivoted = df_pivoted.rename(columns=lambda x: x.strip()).reset_index()

    try:
        df_index = pd.read_csv("files/"+index_csv)
    except FileNotFoundError:
        print(f"Arquivo {index_csv} não encontrado. Pulando {index}.")
        continue

    df_combined = pd.merge(df_pivoted, df_index, on='year', how='inner')

    output_file = f'files/wgi_{index}_combined.csv'
    # index=False: the row index is only a counter after the merge; writing it
    # would add an unnamed junk column to the CSV.
    df_combined.to_csv(output_file, index=False)
    print(f"Arquivo salvo: {output_file}")


Arquivo salvo: files/wgi_ibov_combined.csv
Arquivo salvo: files/wgi_sp500_combined.csv
Arquivo salvo: files/wgi_dax_combined.csv
Arquivo salvo: files/wgi_nikkei_combined.csv
Arquivo salvo: files/wgi_sti_combined.csv
Arquivo salvo: files/wgi_asx200_combined.csv
Arquivo salvo: files/wgi_mexbol_combined.csv
Arquivo salvo: files/wgi_ssmi_combined.csv


## Regressão linear múltipla dos índices e indicadores do WGI

- **cc**: Control of Corruption
- **ge**: Government Effectiveness
- **pv**: Political Stability and Absence of Violence/Terrorism
- **rl**: Rule of Law
- **rq**: Regulatory Quality
- **va**: Voice and Accountability

In [773]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
import joblib

def test_models(idx, x=['cc', 'ge', 'pv', 'rl', 'rq', 'va']):
    file_name = f'files/wgi_{idx}_combined.csv'
    
    df = pd.read_csv(file_name)

    # Create directories
    os.makedirs(f'models/{idx}', exist_ok=True)
    os.makedirs(f'plots/{idx}', exist_ok=True)

    # Original features and target
    X_orig = df[x].values
    y_orig = df[idx].values.reshape(-1, 1)

    # Normalizar apenas y
    scaler_y = MinMaxScaler(feature_range=(0.01, 1.0))
    y_norm = scaler_y.fit_transform(y_orig)
    y_series = pd.Series(y_norm.ravel(), name=idx)

    best_model = None
    best_score = -np.inf
    best_model_name = None
    best_features = None
    best_X_var = None
    best_y_var = None
    best_model_obj = None

    # Test all feature combinations
    for r in range(1, len(x) + 1):
        for feature_subset in combinations(x, r):
            feature_subset = list(feature_subset)
            X_subset = df[feature_subset].values
            X_df = pd.DataFrame(X_subset, columns=feature_subset)

            modelos = {
                'Linear-Linear': (X_df, y_series),
                'Log-Lin': (np.log(X_df), y_series),
                'Log-Log': (np.log(X_df), np.log(y_series))
            }

            for nome, (X_var, y_var) in modelos.items():
                # Skip if X_var or y_var contains invalid values
                if (np.any(np.isnan(X_var)) or np.any(np.isinf(X_var)) or
                    np.any(np.isnan(y_var)) or np.any(np.isinf(y_var))):
                    continue
                X_var_const = sm.add_constant(X_var)
                model = sm.OLS(y_var, X_var_const).fit()

                # Compute metrics
                adj_r2 = model.rsquared_adj
                y_pred = model.predict(X_var_const)
                mae = mean_absolute_error(y_var, y_pred)
                significant_vars = sum(p < 0.05 for p in model.pvalues[1:])  # Exclude constant

                # Composite score
                max_y_var = max(y_var) if max(y_var) > 0 else 1.0
                score = adj_r2 - mae / max_y_var + significant_vars / len(feature_subset)

                if score > best_score:
                    best_score = score
                    best_model = model
                    best_model_name = nome
                    best_features = feature_subset
                    best_X_var = pd.DataFrame(X_var, columns=feature_subset)
                    best_y_var = y_var
                    best_model_obj = model

    if best_model is None:
        print(f"\n### No valid model found for {idx.upper()} ###")
        return

    print(f"\n### Best Model for {idx.upper()}: {best_model_name.upper()} with Features {best_features} ###")
    print(best_model.summary())

    print(f"Mean absolute error: {mae}")

    # Save the best model
    model_file = f'models/{idx}/{idx}_{best_model_name}.pkl'
    best_model.save(model_file)
    print(f"\n### Best Model Saved: {model_file} ###")

    # Serialize the scaler_y
    scaler_file = f'models/{idx}/{idx}_scaler_y.pkl'
    with open(scaler_file, 'wb') as f:
        pickle.dump(scaler_y, f)
    print(f"### Scaler Saved: {scaler_file} ###")

    # Plots
    n_cols = 3
    n_rows = int(np.ceil(len(best_features) / n_cols))
    plt.figure(figsize=(5 * n_cols, 4 * n_rows))

    for i, col in enumerate(best_X_var.columns, 1):
        plt.subplot(n_rows, n_cols, i)
        sns.scatterplot(x=best_X_var[col], y=best_y_var)
        plt.title(f'{col} vs {idx}')
        plt.xlabel(col)
        plt.ylabel(idx)

    plt.tight_layout()
    plt.savefig(f'plots/{idx}/{idx}_todas_variaveis_vs_y.png')
    plt.close()

    # VIF
    X_var_const = sm.add_constant(best_X_var)
    vif_data = pd.DataFrame()
    vif_data["Variable"] = X_var_const.columns
    vif_data["VIF"] = [variance_inflation_factor(X_var_const.values, i) for i in range(X_var_const.shape[1])]

    plt.figure(figsize=(8,4))
    sns.barplot(x="Variable", y="VIF", data=vif_data)
    plt.title(f'VIF - {idx} - {best_model_name}')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f'plots/{idx}/{idx}_{best_model_name}_vif.png')
    plt.close()

    # Residuals
    residuals = best_model.resid

    plt.figure(figsize=(6,4))
    sns.histplot(residuals, kde=True)
    plt.title(f'Histograma dos Resíduos - {best_model_name}')
    plt.tight_layout()
    plt.savefig(f'plots/{idx}/{idx}_{best_model_name}_residuos_hist.png')
    plt.close()

    plt.figure(figsize=(6,4))
    sm.qqplot(residuals, line='s', fit=True)
    plt.title(f'Q-Q Plot dos Resíduos - {best_model_name}')
    plt.tight_layout()
    plt.savefig(f'plots/{idx}/{idx}_{best_model_name}_residuos_qq.png')
    plt.close()

def predict_with_saved_model(idx, x, model_name):
    """Print actual versus predicted values of an index using a saved model.

    Loads ``files/wgi_{idx}_combined.csv``, rebuilds the inputs in the form
    named by ``model_name``, loads ``models/{idx}/{idx}_{model_name}.pkl`` and
    prints a table of actual and predicted values on the original scale. The
    target scaler is fitted again on the CSV rather than loaded from disk.

    Parameters
    ----------
    idx : str
        Index name; selects the CSV, the target column and the model file.
    x : list of str
        Feature columns to feed the model.
    model_name : str
        One of ``'Linear-Linear'``, ``'Log-Lin'`` or ``'Log-Log'``.

    Returns
    -------
    None
        Always; problems are reported with a printed ``Error: ...`` message.
    """
    # Validate model_name
    allowed_forms = ['Linear-Linear', 'Log-Lin', 'Log-Log']
    if model_name not in allowed_forms:
        print(f"Error: model_name must be one of {allowed_forms}")
        return None

    # Load the original data
    data_path = f'files/wgi_{idx}_combined.csv'
    if not os.path.exists(data_path):
        print(f"Error: Data file {data_path} not found")
        return None

    data = pd.read_csv(data_path)

    # Verify features and target exist
    absent = [name for name in x + [idx] if name not in data.columns]
    if absent:
        print(f"Error: Columns {absent} not found in {data_path}")
        return None

    # Raw features and target, then the target scaled to [0.01, 1.0]
    feature_frame = pd.DataFrame(data[x].values, columns=x)
    target_raw = data[idx].values.reshape(-1, 1)
    target_scaler = MinMaxScaler(feature_range=(0.01, 1.0))
    target_scaled = pd.Series(target_scaler.fit_transform(target_raw).ravel(), name=idx)

    # Both log forms take the log of the features; only Log-Log also takes
    # the log of the target.
    X_var = feature_frame if model_name == 'Linear-Linear' else np.log(feature_frame)
    y_var = np.log(target_scaled) if model_name == 'Log-Log' else target_scaled

    # Check for invalid values
    if any(np.any(np.isnan(part)) or np.any(np.isinf(part)) for part in (X_var, y_var)):
        print(f"Error: Invalid values (NaN or inf) in features or target after transformation for {model_name}")
        return None

    # Add constant for statsmodels
    design = sm.add_constant(X_var)

    # Load the saved model
    model_path = f'models/{idx}/{idx}_{model_name}.pkl'
    if not os.path.exists(model_path):
        print(f"Error: Model file {model_path} not found")
        return None

    fitted = sm.load(model_path)

    # Predict on all rows; a Log-Log model predicts the log of the scaled
    # target, so exponentiate before undoing the scaling.
    scaled_pred = fitted.predict(design)
    if model_name == 'Log-Log':
        scaled_pred = np.exp(scaled_pred)
    column_pred = scaled_pred.values.reshape(-1, 1)
    pred_orig = target_scaler.inverse_transform(column_pred).ravel()

    # Table with actual and predicted values
    label = idx.upper()
    results = pd.DataFrame({
        f'Actual {label}': target_raw.ravel(),
        f'Predicted {label}': pred_orig
    })

    # Display the table
    print(f"\n{label} Predictions for All Rows (Using {model_name} Model):")
    print(results.to_string(index=False))

def predict_new_values(idx, x, values, model_name):
    """Predict an index value for one new observation with a saved model.

    Builds a one-row frame from ``values``, applies the transformation named
    by ``model_name``, loads the model and the target scaler saved under
    ``models/{idx}/`` and prints the prediction on the original scale.

    Parameters
    ----------
    idx : str
        Index name; selects the model and scaler files.
    x : list of str
        Feature names, in the same order as ``values``.
    values : list
        One value per feature in ``x``.
    model_name : str
        One of ``'Linear-Linear'``, ``'Log-Lin'`` or ``'Log-Log'``.

    Returns
    -------
    pandas.DataFrame or None
        One-column frame ``Predicted {IDX}``, or ``None`` after printing an
        ``Error: ...`` message when validation, loading or prediction fails.
    """
    # Validate model_name
    allowed_forms = ['Linear-Linear', 'Log-Lin', 'Log-Log']
    if model_name not in allowed_forms:
        print(f"Error: model_name must be one of {allowed_forms}")
        return None

    # One-row frame from the provided values (same layout as the CSV features)
    observation = pd.DataFrame([values], columns=x)

    # Both log forms take the log of the features
    X_var = observation if model_name == 'Linear-Linear' else np.log(observation)

    # Check for invalid values
    if np.any(np.isnan(X_var)) or np.any(np.isinf(X_var)):
        print(f"Error: Invalid values (NaN or inf) in features after transformation for {model_name}")
        return None

    # has_constant='add' forces the constant column even though every column
    # of a single-row frame is itself constant.
    design = sm.add_constant(X_var, has_constant='add')

    # Load the saved model
    model_path = f'models/{idx}/{idx}_{model_name}.pkl'
    if not os.path.exists(model_path):
        print(f"Error: Model file {model_path} not found")
        return None

    fitted = sm.load(model_path)

    # Load the saved scaler
    scaler_path = f'models/{idx}/{idx}_scaler_y.pkl'
    if not os.path.exists(scaler_path):
        print(f"Error: Scaler file {scaler_path} not found")
        return None

    target_scaler = joblib.load(scaler_path)

    # Predict using the model
    try:
        scaled_pred = fitted.predict(design)
    except Exception as e:
        print(f"Error during prediction: {e}")
        return None

    # A Log-Log model predicts the log of the scaled target
    if model_name == 'Log-Log':
        scaled_pred = np.exp(scaled_pred)

    # Back to the original scale
    column_pred = scaled_pred.values.reshape(-1, 1)
    pred_orig = target_scaler.inverse_transform(column_pred).ravel()

    label = idx.upper()
    results = pd.DataFrame({
        f'Predicted {label}': pred_orig
    })

    # Display the table
    print(f"\n{label} Predictions (Using {model_name} Model):")
    print(results.to_string(index=False))

    return results

# predict_new_values('ibov', ['ge'], [32.075469970703125], 'Log-Lin')

## Seleciona o melhor modelo para o índice IBOV

In [775]:
test_models('ibov')


### Best Model for IBOV: LOG-LIN with Features ['ge'] ###
                            OLS Regression Results                            
Dep. Variable:                   ibov   R-squared:                       0.687
Model:                            OLS   Adj. R-squared:                  0.673
Method:                 Least Squares   F-statistic:                     50.45
Date:                Wed, 07 May 2025   Prob (F-statistic):           3.10e-07
Time:                        18:46:57   Log-Likelihood:                 10.902
No. Observations:                  25   AIC:                            -17.80
Df Residuals:                      23   BIC:                            -15.37
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------

<Figure size 600x400 with 0 Axes>

In [777]:
predict_with_saved_model('ibov', ['ge'], 'Log-Lin')


IBOV Predictions for All Rows (Using Log-Lin Model):
  Actual IBOV  Predicted IBOV
  7039.899902    41443.404924
  6784.000000    44878.662801
 15259.000000    17815.041070
 11268.000000    25496.179134
 22236.000000    16633.158508
 26196.000000    32639.878939
 33456.000000    42471.191901
 44474.000000    75465.225016
 63886.000000    58460.860530
 37550.000000    39476.174296
 68588.000000    40277.055411
 69305.000000    33060.468039
 56754.000000    46275.611748
 60952.000000    43265.705991
 51507.000000    40312.051526
 50007.000000    53402.661892
 43350.000000    59827.585939
 60227.000000    66625.011116
 76402.000000    79273.013747
 87887.000000   108082.694541
115964.000000    71922.117924
119306.000000   101451.680629
104822.000000   105841.289596
110031.000000   118878.200032
134185.000000   114160.974653


## Seleciona o melhor modelo para o índice NIKKEI

In [779]:
test_models('nikkei')


### Best Model for NIKKEI: LINEAR-LINEAR with Features ['pv', 'rl', 'rq'] ###
                            OLS Regression Results                            
Dep. Variable:                 nikkei   R-squared:                       0.711
Model:                            OLS   Adj. R-squared:                  0.669
Method:                 Least Squares   F-statistic:                     17.20
Date:                Wed, 07 May 2025   Prob (F-statistic):           7.15e-06
Time:                        18:47:06   Log-Likelihood:                 13.034
No. Observations:                  25   AIC:                            -18.07
Df Residuals:                      21   BIC:                            -13.19
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------

<Figure size 600x400 with 0 Axes>

In [781]:
predict_with_saved_model('nikkei', ['pv', 'rl', 'rq'], 'Linear-Linear')


NIKKEI Predictions for All Rows (Using Linear-Linear Model):
 Actual NIKKEI  Predicted NIKKEI
  19361.349609      14427.759384
  13842.169922      16499.150764
  13785.690430      14893.381356
   8578.950195       2592.712970
  10676.639648       7787.541032
  11488.759766      14801.010830
  16111.429688      16109.601407
  17225.830078      24144.825350
  15307.780273      17717.003005
   8859.559570      12857.644379
  10546.440430      15206.001818
  10228.919922      10639.659060
   8455.349609      12865.572332
  10395.179688      12963.130916
  16291.309570      17138.480263
  17450.769531      15448.445536
  19033.710938      19056.231487
  19114.369141      17826.006460
  22764.939453      22618.006389
  20014.769531      22656.791838
  23656.619141      20242.188322
  27444.169922      23337.773112
  28791.710938      24075.857081
  26094.500000      27256.793540
  33464.171875      25823.520237


## Seleciona o melhor modelo para o índice STI

In [783]:
test_models('sti')


### Best Model for STI: LOG-LIN with Features ['ge', 'rl'] ###
                            OLS Regression Results                            
Dep. Variable:                    sti   R-squared:                       0.607
Model:                            OLS   Adj. R-squared:                  0.571
Method:                 Least Squares   F-statistic:                     16.96
Date:                Wed, 07 May 2025   Prob (F-statistic):           3.50e-05
Time:                        18:47:22   Log-Likelihood:                 6.5794
No. Observations:                  25   AIC:                            -7.159
Df Residuals:                      22   BIC:                            -3.502
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------

<Figure size 600x400 with 0 Axes>

In [785]:
predict_with_saved_model('sti', ['ge', 'rl'], 'Log-Lin')


STI Predictions for All Rows (Using Log-Lin Model):
 Actual STI  Predicted STI
2216.800049    2246.780316
1392.729980    2253.106615
1926.829956    2154.491164
1341.030029    1128.918353
1764.520020    2035.333139
2066.139893    2204.969349
2347.340088    2745.022308
2985.830078    2649.895270
3482.300049    2601.702754
1761.560059    2597.520526
2879.760010    2609.942985
3190.040039    2657.637727
2646.350098    2667.858712
3167.080078    2760.611300
3167.429932    2806.640348
3365.149902    2789.795068
2882.729980    2889.829813
2880.760010    3074.412552
3402.919922    3119.986928
3068.760010    3210.466733
3222.830078    3210.466733
2843.810059    3300.067362
3123.679932    3300.067362
3251.320068    3345.380928
3240.270020    3257.065993


## Seleciona o melhor modelo para o índice ASX200

In [787]:
test_models('asx200')


### Best Model for ASX200: LINEAR-LINEAR with Features ['rl', 'rq'] ###
                            OLS Regression Results                            
Dep. Variable:                 asx200   R-squared:                       0.811
Model:                            OLS   Adj. R-squared:                  0.793
Method:                 Least Squares   F-statistic:                     47.06
Date:                Wed, 07 May 2025   Prob (F-statistic):           1.13e-08
Time:                        18:47:28   Log-Likelihood:                 17.587
No. Observations:                  25   AIC:                            -29.17
Df Residuals:                      22   BIC:                            -25.52
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------

<Figure size 600x400 with 0 Axes>

In [789]:
predict_with_saved_model('asx200', ['rl', 'rq'], 'Linear-Linear')


ASX200 Predictions for All Rows (Using Linear-Linear Model):
 Actual ASX200  Predicted ASX200
   2367.800049       2580.156551
   2717.600098       3090.193062
   3206.199951       4265.682464
   3007.100098       2845.985851
   3299.800049       3484.964717
   4050.600098       4397.737082
   4763.399902       4187.134403
   5669.899902       4931.225582
   6339.799805       5058.580221
   3722.300049       4792.955765
   4870.600098       5592.292820
   4745.200195       5020.698077
   4056.600098       4841.632460
   4649.000000       4615.161223
   5352.200195       4615.161223
   5411.000000       4633.066919
   5295.899902       4670.247499
   5665.799805       5265.285345
   6065.100098       6140.187015
   5646.399902       6393.900212
   6684.100098       6253.961291
   6587.100098       6901.318475
   7444.600098       6761.383619
   7038.700195       7705.661308
   7590.799805       7203.027402


## Seleciona o melhor modelo para o índice SP500

In [791]:
test_models('sp500')


### Best Model for SP500: LOG-LIN with Features ['va'] ###
                            OLS Regression Results                            
Dep. Variable:                  sp500   R-squared:                       0.868
Model:                            OLS   Adj. R-squared:                  0.863
Method:                 Least Squares   F-statistic:                     151.9
Date:                Wed, 07 May 2025   Prob (F-statistic):           1.30e-11
Time:                        18:47:36   Log-Likelihood:                 21.127
No. Observations:                  25   AIC:                            -38.25
Df Residuals:                      23   BIC:                            -35.82
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------

<Figure size 600x400 with 0 Axes>

In [793]:
predict_with_saved_model('sp500', ['va'], 'Log-Lin')


SP500 Predictions for All Rows (Using Log-Lin Model):
 Actual SP500  Predicted SP500
   740.739990       769.267171
  1229.229980       761.522528
  1320.280029      1109.454909
   879.820007       590.398025
  1111.920044       761.522528
  1211.920044       960.056559
  1248.290039       960.056559
  1418.300049      1560.691201
  1468.359985      1736.596012
   903.250000      1560.691201
  1115.099976      1873.842449
  1257.640015      1698.915038
  1257.599976      1588.637542
  1426.189941      1334.024194
  1848.359985      2022.364694
  2058.899902      2452.342788
  2043.939941      1985.149266
  2238.830078      1985.149266
  2673.610107      2547.469438
  2506.850098      2778.426416
  3230.780029      3339.111786
  3756.070068      4250.552909
  4766.180176      3940.839589
  3839.500000      4043.402402
  4769.830078      3711.006072


## Seleciona o melhor modelo para o índice SSMI

In [795]:
test_models('ssmi')


### Best Model for SSMI: LINEAR-LINEAR with Features ['cc', 'ge', 'pv', 'va'] ###
                            OLS Regression Results                            
Dep. Variable:                   ssmi   R-squared:                       0.655
Model:                            OLS   Adj. R-squared:                  0.585
Method:                 Least Squares   F-statistic:                     9.473
Date:                Wed, 07 May 2025   Prob (F-statistic):           0.000183
Time:                        18:47:59   Log-Likelihood:                 13.570
No. Observations:                  25   AIC:                            -17.14
Df Residuals:                      20   BIC:                            -11.05
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------

<Figure size 600x400 with 0 Axes>

In [797]:
predict_with_saved_model('ssmi', ['cc', 'ge', 'pv', 'va'], 'Linear-Linear')


SSMI Predictions for All Rows (Using Linear-Linear Model):
 Actual SSMI  Predicted SSMI
 3942.199951     4564.448148
 7160.700195     6531.950939
 8135.399902     7002.624085
 4630.799805     6481.047838
 5487.799805     4471.641888
 5693.200195     8938.716994
 7583.899902     8640.363259
 8785.700195     7554.678678
 8484.500000     8488.424502
 5534.529785     8528.979895
 6545.910156     7005.210534
 6436.040039     7271.602697
 5936.229980     5380.617030
 6822.439941     6700.753424
 8202.980469     7370.163179
 8983.370117     8729.006277
 8818.089844     9272.996735
 8219.870117     7995.187331
 9381.870117     9276.188996
 8429.299805     7599.785343
10616.940430     8921.085753
10703.509766    10173.873691
12875.660156    10670.265081
10729.400391    10094.697737
11137.790039    11613.821070


## Seleciona o melhor modelo para o índice MEXBOL

In [799]:
test_models('mexbol')


### Best Model for MEXBOL: LINEAR-LINEAR with Features ['cc', 'pv'] ###
                            OLS Regression Results                            
Dep. Variable:                 mexbol   R-squared:                       0.704
Model:                            OLS   Adj. R-squared:                  0.678
Method:                 Least Squares   F-statistic:                     26.21
Date:                Wed, 07 May 2025   Prob (F-statistic):           1.51e-06
Time:                        18:48:08   Log-Likelihood:                 9.3174
No. Observations:                  25   AIC:                            -12.63
Df Residuals:                      22   BIC:                            -8.978
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------

<Figure size 600x400 with 0 Axes>

In [801]:
predict_with_saved_model('mexbol', ['cc', 'pv'], 'Linear-Linear')


MEXBOL Predictions for All Rows (Using Linear-Linear Model):
 Actual MEXBOL  Predicted MEXBOL
   3361.000000      33620.754528
   3959.659912      20396.618742
   5652.189941       6084.447161
   6127.089844       7027.395747
   8795.280273      10011.310273
  12917.879883      16544.938576
  17802.710938      16971.151908
  26448.320312      23420.174460
  29536.830078      24171.302130
  22380.320312      27349.078972
  32120.470703      26038.253683
  38243.140625      28437.900179
  37077.519531      27275.330340
  43705.828125      29380.996628
  42958.820312      32937.630917
  43145.660156      46501.007974
  42977.500000      45758.475221
  45642.898438      43690.199686
  49354.421875      51035.852690
  41640.269531      47077.317093
  43541.019531      49021.825236
  44066.878906      47253.323712
  53272.441406      46010.832142
  48463.859375      48163.331222
  57386.250000      46398.810787


In [None]:
## Seleciona o melhor modelo para o índice DAX

In [803]:
test_models('dax')


### Best Model for DAX: LOG-LIN with Features ['cc', 'rq'] ###
                            OLS Regression Results                            
Dep. Variable:                    dax   R-squared:                       0.622
Model:                            OLS   Adj. R-squared:                  0.588
Method:                 Least Squares   F-statistic:                     18.11
Date:                Wed, 07 May 2025   Prob (F-statistic):           2.24e-05
Time:                        18:48:17   Log-Likelihood:                 7.9212
No. Observations:                  25   AIC:                            -9.842
Df Residuals:                      22   BIC:                            -6.186
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------

<Figure size 600x400 with 0 Axes>

In [805]:
predict_with_saved_model('dax', ['cc', 'rq'], 'Log-Lin')


DAX Predictions for All Rows (Using Log-Lin Model):
  Actual DAX  Predicted DAX
 2880.070068    6584.596004
 5006.569824    5249.857930
 6433.609863    4131.284214
 2892.629883    5326.129314
 3965.159912    4270.503244
 4256.080078    5665.535650
 5408.259766    8093.135811
 6596.919922    7581.674129
 8067.319824    5774.070670
 4810.200195    5858.990433
 5957.430176    7582.577490
 6914.189941    7660.986815
 5898.350098    6889.093321
 7612.390137    8291.231994
 9552.160156    8749.839853
 9805.549805    9110.541358
10743.009766    8140.796206
11481.059570   11532.678777
12917.639648   12521.629335
10558.959961   14815.481821
13249.009766   14558.994763
13718.780273   11869.357344
15884.860352   14368.359375
13923.589844   12264.768261
16751.640625    8393.325341
