In [22]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from corr_parc import calculate_partial_correlation_df, apply_partial_correlation_criteria

In [23]:
# Load the five pre-processed CSV tables, in the same order as the names on
# the left-hand side.
df_brent, df_precios, df_precipitacion, df_temperatura, df_trm = map(
    pd.read_csv,
    (
        "./processed_tables/brent.csv",
        "./processed_tables/precios.csv",
        "./processed_tables/precipitacion.csv",
        "./processed_tables/temperatura.csv",
        "./processed_tables/TRM.csv",
    ),
)

In [24]:
# Rename each auxiliary table's key column to "Date", the column name used as
# the join key below.
df_brent, df_precipitacion, df_temperatura, df_trm = (
    df_brent.rename(columns={"Fecha": "Date"}),
    df_precipitacion.rename(columns={"date": "Date"}),
    df_temperatura.rename(columns={"date": "Date"}),
    df_trm.rename(columns={"vigenciadesde": "Date"}),
)

# Left-join every auxiliary table onto the price table, one after another,
# always matching on "Date".
df_merged = df_precios
for aux_table in (df_brent, df_precipitacion, df_temperatura, df_trm):
    df_merged = df_merged.merge(aux_table, on="Date", how="left")

Quitamos la columna `precipitacion_unidadmedida`.

In [25]:
# Remove the `precipitacion_unidadmedida` column from the merged table.
# The result is rebound instead of dropped in place, and a missing column is
# ignored, so re-running this cell is a no-op rather than a KeyError.
df_merged = df_merged.drop(columns=["precipitacion_unidadmedida"], errors="ignore")

In [26]:
# Write the merged table to CSV; index=False leaves the row index out of the file.
df_merged.to_csv(path_or_buf="processed_tables/merged_no_dams.csv", index=False)

### Correlación de las variables explicativas con la variable dependiente (datos estandarizados)

In [27]:
# Explanatory variables: everything in the merged table except the join key
# ("Date") and the dependent variable ("energy_price").
exogen_data_df = df_merged.drop(columns=["Date", "energy_price"])

# Standardize the explanatory variables; fit_transform returns a plain array,
# so the column labels are re-attached below.
scaler = StandardScaler()
exogen_data = scaler.fit_transform(exogen_data_df)

# Carry over the original row index as well as the column labels. Without
# `index=` the frame gets a fresh 0..n-1 index, and the index-aligned
# `energy_price` assignment below would silently mismatch (or fill NaN) if
# `df_merged` ever has a non-default index.
exogen_data_df_standardized = pd.DataFrame(
    exogen_data,
    columns=exogen_data_df.columns,
    index=exogen_data_df.index,
)

# Standardized explanatory variables plus the unscaled dependent variable.
scaled_df_merged = exogen_data_df_standardized.copy()
scaled_df_merged["energy_price"] = df_merged["energy_price"]

## Matriz de correlación

In [28]:
# Pairwise Pearson correlations between the unscaled explanatory variables.
correlation_matrix = exogen_data_df.corr(method="pearson")
print(f"Correlation Matrix Original: {correlation_matrix.shape}")
# Last expression of the cell: render the matrix with a colour gradient.
correlation_matrix.style.background_gradient(cmap="coolwarm")


Correlation Matrix Original: (67, 67)


Unnamed: 0,brent_value,precipitacion_amazonas,precipitacion_antioquia,precipitacion_arauca,precipitacion_atlantico,precipitacion_bogota,precipitacion_bolivar,precipitacion_boyaca,precipitacion_caldas,precipitacion_caqueta,precipitacion_casanare,precipitacion_cauca,precipitacion_cesar,precipitacion_choco,precipitacion_cordoba,precipitacion_cundinamarca,precipitacion_guainia,precipitacion_guaviare,precipitacion_huila,precipitacion_la guajira,precipitacion_magdalena,precipitacion_meta,precipitacion_narino,precipitacion_norte de santander,precipitacion_putumayo,precipitacion_quindio,precipitacion_risaralda,precipitacion_san andres providencia,precipitacion_santander,precipitacion_sucre,precipitacion_tolima,precipitacion_valle del cauca,precipitacion_vaupes,precipitacion_vichada,temp_AMAZONAS,temp_ANTIOQUIA,temp_ARAUCA,temp_ARCHIPIELAGO DE SAN ANDRES PROVIDENCIA Y SANTA CATALINA,temp_ATLANTICO,temp_BOGOTA,temp_BOLIVAR,temp_BOYACA,temp_CALDAS,temp_CAQUETA,temp_CASANARE,temp_CAUCA,temp_CESAR,temp_CHOCO,temp_CORDOBA,temp_CUNDINAMARCA,temp_GUAINIA,temp_GUAVIARE,temp_HUILA,temp_LA GUAJIRA,temp_MAGDALENA,temp_META,temp_NARINO,temp_NORTE DE SANTANDER,temp_PUTUMAYO,temp_QUINDIO,temp_RISARALDA,temp_SANTANDER,temp_SUCRE,temp_TOLIMA,temp_VALLE DEL CAUCA,temp_VICHADA,TRM
brent_value,1.0,0.01518,0.329614,0.034924,0.115467,0.252189,0.191135,0.05121,0.201223,0.051083,0.006283,0.036725,0.204421,-0.312554,0.320371,0.037665,0.134976,0.183881,-0.071111,0.154511,0.358489,0.214289,0.18663,0.126432,-0.153138,0.052736,0.183884,0.134912,0.286426,0.233963,0.020231,0.155809,0.001619,-0.01326,-0.195409,-0.123097,-0.140329,-0.094225,-0.202322,-0.326451,-0.313565,-0.272161,-0.519461,-0.265061,-0.239194,-0.31354,-0.166155,-0.356461,-0.215685,-0.235658,-0.169176,-0.33752,-0.216031,0.040892,-0.125118,0.026172,0.08541,-0.295623,-0.011673,-0.356808,-0.192665,-0.476708,-0.304795,-0.075845,-0.238194,0.032238,-0.317626
precipitacion_amazonas,0.01518,1.0,-0.000581,-0.066088,-0.075014,0.106463,-0.033369,0.00389,0.029611,-0.022714,-0.013511,0.087176,-0.015286,0.083052,0.00055,0.093379,-0.059546,0.043675,-0.010479,-0.105463,-0.001357,-0.022306,0.036657,0.046852,-0.000844,0.066723,0.028251,-0.062482,0.02598,0.008736,0.045353,0.100075,0.0032,0.008473,-0.232514,0.052096,0.073518,-0.060093,0.110331,0.052482,0.082042,0.051021,-0.056883,0.055474,0.060518,0.024629,0.086051,0.127459,0.070835,0.014092,0.101549,0.026618,0.082148,0.017831,-0.011986,0.122759,0.101894,-0.008471,0.041728,-0.011253,0.026718,-0.007228,0.103934,0.071053,0.037895,-0.016458,-0.011616
precipitacion_antioquia,0.329614,-0.000581,1.0,0.314739,0.08488,0.270766,0.324677,0.391595,0.594696,0.209265,0.116232,0.335611,0.137814,0.003961,0.455286,0.29718,0.074976,0.351991,0.286523,0.140865,0.4049,0.345804,0.351733,0.194504,0.083891,0.395891,0.38427,0.141533,0.51993,0.306948,0.466152,0.514026,0.164454,0.026048,-0.087057,-0.253362,0.180829,0.004907,-0.234126,-0.196108,-0.288471,-0.101192,-0.380997,-0.220491,-0.184058,-0.299346,-0.264779,-0.193303,-0.319414,-0.303399,-0.125014,-0.40577,-0.185246,0.03013,-0.105091,-0.009625,0.146918,-0.254834,-0.034198,-0.405214,-0.309354,-0.387502,-0.251227,-0.184477,-0.346207,-0.019513,-0.165865
precipitacion_arauca,0.034924,-0.066088,0.314739,1.0,0.01771,0.045715,0.247388,0.264242,0.109682,0.209292,0.208583,0.072115,0.061164,-0.01762,0.275258,0.084851,0.107836,0.117684,0.025013,0.039089,0.109131,0.247661,-0.007031,-0.047415,0.056463,0.031036,0.096886,0.142209,0.191792,0.190068,0.130386,0.192756,0.13098,0.287361,0.038514,0.006995,0.380051,0.185561,-0.041487,-0.123384,-0.046943,-0.135892,-0.090326,-0.2081,-0.149469,0.031148,-0.063089,-0.109112,-0.107389,-0.15883,-0.071209,-0.242535,-0.054332,0.160982,0.006801,-0.131026,0.084866,0.017726,-0.094691,-0.0382,0.000818,-0.051431,0.021874,0.072079,-0.021756,0.022685,-0.260864
precipitacion_atlantico,0.115467,-0.075014,0.08488,0.01771,1.0,0.013352,0.119168,0.152109,0.085969,0.019529,0.052842,0.056464,0.036497,0.085676,0.130977,0.104012,0.116575,0.119205,0.05607,0.230301,0.204079,0.060673,0.057876,0.053496,0.01225,0.022535,0.051031,0.137871,0.254622,0.114213,0.093871,0.025685,-0.050822,-0.019426,0.133536,-0.098189,-0.045853,0.117832,-0.168199,0.006932,-0.164992,-0.063107,-0.08035,0.070169,0.007875,-0.079185,-0.124734,-0.181007,-0.15978,-0.043825,0.068818,-0.023108,0.031834,-0.049335,0.028422,0.089177,0.041105,-0.052018,0.086999,-0.092026,-0.067412,-0.120624,-0.184702,0.002977,-0.043783,0.095447,-0.007299
precipitacion_bogota,0.252189,0.106463,0.270766,0.045715,0.013352,1.0,0.074195,0.330263,0.352761,0.104728,0.117971,0.181343,0.109118,-0.002043,0.055827,0.345873,0.059264,0.215744,0.258703,0.002575,0.242182,0.286513,0.285836,0.110124,0.033521,0.332314,0.235644,-0.014192,0.294258,0.075289,0.302506,0.261891,0.072888,0.037687,-0.056657,-0.091854,-0.061607,-0.010421,-0.036763,-0.116177,-0.068183,-0.049279,-0.200511,-0.121085,-0.1072,-0.153075,-0.085909,-0.045204,-0.032545,-0.083105,-0.034626,-0.205303,-0.133304,0.001194,-0.044992,-0.076165,0.032972,-0.155177,0.010893,-0.152004,-0.100928,-0.190826,-0.036242,-0.123051,-0.152808,-0.082595,-0.042113
precipitacion_bolivar,0.191135,-0.033369,0.324677,0.247388,0.119168,0.074195,1.0,0.202172,0.163448,0.063806,0.091918,0.109791,0.14274,-0.004278,0.338383,0.063309,0.073121,0.16794,0.018195,0.13825,0.235722,0.089413,0.090713,0.041052,-0.079292,0.048677,0.130465,0.120488,0.319381,0.340527,0.085778,0.161077,0.145462,0.047074,0.007794,-0.131698,0.158988,0.003807,-0.218628,-0.121354,-0.243621,-0.113995,-0.223528,-0.07708,-0.091759,-0.148268,-0.218361,-0.152417,-0.298837,-0.173457,-0.025532,-0.209518,-0.081786,-0.010243,-0.033582,-0.00732,0.043375,-0.110049,-0.008454,-0.201869,-0.137354,-0.225181,-0.191749,-0.026751,-0.141358,0.036608,-0.166742
precipitacion_boyaca,0.05121,0.00389,0.391595,0.264242,0.152109,0.330263,0.202172,1.0,0.360643,0.200954,0.469496,0.2135,0.074206,0.213239,0.266711,0.338042,0.097191,0.317155,0.329143,0.153622,0.276858,0.262295,0.204066,0.12761,0.108749,0.234871,0.241668,0.092542,0.444877,0.268873,0.392177,0.281573,-0.013271,0.202611,0.058891,-0.140925,0.058463,0.137253,-0.109702,-0.086422,-0.165788,-0.09803,-0.145009,-0.129683,-0.113991,-0.117167,-0.203134,-0.004545,-0.178077,-0.134366,-0.049687,-0.208743,-0.093938,-0.014822,-0.002302,-0.112497,0.064011,-0.069928,-0.004328,-0.161964,-0.152405,-0.165942,-0.14428,-0.114971,-0.181996,-0.034793,-0.07238
precipitacion_caldas,0.201223,0.029611,0.594696,0.109682,0.085969,0.352761,0.163448,0.360643,1.0,0.093907,0.079431,0.383858,0.116267,-0.050947,0.261277,0.334361,-0.000631,0.211038,0.399479,0.061261,0.36002,0.296932,0.396841,0.251389,0.048318,0.573429,0.527399,0.076655,0.505304,0.170035,0.602967,0.557396,0.164279,-0.03088,-0.08812,-0.338051,0.091871,-0.094561,-0.198313,-0.201634,-0.221452,-0.04913,-0.39228,-0.114163,-0.144841,-0.351597,-0.231205,-0.17012,-0.204761,-0.286337,-0.053682,-0.335226,-0.201808,-0.094616,-0.203066,0.005426,0.108946,-0.336116,0.027229,-0.43594,-0.397373,-0.383376,-0.170929,-0.278062,-0.387878,-0.07051,-0.035315
precipitacion_caqueta,0.051083,-0.022714,0.209265,0.209292,0.019529,0.104728,0.063806,0.200954,0.093907,1.0,0.229139,0.054377,0.085191,-0.036104,0.061298,0.147628,0.146477,0.22314,0.217711,-0.008876,0.110767,0.417098,0.149357,-0.000406,0.299014,0.024021,0.092406,0.127199,0.152929,0.066551,0.147462,0.055052,0.07925,0.118803,-0.023762,-0.023872,0.012223,0.01824,-0.05367,-0.117982,-0.081146,-0.093948,-0.048599,-0.298821,-0.139115,-0.078549,-0.066724,-0.103962,-0.080312,-0.13005,-0.060351,-0.249599,-0.2181,0.032954,0.092904,-0.182588,-0.056841,0.004882,-0.195364,-0.056612,-0.046891,-0.045017,-0.062502,-0.127519,-0.068021,-0.034095,-0.007909


Se considera que existe una alta correlación entre variables cuando el coeficiente es superior a 0.5 o inferior a -0.5. Esto puede causar multicolinealidad y afectar la eficiencia de la predicción de los modelos; por lo tanto, se analizarán las variables que presentan esta alta correlación para realizar una selección inicial de variables.


In [29]:
# Build the partial-correlation table from the standardized explanatory
# variables plus the unscaled `energy_price` column. The helper is imported
# from the local `corr_parc` module.
# NOTE(review): presumably each variable is measured against `energy_price`
# while controlling for the others — confirm in corr_parc.
partial_correlation_df = calculate_partial_correlation_df(scaled_df_merged)
# Last expression of the cell, so the notebook renders the resulting table.
partial_correlation_df

Unnamed: 0,Variable,partial_correlation,abs_partial_correlation
0,brent_value,-0.201266,0.201266
1,precipitacion_amazonas,0.025642,0.025642
2,precipitacion_antioquia,-0.023005,0.023005
3,precipitacion_arauca,-0.016477,0.016477
4,precipitacion_atlantico,-0.042564,0.042564
...,...,...,...
62,temp_SUCRE,-0.012152,0.012152
63,temp_TOLIMA,-0.035211,0.035211
64,temp_VALLE DEL CAUCA,0.193226,0.193226
65,temp_VICHADA,-0.028915,0.028915


In [30]:
# Inputs for the selection helper from `corr_parc`, bundled in a single dict:
# the correlation matrix of the standardized variables, the partial-correlation
# table, and an "excluded" list.
# NOTE(review): the meaning of "excluded" is defined inside the helper — confirm there.
criteria_inputs = {
    "correlation_matrix": exogen_data_df_standardized.corr(),
    "partial_correlation_df": partial_correlation_df,
    "excluded": ["precipitacion_antioquia"],
}
# The helper returns a pair; only its first element is used in this notebook.
columns_to_keep, _ = apply_partial_correlation_criteria(criteria_inputs)
columns_to_keep

['precipitacion_norte de santander',
 'precipitacion_antioquia',
 'precipitacion_boyaca',
 'precipitacion_bolivar',
 'precipitacion_arauca',
 'precipitacion_choco',
 'precipitacion_cauca',
 'precipitacion_guainia',
 'precipitacion_putumayo',
 'temp_BOYACA',
 'precipitacion_amazonas',
 'precipitacion_huila',
 'precipitacion_cundinamarca',
 'temp_AMAZONAS',
 'precipitacion_guaviare',
 'precipitacion_cordoba',
 'precipitacion_la guajira',
 'TRM',
 'precipitacion_cesar',
 'precipitacion_vaupes',
 'temp_ARAUCA',
 'precipitacion_casanare',
 'precipitacion_vichada',
 'precipitacion_quindio',
 'temp_CUNDINAMARCA',
 'brent_value',
 'precipitacion_atlantico',
 'precipitacion_magdalena',
 'temp_ARCHIPIELAGO DE SAN ANDRES PROVIDENCIA Y SANTA CATALINA',
 'precipitacion_meta',
 'temp_CAQUETA',
 'temp_NARINO',
 'precipitacion_narino',
 'precipitacion_caqueta',
 'precipitacion_bogota',
 'precipitacion_san andres providencia']

In [31]:
# Restrict the unscaled explanatory variables to the selected columns and
# recompute their pairwise correlations.
df_without_correlation = exogen_data_df[columns_to_keep]
new_correlation_matrix = df_without_correlation.corr()
# Label this as the reduced matrix; the "Original" label belongs to the full
# matrix printed earlier in the notebook.
print(f"Correlation Matrix Reduced: {new_correlation_matrix.shape}")
# Last expression of the cell: render the matrix with a colour gradient.
new_correlation_matrix.style.background_gradient(cmap="coolwarm")

Correlation Matrix Original: (36, 36)


Unnamed: 0,precipitacion_norte de santander,precipitacion_antioquia,precipitacion_boyaca,precipitacion_bolivar,precipitacion_arauca,precipitacion_choco,precipitacion_cauca,precipitacion_guainia,precipitacion_putumayo,temp_BOYACA,precipitacion_amazonas,precipitacion_huila,precipitacion_cundinamarca,temp_AMAZONAS,precipitacion_guaviare,precipitacion_cordoba,precipitacion_la guajira,TRM,precipitacion_cesar,precipitacion_vaupes,temp_ARAUCA,precipitacion_casanare,precipitacion_vichada,precipitacion_quindio,temp_CUNDINAMARCA,brent_value,precipitacion_atlantico,precipitacion_magdalena,temp_ARCHIPIELAGO DE SAN ANDRES PROVIDENCIA Y SANTA CATALINA,precipitacion_meta,temp_CAQUETA,temp_NARINO,precipitacion_narino,precipitacion_caqueta,precipitacion_bogota,precipitacion_san andres providencia
precipitacion_norte de santander,1.0,0.194504,0.12761,0.041052,-0.047415,0.027519,0.127603,-0.02539,0.0093,0.065491,0.046852,0.118526,0.098627,-0.011762,0.070243,0.286116,0.113244,-0.352825,0.23075,-0.009287,0.131767,-0.126998,-0.059519,0.28404,-0.222202,0.126432,0.053496,0.135904,-0.108762,0.055692,-0.004543,0.194092,0.302257,-0.000406,0.110124,-0.000136
precipitacion_antioquia,0.194504,1.0,0.391595,0.324677,0.314739,0.003961,0.335611,0.074976,0.083891,-0.101192,-0.000581,0.286523,0.29718,-0.087057,0.351991,0.455286,0.140865,-0.165865,0.137814,0.164454,0.180829,0.116232,0.026048,0.395891,-0.303399,0.329614,0.08488,0.4049,0.004907,0.345804,-0.220491,0.146918,0.351733,0.209265,0.270766,0.141533
precipitacion_boyaca,0.12761,0.391595,1.0,0.202172,0.264242,0.213239,0.2135,0.097191,0.108749,-0.09803,0.00389,0.329143,0.338042,0.058891,0.317155,0.266711,0.153622,-0.07238,0.074206,-0.013271,0.058463,0.469496,0.202611,0.234871,-0.134366,0.05121,0.152109,0.276858,0.137253,0.262295,-0.129683,0.064011,0.204066,0.200954,0.330263,0.092542
precipitacion_bolivar,0.041052,0.324677,0.202172,1.0,0.247388,-0.004278,0.109791,0.073121,-0.079292,-0.113995,-0.033369,0.018195,0.063309,0.007794,0.16794,0.338383,0.13825,-0.166742,0.14274,0.145462,0.158988,0.091918,0.047074,0.048677,-0.173457,0.191135,0.119168,0.235722,0.003807,0.089413,-0.07708,0.043375,0.090713,0.063806,0.074195,0.120488
precipitacion_arauca,-0.047415,0.314739,0.264242,0.247388,1.0,-0.01762,0.072115,0.107836,0.056463,-0.135892,-0.066088,0.025013,0.084851,0.038514,0.117684,0.275258,0.039089,-0.260864,0.061164,0.13098,0.380051,0.208583,0.287361,0.031036,-0.15883,0.034924,0.01771,0.109131,0.185561,0.247661,-0.2081,0.084866,-0.007031,0.209292,0.045715,0.142209
precipitacion_choco,0.027519,0.003961,0.213239,-0.004278,-0.01762,1.0,0.128701,-0.007055,0.134876,0.090258,0.083052,0.067838,0.103832,0.269572,0.17062,0.191771,-0.121592,-0.079713,-0.048245,0.26395,0.024089,0.198906,0.03296,0.057048,0.21066,-0.312554,0.085676,0.071512,0.120379,-0.136208,0.173642,0.196236,0.063042,-0.036104,-0.002043,0.059913
precipitacion_cauca,0.127603,0.335611,0.2135,0.109791,0.072115,0.128701,1.0,-0.070319,0.094615,0.067971,0.087176,0.430154,0.290584,-0.090115,0.291185,0.15927,0.097752,0.11887,0.018856,0.203663,0.227512,0.027785,-0.094415,0.354683,-0.182039,0.036725,0.056464,0.32063,-0.086548,0.137821,0.080221,0.175159,0.472071,0.054377,0.181343,0.017864
precipitacion_guainia,-0.02539,0.074976,0.097191,0.073121,0.107836,-0.007055,-0.070319,1.0,0.05507,-0.100795,-0.059546,0.060888,0.06534,-0.060151,0.114822,0.070223,0.00067,-0.082259,0.095596,0.040606,0.050947,0.141937,0.064548,-0.036821,-0.06462,0.134976,0.116575,0.037041,0.015437,0.134029,-0.15165,-0.018777,-0.005481,0.146477,0.059264,0.113545
precipitacion_putumayo,0.0093,0.083891,0.108749,-0.079292,0.056463,0.134876,0.094615,0.05507,1.0,0.040466,-0.000844,0.331619,0.08685,-0.054078,0.112979,-0.022697,-0.019715,0.035349,0.006276,0.13769,-0.04316,0.155397,0.111173,0.066086,-0.014973,-0.153138,0.01225,-0.035423,0.013209,0.16101,-0.176497,-0.145711,0.221635,0.299014,0.033521,0.062644
temp_BOYACA,0.065491,-0.101192,-0.09803,-0.113995,-0.135892,0.090258,0.067971,-0.100795,0.040466,1.0,0.051021,0.028838,0.008866,0.205191,-0.096949,-0.160228,-0.019203,0.190215,-0.053743,-0.005434,0.311354,-0.150789,-0.102908,0.057213,0.428593,-0.272161,-0.063107,-0.113297,0.067886,-0.179403,0.389437,0.437176,0.011222,-0.093948,-0.049279,-0.118994


In [32]:
# Disabled export of the standardized table; nothing in this cell runs.
# NOTE(review): this would fail if re-enabled as written. `exogen_data` is the
# array returned by `scaler.fit_transform`, not a DataFrame, and `scaled_df` is
# not defined in the cells shown here (the standardized frame is
# `scaled_df_merged`). Fix the names before uncommenting, or delete the cell.
# exogen_data["Date"] = df_merged["Date"]
# exogen_data["energy_price"] = df_merged["energy_price"]
# scaled_df.to_csv("processed_tables/merged_no_damns_standardized.csv", index=False)