# Consolidación de la base de datos meteorológica con todas las fuentes disponibles 

## Metodología de la consolidación

Que vamos a hacer?
1. Descargar los 8 csv de AMSC, uno por cada atributo o varibles meteorológica
2. Consolidar en un mismo .csv, para que quede en una misma serie de tiempo |Fecha|Variables|
3. Reemuestreo de la BD a frecuencia por semana epidemiológica 
4. Preprocesamiento 

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

## Datos satélitales

Importación de datos de la NASA POWER

In [12]:
# Importar datos
df_nasa=pd.read_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\NASA POWER\Datos_NS_2021-2024.csv", sep=';')
df_nasa

Unnamed: 0,YEAR,DOY,T2M,T2M_MAX,T2M_MIN,QV2M,RH2M,PRECTOTCORR,WS2M,WS2M_MAX,WS2M_MIN,WD2M,UV
0,2021,1,28.38,34.96,23.58,16.62,71.66,3.59,0.14,0.24,0.02,355.4,2.48
1,2021,2,27.44,33.39,23.64,17.98,80.54,12.12,0.10,0.23,0.04,294.9,2.18
2,2021,3,28.64,35.09,23.84,17.89,75.41,4.03,0.14,0.32,0.07,332.0,2.48
3,2021,4,28.48,35.05,23.69,15.91,68.69,0.70,0.14,0.25,0.07,329.4,2.47
4,2021,5,27.84,34.96,22.71,15.16,68.47,0.62,0.16,0.34,0.03,310.1,2.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,2024,362,25.83,28.78,23.57,18.48,88.70,2.16,0.19,0.46,0.04,300.9,1.79
1457,2024,363,26.05,29.56,23.60,19.30,91.31,9.07,0.14,0.31,0.06,321.3,1.51
1458,2024,364,24.92,26.19,23.93,19.24,96.95,8.35,0.15,0.38,0.03,101.8,1.70
1459,2024,365,25.11,27.89,23.30,18.50,92.34,1.89,0.13,0.36,0.01,250.0,1.65


## Datos locales

Importación de los datos de AMSC


In [3]:
direccion_viento=pd.read_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\AMSC\AMSC_direccion_viento.csv", sep=';')
humedad=pd.read_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\AMSC\AMSC_humedad.csv", sep=';')
precipitacion=pd.read_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\AMSC\AMSC_precipitacion.csv", sep=';')
presion=pd.read_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\AMSC\AMSC_presion.csv", sep=';')
radiacion=pd.read_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\AMSC\AMSC_radiacion.csv", sep=';')
temperatura=pd.read_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\AMSC\AMSC_temperatura.csv", sep=';')
uv=pd.read_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\AMSC\AMSC_uv.csv", sep=';')
velocidad_viento=pd.read_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\AMSC\AMSC_velocidad_viento.csv", sep=';')

  precipitacion=pd.read_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\AMSC\AMSC_precipitacion.csv", sep=';')


In [4]:
import pandas as pd
from functools import reduce

# Diccionario de tus dataframes y los nombres que quieres darles
dataframes = {
    "direccion_viento": direccion_viento,
    "humedad": humedad,
    "precipitacion": precipitacion,
    "presion": presion,
    "radiacion": radiacion,
    "temperatura": temperatura,
    "uv": uv,
    "velocidad_viento": velocidad_viento
}

# Renombrar columnas de forma segura
for name, df in dataframes.items():
    cols = df.columns.tolist()
    if len(cols) > 1:
        df.rename(columns={cols[1]: name}, inplace=True)
    else:
        print(f"⚠️ El archivo {name} solo tiene la columna: {cols}. No se pudo renombrar la variable.")

# Unir todos los dataframes por la columna 'fecha'
df_final = reduce(lambda left, right: pd.merge(left, right, on="fecha", how="outer"), dataframes.values())

# Convertir 'fecha' a tipo datetime
df_final["fecha"] = pd.to_datetime(df_final["fecha"], errors="coerce")

# Ordenar por fecha ascendente
df_final = df_final.sort_values(by="fecha", ascending=True).reset_index(drop=True)


In [11]:
# Eliminar filas con al menos un dato faltante (NaN)
df_amsc = df_final.dropna()

# Revisar resultado
df_amsc.head(10)


Unnamed: 0,fecha,direccion_viento,humedad,precipitacion,presion,radiacion,temperatura,uv,velocidad_viento
1440,2022-01-12 00:00:00,257.0,94.0,0.0,100.992.279.147,0.0,24.277.777.777.777.700,255.0,1.0
1441,2022-01-12 00:01:00,327.0,94.0,0.0,100.985.506.369,0.0,2.433.333.333.333.330,255.0,0.0
1442,2022-01-12 00:02:00,327.0,94.0,0.0,100.985.506.369,0.0,2.433.333.333.333.330,255.0,0.0
1443,2022-01-12 00:03:00,327.0,94.0,0.0,100.978.733.591,0.0,2.433.333.333.333.330,255.0,0.0
1444,2022-01-12 00:04:00,360.0,94.0,0.0,100.971.960.813,0.0,2.433.333.333.333.330,255.0,1.0
1445,2022-01-12 00:05:00,360.0,94.0,0.0,100.978.733.591,0.0,2.433.333.333.333.330,255.0,1.0
1446,2022-01-12 00:06:00,40.0,94.0,0.0,100.978.733.591,0.0,2.433.333.333.333.330,255.0,1.0
1447,2022-01-12 00:07:00,59.0,94.0,0.0,10.098.211.998,0.0,2.433.333.333.333.330,255.0,0.0
1448,2022-01-12 00:08:00,60.0,94.0,0.0,10.098.211.998,0.0,2.433.333.333.333.330,255.0,1.0
1449,2022-01-12 00:09:00,81.0,94.0,0.0,100.985.506.369,0.0,2.433.333.333.333.330,255.0,0.0


In [6]:
#guardar df final csv
df_final_sin_na.to_csv(r"C:\Users\usuario1\OneDrive - Universidad de Antioquia\UNIVERSIDAD DE ANTIOQUIA\Proyecto SAT Dengue\Bases de datos\Datos meteorológicos\Datos_meteorologicos_completos_2021-2024.csv", index=False)

In [13]:
# Renombrar columnas del dataframe nasa
df_nasa.rename(columns={
    'T2M': 'TEMP (nasa)',
    'T2M_MAX': 'TEMP_MAX (nasa)',
    'T2M_MIN': 'TEMP_MIN (nasa)',
    'QV2M': 'HUMESP (nasa)',
    'RH2M': 'HUMREL (nasa)',
    'PRECTOTCORR': 'PREC (nasa)',
    'WS2M': 'VELV (nasa)',
    'WS2M_MAX': 'VELV_MAX (nasa)',
    'WS2M_MIN': 'VELV_MIN (nasa)',
    'WD2M': 'DIRV (nasa)',
    'UV': 'UV (nasa)'
}, inplace=True)
df_nasa

Unnamed: 0,YEAR,DOY,TEMP (nasa),TEMP_MAX (nasa),TEMP_MIN (nasa),HUMESP (nasa),HUMREL (nasa),PREC (nasa),VELV (nasa),VELV_MAX (nasa),VELV_MIN (nasa),DIRV (nasa),UV (nasa)
0,2021,1,28.38,34.96,23.58,16.62,71.66,3.59,0.14,0.24,0.02,355.4,2.48
1,2021,2,27.44,33.39,23.64,17.98,80.54,12.12,0.10,0.23,0.04,294.9,2.18
2,2021,3,28.64,35.09,23.84,17.89,75.41,4.03,0.14,0.32,0.07,332.0,2.48
3,2021,4,28.48,35.05,23.69,15.91,68.69,0.70,0.14,0.25,0.07,329.4,2.47
4,2021,5,27.84,34.96,22.71,15.16,68.47,0.62,0.16,0.34,0.03,310.1,2.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,2024,362,25.83,28.78,23.57,18.48,88.70,2.16,0.19,0.46,0.04,300.9,1.79
1457,2024,363,26.05,29.56,23.60,19.30,91.31,9.07,0.14,0.31,0.06,321.3,1.51
1458,2024,364,24.92,26.19,23.93,19.24,96.95,8.35,0.15,0.38,0.03,101.8,1.70
1459,2024,365,25.11,27.89,23.30,18.50,92.34,1.89,0.13,0.36,0.01,250.0,1.65


In [14]:
# renombrar columnas del df final_sin_na
df_amsc.rename(columns={
    'temperatura': 'TEMP (amsc)',
    'humedad': 'HUMESP (amsc)',
    'precipitacion': 'PREC (amsc)',
    'velocidad_viento': 'VELV (amsc)',
    'direccion_viento': 'DIRV (amsc)',
    'presion': 'PRES (amsc)',
    'radiacion': 'RAD (amsc)',
    'uv': 'UV (amsc)'
}, inplace=True)
df_amsc 


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_amsc.rename(columns={


Unnamed: 0,fecha,DIRV (amsc),HUMESP (amsc),PREC (amsc),PRES (amsc),RAD (amsc),TEMP (amsc),UV (amsc),VELV (amsc)
1440,2022-01-12 00:00:00,257.0,94.0,0.0,100.992.279.147,0.0,24.277.777.777.777.700,255.0,1.0
1441,2022-01-12 00:01:00,327.0,94.0,0.0,100.985.506.369,0.0,2.433.333.333.333.330,255.0,0.0
1442,2022-01-12 00:02:00,327.0,94.0,0.0,100.985.506.369,0.0,2.433.333.333.333.330,255.0,0.0
1443,2022-01-12 00:03:00,327.0,94.0,0.0,100.978.733.591,0.0,2.433.333.333.333.330,255.0,0.0
1444,2022-01-12 00:04:00,360.0,94.0,0.0,100.971.960.813,0.0,2.433.333.333.333.330,255.0,1.0
...,...,...,...,...,...,...,...,...,...
407770,2024-12-07 23:55:00,69.0,89.0,0.0,101.131.121.096,0.0,2.777.777.777.777.770,255.0,2.0
407771,2024-12-07 23:56:00,55.0,89.0,0.0,101.124.348.318,0.0,2.777.777.777.777.770,255.0,1.0
407772,2024-12-07 23:57:00,80.0,88.0,0.0,101.120.961.929,0.0,2.777.777.777.777.770,255.0,4.0
407773,2024-12-07 23:58:00,95.0,88.0,0.0,10.111.757.554,0.0,2.777.777.777.777.770,255.0,1.0


Empezamos a hacer el reemuestreo 