# Preparación del modelo

## Importación de librerías

In [2]:
import glob, os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import rasterio as rio
import seaborn as sns

# Carga de datos

## Geodataframe

In [3]:
agebs = gpd.read_file('data/colima/agebs.gpkg')
agebs["area"] = agebs.area / 1000 # ha
agebs.drop(columns=["CVE_ENT","CVE_MUN", "CVE_AGEB","CVE_LOC","geometry"], inplace=True)
agebs = pd.DataFrame(agebs)

## Zonal Statistics

In [4]:
df0 = pd.read_csv('data/megaGDF.csv')
df0.drop(columns=["CVE_ENT","CVE_MUN","CVE_LOC", "_count","_sum"], inplace=True)
df0.head()

Unnamed: 0,CVEGEO,CVE_AGEB,_mean,class,time
0,600200010676,676,,Depth,0
1,600200010727,727,,Depth,0
2,600200011142,1142,,Depth,0
3,600200010411,411,,Depth,0
4,600200011068,1068,,Depth,0


In [5]:
df = df0.pivot_table(index=["CVEGEO",'CVE_AGEB',"time"], columns='class', values=["_mean"])
df.reset_index(inplace=True)
# remove double header
df.columns = df.columns.droplevel(0)
df.columns = ['CVEGEO', 'CVE_AGEB',"time", 'Depth', 'Rain', 'Velocity']
df

Unnamed: 0,CVEGEO,CVE_AGEB,time,Depth,Rain,Velocity
0,0600200010318,0318,0,,0.00,
1,0600200010318,0318,300,,0.00,
2,0600200010318,0318,600,,0.72,
3,0600200010318,0318,900,,0.72,
4,0600200010318,0318,1200,,1.38,
...,...,...,...,...,...,...
2123,0601000010362,0362,23100,0.016889,4.80,0.070683
2124,0601000010362,0362,23400,0.016851,8.76,0.070517
2125,0601000010362,0362,23700,0.016796,8.76,0.070501
2126,0601000010362,0362,24000,0.016748,7.80,0.070419


In [6]:
df = df.merge(agebs, on="CVEGEO")
df

Unnamed: 0,CVEGEO,CVE_AGEB,time,Depth,Rain,Velocity,area
0,0600200010318,0318,0,,0.00,,339.556144
1,0600200010318,0318,300,,0.00,,339.556144
2,0600200010318,0318,600,,0.72,,339.556144
3,0600200010318,0318,900,,0.72,,339.556144
4,0600200010318,0318,1200,,1.38,,339.556144
...,...,...,...,...,...,...,...
2123,0601000010362,0362,23100,0.016889,4.80,0.070683,227.156848
2124,0601000010362,0362,23400,0.016851,8.76,0.070517,227.156848
2125,0601000010362,0362,23700,0.016796,8.76,0.070501,227.156848
2126,0601000010362,0362,24000,0.016748,7.80,0.070419,227.156848


## Denue and INV

In [7]:
denue_inv = pd.read_csv('data/denue_inv.csv')
denue_inv.head()

Unnamed: 0,CVE_AGEB,POBTOT,POB0_14_P,P15A29A_P,P30A59A_P,GRAPROES,P_CD_P,VPH_C_EL_P,VPH_EXSA_P,VPH_DREN_P,...,PARATRAN_C,DRENAJEP_C,TRANSCOL_C,ARBOLES_C,ACESOAUT_C,PUESSEMI_C,PUESAMBU_C,escuela,farmacia,hospital
0,273,74,6.349231,7.081538,13.980769,3.403077,3.540769,30.769231,29.486923,400.0,...,39,39,38,31,39,39,39,1.0,0.0,0.0
1,318,1925,11.397551,17.56898,31.385102,10.333878,7.33,85.303878,85.473878,4188.22,...,143,128,115,81,144,146,143,13.0,6.0,0.0
2,322,2143,11.647736,17.74717,30.118113,11.442453,7.085472,88.486038,88.553396,4693.33,...,155,150,144,70,159,155,151,13.0,3.0,3.0
3,362,2342,18.422286,21.292,33.82,9.617714,5.100857,91.145714,91.380857,3198.33,...,103,102,97,45,105,102,102,4.0,1.0,1.0
4,411,1116,9.866053,17.035,33.167105,11.912895,4.348947,81.578947,81.578947,3100.0,...,113,112,109,66,47,113,111,12.0,2.0,0.0


## Join

In [8]:
df_full = df.merge(denue_inv, on="CVE_AGEB")
df_full

Unnamed: 0,CVEGEO,CVE_AGEB,time,Depth,Rain,Velocity,area,POBTOT,POB0_14_P,P15A29A_P,...,PARATRAN_C,DRENAJEP_C,TRANSCOL_C,ARBOLES_C,ACESOAUT_C,PUESSEMI_C,PUESAMBU_C,escuela,farmacia,hospital
0,0600200010318,0318,0,,0.00,,339.556144,1925,11.397551,17.56898,...,143,128,115,81,144,146,143,13.0,6.0,0.0
1,0600200010318,0318,300,,0.00,,339.556144,1925,11.397551,17.56898,...,143,128,115,81,144,146,143,13.0,6.0,0.0
2,0600200010318,0318,600,,0.72,,339.556144,1925,11.397551,17.56898,...,143,128,115,81,144,146,143,13.0,6.0,0.0
3,0600200010318,0318,900,,0.72,,339.556144,1925,11.397551,17.56898,...,143,128,115,81,144,146,143,13.0,6.0,0.0
4,0600200010318,0318,1200,,1.38,,339.556144,1925,11.397551,17.56898,...,143,128,115,81,144,146,143,13.0,6.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2123,0601000010362,0362,23100,0.016889,4.80,0.070683,227.156848,2342,18.422286,21.29200,...,103,102,97,45,105,102,102,4.0,1.0,1.0
2124,0601000010362,0362,23400,0.016851,8.76,0.070517,227.156848,2342,18.422286,21.29200,...,103,102,97,45,105,102,102,4.0,1.0,1.0
2125,0601000010362,0362,23700,0.016796,8.76,0.070501,227.156848,2342,18.422286,21.29200,...,103,102,97,45,105,102,102,4.0,1.0,1.0
2126,0601000010362,0362,24000,0.016748,7.80,0.070419,227.156848,2342,18.422286,21.29200,...,103,102,97,45,105,102,102,4.0,1.0,1.0


In [118]:
list_area = ["POBTOT",'RECUCALL_C', 'RAMPAS_C', 'PASOPEAT_C',
       'BANQUETA_C', 'CICLOVIA_C', 'CICLOCAR_C', 'ALUMPUB_C', 'SEMAAUDI_C',
       'PARATRAN_C', 'DRENAJEP_C', 'TRANSCOL_C', 'ARBOLES_C', 'ACESOAUT_C',
       'PUESSEMI_C', 'PUESAMBU_C', 'escuela', 'farmacia', 'hospital']

for i in list_area:
    df_full[i] = df_full[i] / df_full["area"]

In [9]:
df_full.head(2).to_csv('data/example.csv')

In [119]:
df_full.columns

Index(['CVEGEO', 'CVE_AGEB', 'time', 'Depth', 'Rain', 'Velocity', 'area',
       'POBTOT', 'POB0_14_P', 'P15A29A_P', 'P30A59A_P', 'GRAPROES', 'P_CD_P',
       'VPH_C_EL_P', 'VPH_EXSA_P', 'VPH_DREN_P', 'RECUCALL_C', 'RAMPAS_C',
       'PASOPEAT_C', 'BANQUETA_C', 'CICLOVIA_C', 'CICLOCAR_C', 'ALUMPUB_C',
       'SEMAAUDI_C', 'PARATRAN_C', 'DRENAJEP_C', 'TRANSCOL_C', 'ARBOLES_C',
       'ACESOAUT_C', 'PUESSEMI_C', 'PUESAMBU_C', 'escuela', 'farmacia',
       'hospital'],
      dtype='object')

In [120]:
df_full.replace([np.nan], 0, inplace=True)

df_full.dropna(subset="Depth",inplace=True)
df_full.drop(columns=["CVEGEO","area"], inplace=True)

In [121]:
df_full.describe()

Unnamed: 0,time,Depth,Rain,Velocity,POBTOT,POB0_14_P,P15A29A_P,P30A59A_P,GRAPROES,P_CD_P,...,PARATRAN_C,DRENAJEP_C,TRANSCOL_C,ARBOLES_C,ACESOAUT_C,PUESSEMI_C,PUESAMBU_C,escuela,farmacia,hospital
count,2128.0,2128.0,2128.0,2128.0,2128.0,2128.0,2128.0,2128.0,2128.0,2128.0,...,2128.0,2128.0,2128.0,2128.0,2128.0,2128.0,2128.0,2128.0,2128.0,2128.0
mean,12127.161654,0.187875,5.140376,0.527384,4.907563,13.715332,18.723482,33.262518,10.481927,5.76134,...,0.294701,0.280747,0.268686,0.155014,0.283031,0.29573,0.291821,0.0208,0.007161,0.002585
std,7089.632672,0.230293,7.006402,0.630019,2.463613,4.298687,2.71563,3.538077,1.518658,2.72043,...,0.098596,0.101653,0.096795,0.049552,0.110762,0.095936,0.096671,0.014787,0.007274,0.003263
min,0.0,0.0,0.0,0.0,0.763735,5.978372,13.757209,26.7248,7.758302,1.448605,...,0.155521,0.151374,0.140659,0.081039,0.07271,0.155521,0.155521,0.004147,0.0,0.0
25%,6000.0,0.0,1.14,0.0,2.961125,10.981154,17.035,30.118113,9.5696,2.937368,...,0.221345,0.197157,0.187864,0.109205,0.208822,0.226925,0.221345,0.008389,0.003668,0.0
50%,12000.0,0.094626,2.22,0.265657,4.742263,12.719286,19.053636,33.346176,10.210981,5.85,...,0.26076,0.251447,0.23515,0.155901,0.257202,0.26659,0.258606,0.015385,0.005178,0.002142
75%,18300.0,0.281485,7.2,0.912471,5.942048,15.677234,20.5275,35.534118,11.442453,7.507209,...,0.376356,0.376356,0.351083,0.1905,0.376356,0.376356,0.373989,0.031621,0.008164,0.004402
max,24300.0,1.005461,36.3,2.407312,10.310057,25.316415,25.398293,40.178511,13.536364,10.405106,...,0.486407,0.481543,0.452359,0.248068,0.496135,0.457223,0.476679,0.059733,0.032164,0.012701


# Modelo

In [122]:
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [129]:
df_full.columns

Index(['CVE_AGEB', 'time', 'Depth', 'Rain', 'Velocity', 'POBTOT', 'POB0_14_P',
       'P15A29A_P', 'P30A59A_P', 'GRAPROES', 'P_CD_P', 'VPH_C_EL_P',
       'VPH_EXSA_P', 'VPH_DREN_P', 'RECUCALL_C', 'RAMPAS_C', 'PASOPEAT_C',
       'BANQUETA_C', 'CICLOVIA_C', 'CICLOCAR_C', 'ALUMPUB_C', 'SEMAAUDI_C',
       'PARATRAN_C', 'DRENAJEP_C', 'TRANSCOL_C', 'ARBOLES_C', 'ACESOAUT_C',
       'PUESSEMI_C', 'PUESAMBU_C', 'escuela', 'farmacia', 'hospital'],
      dtype='object')

In [134]:
dataset = tf.data.Dataset.from_tensor_slices(({
    'time': df_full['time'].values,
    'Depth': df_full['Depth'].values,
    'Velocity': df_full['Velocity'].values,
    'Rain': df_full['Rain'].values,
    'densidad_poblacion': df_full['POBTOT'].values,
    'edad_0_14': df_full['POB0_14_P'].values,
    'edad_15_29': df_full['P15A29A_P'].values,
    'edad_30_60': df_full['P30A59A_P'].values,
    'escolaridad': df_full['GRAPROES'].values,
    'discapacidad': df_full['P_CD_P'].values,
    'vivienda_energia': df_full['VPH_C_EL_P'].values,
    'viviendas_sanitario': df_full['VPH_EXSA_P'].values,
    'viviendas_drenaje': df_full['VPH_DREN_P'].values,
    'pavimento_calles': df_full['RECUCALL_C'].values,
    'rampas_sillas': df_full['RAMPAS_C'].values,
    'paso_peatonal': df_full['PASOPEAT_C'].values,
    'banqueta': df_full['BANQUETA_C'].values,
    'ciclovia': df_full['CICLOVIA_C'].values,
    'ciclocarril': df_full['CICLOCAR_C'].values,
    'alumbrado_publico': df_full['ALUMPUB_C'].values,
    'semaforo_auditivo': df_full['SEMAAUDI_C'].values,
    'transporte_colectivo': df_full['PARATRAN_C'].values,
    'drenaje_pluvial': df_full['DRENAJEP_C'].values,
    'transporte_colectivo': df_full['BANQUETA_C'].values,
    'arboles': df_full['ARBOLES_C'].values,
    'acceso_automovil': df_full['ACESOAUT_C'].values,
    'comercio_fijo': df_full['PUESSEMI_C'].values,
    'comercio_ambulante': df_full['PUESAMBU_C'].values,
    'escuela': df_full['escuela'].values,
    'farmacia': df_full['farmacia'].values,
    'hospital': df_full['hospital'].values,
}, df_full['CVE_AGEB'].values))

In [137]:
# dividir el conjunto de datos en entrenamiento y prueba
train_size = int(0.7 * len(dataset))
val_size = len(dataset) - train_size

train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)

# definir el modelo
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(27,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# compilar el modelo
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [136]:
# Entrenamiento del modelo
model.fit(train_dataset, epochs=10, validation_data=val_dataset)

# Evaluación del modelo
test_loss, test_accuracy = model.evaluate(test_dataset)

print('Loss del conjunto de prueba:', test_loss)
print('Precisión del conjunto de prueba:', test_accuracy)

NameError: name 'train_data' is not defined