In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import joblib

pd.options.display.max_colwidth = None
pd.options.display.max_rows = 150


df = pd.read_csv("Datasets/df.csv") 

In [2]:
df = df.loc[(df['X'] != 0) & (df['Y'] != 0)]

In [3]:
df['F_DETEC'] = pd.to_datetime(df['F_DETEC'], format='%d/%m/%Y')

In [4]:
df = df[(df['F_DETEC'].dt.year >= 2012) & (df['F_DETEC'].dt.year <= 2015)]

Preprocesamiento

In [None]:
mediana_d_viento = df['D_VIENTO'][df['D_VIENTO'] != 0].median()
df['D_VIENTO'] = df['D_VIENTO'].replace(0, mediana_d_viento)

moda = df["IND_PE_TXT"].mode()[0]
df["IND_PE_TXT"] = df["IND_PE_TXT"].fillna(moda)

mediana_h_relat = df['H_RELAT'][df['H_RELAT'] != 0].median()
df['H_RELAT'] = df['H_RELAT'].replace(0, mediana_h_relat)




In [6]:
# Función para asignar la estación
def asignar_estacion(fecha):
    dia = fecha.day
    mes = fecha.month
    
    if (mes == 12 and dia >= 21) or (mes in [1, 2]) or (mes == 3 and dia < 21):
        return 'Invierno'
    elif (mes == 3 and dia >= 21) or (mes in [4, 5]) or (mes == 6 and dia < 21):
        return 'Primavera'
    elif (mes == 6 and dia >= 21) or (mes in [7, 8]) or (mes == 9 and dia < 21):
        return 'Verano'
    else:
        return 'Otoño'

# Crear la columna con las estaciones
df['ESTACION'] = df['F_DETEC'].apply(asignar_estacion)

In [7]:
df['AVIANFUSO_BINARY'] = df['AVIANFNUM'].apply(lambda x: 0 if x == 0 else 1)

In [8]:
predictors = ['D_VIENTO', 'V_VIENTO', 'IND_PE_TXT', 'H_RELAT', 'DULLUVIA', 'X', 'Y',
       'ESTACION']

In [9]:
# Identificamos las variables categóricas y numéricas.
categorical_features = ['IND_PE_TXT', 'ESTACION']
numeric_features = [col for col in predictors if col not in categorical_features]

# Creamos el transformador para procesar ambas partes
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ]
)

# Aviones_descarga

In [10]:
target = "AVIANFUSO_BINARY"


X = df[predictors]
X_transformed = preprocessor.fit_transform(X)
y = df[target]    



# Train the model as before
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42, stratify=y)

scale_pos_weight = (len(y_train) - sum(y_train)) / sum(y_train)
model_aviones_descarga = xgb.XGBClassifier(use_label_encoder=False, 
                          eval_metric='logloss', 
                          n_estimators=100,
                          enable_categorical=True,  # Now disable categorical mode
                          scale_pos_weight=scale_pos_weight,
                          learning_rate=0.0001,
                          max_depth=6,
                          random_state=42)

model_aviones_descarga.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



In [11]:
# Guardar el modelo entrenado
joblib.dump(model_aviones_descarga, "Modelos/modelo_incendios_aviones_descarga.pkl")


['Modelos/modelo_incendios_aviones_descarga.pkl']

In [12]:
df[predictors].head()

Unnamed: 0,D_VIENTO,V_VIENTO,IND_PE_TXT,H_RELAT,DULLUVIA,X,Y,ESTACION
2,270,18,Alarma,33,3,682259.0,4389508.0,Otoño
7,80,15,Prealerta,57,9,712201.0,4427566.0,Primavera
10,240,50,Alarma extrema,55,8,676347.0,4348633.0,Verano
13,230,20,Alarma extrema,28,10,690455.0,4403401.0,Verano
1973,200,0,Prealerta,54,9,706892.0,4289864.0,Primavera


# Heli-transporte

In [13]:
df["HELTRANUM"].unique()

array([12,  0, 27,  1,  6,  2,  5,  4,  3, 13, 22,  7,  9, 10, 20],
      dtype=int64)

In [14]:
df['HELTRANUM_USO'] = df['HELTRANUM'].apply(lambda x: 0 if x == 0 else 1)

In [15]:
target = "HELTRANUM_USO"


X = df[predictors]
X_transformed = preprocessor.fit_transform(X)
y = df[target]    



# Train the model as before
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42, stratify=y)

scale_pos_weight = (len(y_train) - sum(y_train)) / sum(y_train)
model_helicopteros_transporte = xgb.XGBClassifier(use_label_encoder=False, 
                          eval_metric='logloss', 
                          n_estimators=100,
                          enable_categorical=True,  # Now disable categorical mode
                          scale_pos_weight=scale_pos_weight,
                          learning_rate=0.0001,
                          max_depth=6,
                          random_state=42)

model_helicopteros_transporte.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



In [16]:
# Guardar el modelo entrenado
joblib.dump(model_helicopteros_transporte, "Modelos/modelo_incendios_helicopteros_transporte.pkl")

['Modelos/modelo_incendios_helicopteros_transporte.pkl']

# Bulldozer


In [17]:
df["BULDOZZER"].value_counts()

BULDOZZER
0     1626
1        3
25       2
5        1
10       1
Name: count, dtype: int64

In [18]:
df['BULDOZZER_USO'] = df['BULDOZZER'].apply(lambda x: 0 if x == 0 else 1)

In [19]:
target = "BULDOZZER_USO"


X = df[predictors]
X_transformed = preprocessor.fit_transform(X)
y = df[target]    



# Train the model as before
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42, stratify=y)

scale_pos_weight = (len(y_train) - sum(y_train)) / sum(y_train)
model_bulldozer = xgb.XGBClassifier(use_label_encoder=False, 
                          eval_metric='logloss', 
                          n_estimators=100,
                          enable_categorical=True,  # Now disable categorical mode
                          scale_pos_weight=scale_pos_weight,
                          learning_rate=0.0001,
                          max_depth=6,
                          random_state=42)

model_bulldozer.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



In [20]:
# Guardar el modelo entrenado
joblib.dump(model_bulldozer, "Modelos/modelo_incendios_bulldozer.pkl")

['Modelos/modelo_incendios_bulldozer.pkl']

# Aviones terrestres

In [21]:
df["AVICARNUM"].value_counts()

AVICARNUM
0     1235
1      220
2      119
4       20
3       16
5        7
6        4
9        2
7        2
8        2
18       1
38       1
15       1
16       1
10       1
14       1
Name: count, dtype: int64

In [22]:
df['AVICARNUM_USO'] = df['AVICARNUM'].apply(lambda x: 0 if x == 0 else 1)

In [23]:
target = "AVICARNUM_USO"


X = df[predictors]
X_transformed = preprocessor.fit_transform(X)
y = df[target]    



# Train the model as before
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42, stratify=y)

scale_pos_weight = (len(y_train) - sum(y_train)) / sum(y_train)
model_aviones_terrestres = xgb.XGBClassifier(use_label_encoder=False, 
                          eval_metric='logloss', 
                          n_estimators=100,
                          enable_categorical=True,  # Now disable categorical mode
                          scale_pos_weight=scale_pos_weight,
                          learning_rate=0.0001,
                          max_depth=6,
                          random_state=42)

model_aviones_terrestres.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



In [24]:
# Guardar el modelo entrenado
joblib.dump(model_aviones_terrestres, "Modelos/modelo_incendios_aviones_terrestres.pkl")

['Modelos/modelo_incendios_aviones_terrestres.pkl']

# Autobombas

In [25]:
df["AUTOBOMBA"].value_counts()

AUTOBOMBA
1     500
2     413
3     247
0     160
4     119
5      60
6      30
7      28
8      10
12      8
10      6
9       6
30      4
21      4
15      4
13      4
11      4
99      3
17      2
16      2
22      2
14      2
25      2
18      2
40      1
49      1
20      1
24      1
28      1
55      1
23      1
36      1
54      1
19      1
26      1
Name: count, dtype: int64

In [26]:
df['AUTOBOMBA_USO'] = df['AUTOBOMBA'].apply(lambda x: 0 if x == 0 else 1)

In [27]:
target = "AUTOBOMBA_USO"


X = df[predictors]
X_transformed = preprocessor.fit_transform(X)
y = df[target]    



# Train the model as before
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42, stratify=y)

scale_pos_weight = (len(y_train) - sum(y_train)) / sum(y_train)
model_autobombas = xgb.XGBClassifier(use_label_encoder=False, 
                          eval_metric='logloss', 
                          n_estimators=100,
                          enable_categorical=True,  # Now disable categorical mode
                          scale_pos_weight=scale_pos_weight,
                          learning_rate=0.0001,
                          max_depth=6,
                          random_state=42)

model_autobombas.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



In [28]:
joblib.dump(model_autobombas, "Modelos/modelo_incendios_autobombas.pkl")

['Modelos/modelo_incendios_autobombas.pkl']