In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Cargar el dataset
file_path = './forestfires.csv'
df = pd.read_csv(file_path)

# Funciones de categorización
def categorize_ffmc(value):
    if value <= 30:
        return 'Bajo'
    elif value <= 60:
        return 'Moderado'
    elif value <= 80:
        return 'Alto'
    else:
        return 'Muy Alto'

def categorize_dmc(value):
    if value <= 10:
        return 'Bajo'
    elif value <= 20:
        return 'Moderado'
    elif value <= 30:
        return 'Alto'
    else:
        return 'Muy Alto'

def categorize_isi(value):
    if value <= 3:
        return 'Bajo'
    elif value <= 6:
        return 'Moderado'
    elif value <= 12:
        return 'Alto'
    else:
        return 'Muy Alto'

# Aplicar las funciones de categorización
df['FFMC_category'] = df['FFMC'].apply(categorize_ffmc)
df['DMC_category'] = df['DMC'].apply(categorize_dmc)
df['ISI_category'] = df['ISI'].apply(categorize_isi)

# Seleccionar características y la etiqueta (DC)
X = df.drop(columns=['DC', 'area'])  # Eliminamos DC del conjunto de características, así como el área quemada
y = df['DC']

# Identificar características numéricas y categóricas
numeric_features = ['X', 'Y', 'FFMC', 'DMC', 'ISI', 'temp', 'RH', 'wind', 'rain']
categorical_features = ['month', 'day', 'FFMC_category', 'DMC_category', 'ISI_category']

# Preprocesamiento: Estandarizar características numéricas y codificar categóricas
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

# Configuración del modelo (Regresión Lineal)
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Implementar K-Fold Cross-Validation
k = 5  # Número de folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Realizar Cross-Validation y calcular R² para cada fold
cv_scores = cross_val_score(model_pipeline, X, y, cv=kf, scoring='r2')

# Mostrar resultados
print(f"R² en cada fold: {cv_scores}")
print(f"R² promedio: {cv_scores.mean()}")
print(f"Desviación estándar de R²: {cv_scores.std()}")

# Entrenar el modelo en todos los datos
model_pipeline.fit(X, y)

# Guardar el modelo entrenado
joblib.dump(model_pipeline, 'dc_prediction_model.pkl')

# Cargar el modelo y hacer una predicción con nuevos datos
model_loaded = joblib.load('dc_prediction_model.pkl')

# Ejemplo de nuevos datos
new_data = pd.DataFrame({
    'X': [7],
    'Y': [5],
    'FFMC': [86.2],
    'DMC': [26.2],
    'ISI': [5.1],
    'temp': [22.0],
    'RH': [45],
    'wind': [3.1],
    'rain': [0.0],
    'month': ['mar'],
    'day': ['fri'],
    'FFMC_category': ['Muy Alto'],
    'DMC_category': ['Alto'],
    'ISI_category': ['Moderado']
})

# Realizar la predicción
prediction = model_loaded.predict(new_data)
print(f"Predicción del DC: {prediction[0]}")




R² en cada fold: [0.96764548        nan 0.9437327         nan        nan]
R² promedio: nan
Desviación estándar de R²: nan
Predicción del DC: 95.4773137918273


Traceback (most recent call last):
  File "/home/mcabre/.local/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 139, in __call__
    score = scorer._score(
  File "/home/mcabre/.local/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 371, in _score
    y_pred = method_caller(
  File "/home/mcabre/.local/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 89, in _cached_call
    result, _ = _get_response_values(
  File "/home/mcabre/.local/lib/python3.10/site-packages/sklearn/utils/_response.py", line 239, in _get_response_values
    y_pred, pos_label = prediction_method(X), None
  File "/home/mcabre/.local/lib/python3.10/site-packages/sklearn/pipeline.py", line 603, in predict
    Xt = transform.transform(Xt)
  File "/home/mcabre/.local/lib/python3.10/site-packages/sklearn/utils/_set_output.py", line 313, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
  File "/home/mcabre/.local/lib/python3.10/site-packages/sklearn/compose/_column_transf