In [1]:
# Установка библиотек
!pip install openpyxl
!pip install catboost
!pip install optuna

Collecting catboost
  Downloading catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7
Collecting optuna
  Downloading optuna-4.2.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.0-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.4/383.4 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.1-py3-none-any.whl (233 kB)
[2

In [4]:
# импорт библиотек
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier
import optuna

In [2]:
# Загрузка файла с компьютера
from google.colab import files
uploaded = files.upload()

# Чтение данных
df = pd.read_excel('WildFire_Prediction_Data_Set.xlsx', engine='openpyxl')

# Проверка
print(df.head())

Saving WildFire_Prediction_Data_Set.xlsx to WildFire_Prediction_Data_Set.xlsx
               NDVI,LST,BURNED_AREA,CLASS
0  0.506782,14584.272727,4.692308,no_fire
1        0.52215,14655.833333,5.0,no_fire
2               0.682284,14780.0,5.0,fire
3            0.120046,13298.5,3.5,no_fire
4            0.568734,14743.0,5.0,no_fire


In [5]:
# Загрузка и подготовка данных
df = pd.read_excel('WildFire_Prediction_Data_Set.xlsx')
df = df['NDVI,LST,BURNED_AREA,CLASS'].str.split(',', expand=True)
df.columns = ['NDVI', 'LST', 'BURNED_AREA', 'CLASS']

# Преобразование типов данных
df['NDVI'] = df['NDVI'].astype(float)
df['LST'] = df['LST'].astype(float)
df['BURNED_AREA'] = df['BURNED_AREA'].astype(float)
le = LabelEncoder()
df['CLASS'] = le.fit_transform(df['CLASS'])  # 1 - fire, 0 - no_fire

# Разделение данных
X = df[['NDVI', 'LST', 'BURNED_AREA']]
y = df['CLASS']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Функция для оптимизации гиперпараметров с Optuna
def objective(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.3),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-3, 10.0),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'random_strength': trial.suggest_float('random_strength', 1e-3, 10),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'auto_class_weights': 'Balanced',
        'verbose': False
    }

    model = CatBoostClassifier(**params)
    model.fit(X_train, y_train, eval_set=(X_test, y_test), early_stopping_rounds=50)
    preds = model.predict(X_test)
    f1 = f1_score(y_test, preds)
    return f1

# Оптимизация гиперпараметров
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Обучение модели с лучшими параметрами
best_params = study.best_params
best_params['auto_class_weights'] = 'Balanced'
best_model = CatBoostClassifier(**best_params, verbose=False)
best_model.fit(X_train, y_train)

# Оценка модели
test_preds = best_model.predict(X_test)
print(f"Best trial F1-score: {f1_score(y_test, test_preds):.4f}")
print(f"Accuracy: {accuracy_score(y_test, test_preds):.4f}")

# Важность признаков
feature_importances = best_model.get_feature_importance()
for name, importance in zip(X.columns, feature_importances):
    print(f"{name}: {importance:.2f}%")

[I 2025-01-31 15:30:34,367] A new study created in memory with name: no-name-cde05f50-55c0-4d27-90b7-93f9de595ddb
[I 2025-01-31 15:30:35,102] Trial 0 finished with value: 0.8290766208251473 and parameters: {'iterations': 296, 'learning_rate': 0.07898244207533174, 'depth': 7, 'l2_leaf_reg': 9.943318727457973, 'border_count': 242, 'random_strength': 2.4103100896919467, 'bagging_temperature': 0.011653535450827701}. Best is trial 0 with value: 0.8290766208251473.
[I 2025-01-31 15:30:35,813] Trial 1 finished with value: 0.8072289156626506 and parameters: {'iterations': 272, 'learning_rate': 0.009425906713624699, 'depth': 8, 'l2_leaf_reg': 1.6576431262859341, 'border_count': 80, 'random_strength': 1.785518762063917, 'bagging_temperature': 0.3681914608877369}. Best is trial 0 with value: 0.8290766208251473.
[I 2025-01-31 15:30:36,248] Trial 2 finished with value: 0.8369351669941061 and parameters: {'iterations': 287, 'learning_rate': 0.1339882317035224, 'depth': 6, 'l2_leaf_reg': 1.7034755998

Best trial F1-score: 0.8942
Accuracy: 0.8309
NDVI: 35.88%
LST: 35.24%
BURNED_AREA: 28.88%
