In [108]:
import mlflow.pytorch
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
import yaml

from model import SimpleNN

In [109]:
def load_config(config_path: str):
    with open(config_path, 'r') as file:
        config = yaml.safe_load(file)
    return config

In [110]:
config = load_config('config.yaml')

In [111]:
# Load the data
tree_census = pd.read_csv('ready.csv')
tree_census.head(3)

Unnamed: 0,tree_dbh,stump_diam,curb_loc,status,health,spc_latin,spc_common,steward,sidewalk,user_type,...,root_grate,root_other,trunk_wire,trnk_light,trnk_other,latitude,longitude,brch_light,brch_shoe,brch_other
0,3,0,1,0,0,11,104,3,1,1,...,0,0,0,0,0,40.723092,-73.844215,0,0,0
1,21,0,1,0,0,108,97,3,0,1,...,0,0,0,0,0,40.794111,-73.818679,0,0,0
2,3,0,1,0,1,57,83,0,0,2,...,0,0,0,0,0,40.717581,-73.936608,0,0,0


In [112]:
X = tree_census.drop(['health'], axis=1)
y = tree_census[['health']]

In [113]:
X

Unnamed: 0,tree_dbh,stump_diam,curb_loc,status,spc_latin,spc_common,steward,sidewalk,user_type,problems,...,root_grate,root_other,trunk_wire,trnk_light,trnk_other,latitude,longitude,brch_light,brch_shoe,brch_other
0,3,0,1,0,11,104,3,1,1,212,...,0,0,0,0,0,40.723092,-73.844215,0,0,0
1,21,0,1,0,108,97,3,0,1,82,...,0,0,0,0,0,40.794111,-73.818679,0,0,0
2,3,0,1,0,57,83,0,0,2,212,...,0,0,0,0,0,40.717581,-73.936608,0,0,0
3,10,0,1,0,57,83,3,0,2,82,...,0,0,0,0,0,40.713537,-73.934456,0,0,0
4,21,0,1,0,124,6,3,0,2,82,...,0,0,0,0,0,40.666778,-73.975979,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
652167,25,0,1,0,108,97,3,0,2,212,...,0,0,0,0,0,40.713211,-73.954944,0,0,0
652168,7,0,1,0,41,30,0,1,2,212,...,0,0,0,0,0,40.715194,-73.856650,0,0,0
652169,12,0,1,0,11,104,3,0,2,212,...,0,0,0,0,0,40.620762,-74.136517,0,0,0
652170,9,0,1,0,11,104,3,1,1,212,...,0,0,0,0,0,40.850828,-73.903115,0,0,0


In [114]:
y

Unnamed: 0,health
0,0
1,0
2,1
3,1
4,1
...,...
652167,1
652168,1
652169,1
652170,1


In [115]:
tree_census_scaler = StandardScaler()
X = tree_census_scaler.fit_transform(X)

In [116]:
train_loader = DataLoader(
    TensorDataset(
        torch.tensor(X).float(),
        torch.tensor(y.values).long()
    ),
    batch_size=1024,
    shuffle=True
)

In [117]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

input_size = X.shape[1]
model = SimpleNN(input_size).to(device)

criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.00001, momentum=0.9)

mlflow.set_experiment("Tree Health Classification")

with mlflow.start_run():
    # Логирование гиперпараметров
    mlflow.log_param("input_size", config['input_size'])
    mlflow.log_param("num_classes", config['num_classes'])
    mlflow.log_param("learning_rate", config['learning_rate'])
    mlflow.log_param("momentum", config['momentum'])
    mlflow.log_param("weight_decay", config['weight_decay'])
    mlflow.log_param("batch_size", config['batch_size'])
    mlflow.log_param("num_epochs", config['num_epochs'])
    mlflow.log_param("dropout_rate", config['dropout_rate'])

    # Обучение модели
    for epoch in range(config['num_epochs']):
        epoch_loss = 0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Прямое распространение
            outputs = model(inputs)

            # Убираем лишнюю размерность из labels, если она есть
            if labels.dim() == 2 and labels.shape[1] == 1:
                labels = labels.squeeze(1)

            # Вычисление потерь
            loss = criterion(outputs, labels)

            # Обратное распространение и оптимизация
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        avg_loss = epoch_loss / len(train_loader)
        print(f'Epoch [{epoch + 1}/{config["num_epochs"]}], Loss: {avg_loss:.4f}')

        # Логирование метрик
        mlflow.log_metric("loss", avg_loss, step=epoch)

        # Сохранение модели на каждом шаге
        torch.save(model.state_dict(), config['model_path'])
        mlflow.log_artifact(config['model_path'])

    # Логирование модели
    mlflow.pytorch.log_model(model, "model")

Using device: cuda
Epoch [1/100], Loss: 1.0780
Epoch [2/100], Loss: 1.0347
Epoch [3/100], Loss: 0.9948
Epoch [4/100], Loss: 0.9579
Epoch [5/100], Loss: 0.9235
Epoch [6/100], Loss: 0.8919
Epoch [7/100], Loss: 0.8624
Epoch [8/100], Loss: 0.8357
Epoch [9/100], Loss: 0.8112
Epoch [10/100], Loss: 0.7888
Epoch [11/100], Loss: 0.7688
Epoch [12/100], Loss: 0.7507
Epoch [13/100], Loss: 0.7349
Epoch [14/100], Loss: 0.7209
Epoch [15/100], Loss: 0.7087
Epoch [16/100], Loss: 0.6978
Epoch [17/100], Loss: 0.6883
Epoch [18/100], Loss: 0.6802
Epoch [19/100], Loss: 0.6732
Epoch [20/100], Loss: 0.6669
Epoch [21/100], Loss: 0.6614
Epoch [22/100], Loss: 0.6565
Epoch [23/100], Loss: 0.6521
Epoch [24/100], Loss: 0.6483
Epoch [25/100], Loss: 0.6452
Epoch [26/100], Loss: 0.6421
Epoch [27/100], Loss: 0.6394
Epoch [28/100], Loss: 0.6370
Epoch [29/100], Loss: 0.6348
Epoch [30/100], Loss: 0.6323
Epoch [31/100], Loss: 0.6305
Epoch [32/100], Loss: 0.6286
Epoch [33/100], Loss: 0.6270
Epoch [34/100], Loss: 0.6257
Epoc



Epoch [100/100], Loss: 0.5859


