In [1]:
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import logging
from datetime import datetime
from tqdm import tqdm
from multiprocessing import Pool, freeze_support

In [2]:
class NNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, hidden_size_two, hidden_size_three, output_size):
        super(NNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size_two, bias=True)
        self.relu = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size_two, hidden_size_three, bias=True)
        self.fc4 = nn.Linear(hidden_size_three, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        return x


In [3]:
def load_dataset(dataset_filename):
    with open(dataset_filename, 'r') as file:
        dataset = json.load(file)
    return dataset

In [4]:
log_format = '%(asctime)s - %(levelname)s - %(message)s'

# Создаем объект логгера
logger = logging.getLogger('MLModule')

# Устанавливаем уровень логгирования для консольного логгера
console_handler = logging.StreamHandler()
console_handler.setLevel('INFO')
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

# Устанавливаем уровень логгирования для файлового логгера
file_handler = logging.FileHandler(f"log-ml_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log")
file_handler.setLevel('DEBUG')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

logger.setLevel(logging.DEBUG)

In [5]:
# Гиперпараметры
nn_params = {
    'input_size': 8,
    'hidden_size': 8,
    'hidden_size_two': 8,
    'hidden_size_three': 8,
    'output_size': 1,
    'num_epochs': 1,
    'learning_rate': 0.001,
    'batch_size':  256
}

In [6]:
 # Данные по модели
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
model_name = 'MODEL_V_6.pth'
nn_params = nn_params
model = NNetwork(nn_params['input_size'], nn_params['hidden_size'], nn_params['hidden_size_two'],
                            nn_params['hidden_size_three'], nn_params['output_size']).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=nn_params['learning_rate'])
scaler_x = None
scaler_y = None

# Настройка датасета
dataset_filename = 'DATASET.json'

In [7]:
logger.info('Загрузка датасета ...')
dataset = load_dataset(dataset_filename)
logger.info('Загрузка датасета завершена!')

2023-12-13 22:39:22,217 - INFO - Загрузка датасета ...
2023-12-13 22:40:56,956 - INFO - Загрузка датасета завершена!


In [8]:
all_outer_values = []
all_central_values = []
logger.info(f'Разбитие на samples')

for item in tqdm(dataset):
    all_outer_values.append(item['input'])
    all_central_values.append(item['output'])

2023-12-13 22:40:56,976 - INFO - Разбитие на samples
100%|██████████| 31794210/31794210 [00:07<00:00, 4234267.58it/s]


In [9]:
# Предобработка данных
logger.info('Предобработка данных')

outer_x = np.array(all_outer_values, dtype=np.float32).T
central_y = np.array(all_central_values, dtype=np.float32)

outer_x = torch.tensor(all_outer_values, dtype=torch.float32)
central_y = torch.tensor(all_central_values, dtype=torch.float32)

X = outer_x.t()
y = central_y.view(-1, 1)

# Нормализация данных
logger.info('Нормализация данных')
scaler_x = StandardScaler()
scaler_y = StandardScaler()

# Разделение данных на обучающий и тестовый наборы
logger.info('Разбиение данных')
X_train, X_test, y_train, y_test = train_test_split(outer_x, central_y, test_size=0.2, random_state=42)

# Преобразование данных в тензоры PyTorch
if not torch.cuda.is_available():
    X_train_scaled = torch.tensor(scaler_x.fit_transform(X_train), dtype=torch.float32).to('cuda')
    y_train_scaled = torch.tensor(scaler_y.fit_transform(y_train.reshape(-1, 1)), dtype=torch.float32).to('cuda')

    X_test_scaled = torch.tensor(scaler_x.transform(X_test), dtype=torch.float32).to('cuda')
    y_test_scaled = torch.tensor(scaler_y.transform(y_test.reshape(-1, 1)), dtype=torch.float32).to('cuda')
else:
    X_train_scaled = torch.tensor(scaler_x.fit_transform(X_train), dtype=torch.float32)
    y_train_scaled = torch.tensor(scaler_y.fit_transform(y_train.reshape(-1, 1)), dtype=torch.float32)

    X_test_scaled = torch.tensor(scaler_x.transform(X_test), dtype=torch.float32)
    y_test_scaled = torch.tensor(scaler_y.transform(y_test.reshape(-1, 1)), dtype=torch.float32)

train_set = TensorDataset(X_train_scaled, y_train_scaled)
test_set = TensorDataset(X_test_scaled, y_test_scaled)

train_loader = DataLoader(train_set, batch_size=nn_params['batch_size'], shuffle=True)
test_loader = DataLoader(test_set, batch_size=nn_params['batch_size'], shuffle=True)

2023-12-13 22:41:04,508 - INFO - Предобработка данных
2023-12-13 22:41:27,751 - INFO - Нормализация данных
2023-12-13 22:41:27,752 - INFO - Разбиение данных


In [10]:
print("Доступность CUDA:", torch.cuda.is_available())
print("Количество доступных устройств:", torch.cuda.device_count())
print("Текущее устройство:", torch.cuda.current_device())

torch.cuda.empty_cache()
print(torch.cuda.memory_summary())

for name, param in model.named_parameters():
    print(f"Параметр {name} на устройстве {param.device}")

Доступность CUDA: True
Количество доступных устройств: 1
Текущее устройство: 0
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------------------|
| Active memory         |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   

In [11]:
# Обучение
logger.info('Обучение сети')
with torch.autograd.profiler.profile(use_cuda=True) as prof:
    for epoch in range(nn_params['num_epochs']):
        for batch_inp, batch_out in tqdm(train_loader):
            model.train()

            outputs = model(batch_inp)
            loss = criterion(outputs, batch_out)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        logger.info(f'Epoch: {epoch}, Loss: {loss.item():.5f}')
print(prof)

2023-12-13 22:41:32,904 - INFO - Обучение сети
 18%|█▊        | 17905/99357 [01:38<07:09, 189.78it/s]

In [None]:
from torch.autograd import Variable

# Тестирование модели
logger.info(f'Тестирование ...')
with torch.autograd.profiler.profile(use_cuda=True) as prof:
    final_loss = []
    model.eval()
    for batch_inp, batch_out in tqdm(test_loader):
        outputs = model(batch_inp)
        loss = criterion(outputs, batch_out)
        final_loss.append(loss.item())
print(prof)
logger.info(f'Ошибка модели составляет: {np.mean(final_loss) * 100:5f} %\nТочность прогноза: {final_accuracy * 100 / len(test_loader.dataset): .2f}%')

2023-12-13 21:58:01,911 - INFO - Тестирование ...
2023-12-13 21:58:01,911 - INFO - Тестирование ...
2023-12-13 21:58:01,911 - INFO - Тестирование ...
100%|██████████| 4/4 [00:38<00:00,  9.64s/it]
2023-12-13 21:58:40,495 - INFO - Ошибка модели составляет: 0.285390 %
Точность прогноза:  0.00%
2023-12-13 21:58:40,495 - INFO - Ошибка модели составляет: 0.285390 %
Точность прогноза:  0.00%
2023-12-13 21:58:40,495 - INFO - Ошибка модели составляет: 0.285390 %
Точность прогноза:  0.00%


In [None]:
# Сохранение модели
logger.info(f'Модель обучена, сохраняю модель в файл {model_name + "_new"}')
checkpoint = {'model_state_dict': model.state_dict(),
                      'scaler_x_mean': scaler_x.mean_,
                      'scaler_x_scale': scaler_x.scale_,
                      'scaler_y_mean': scaler_y.mean_,
                      'scaler_y_scale': scaler_y.scale_, }

torch.save(checkpoint, model_name + "_new")