In [1]:
import pandas as pd
import numpy as np
import random
from IPython.display import display

import matplotlib.pyplot as plt
import itertools
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from torch.utils.data import DataLoader, TensorDataset

In [9]:
df = pd.read_csv('../data/clusters_synthetic_dataset.csv')
df.head(3)

Unnamed: 0,X_1,Y_1,Z_1,t_1,X_2,Y_2,Z_2,t_2,X_3,Y_3,Z_3,t_3,X_4,Y_4,Z_4,t_4,theta,phi
0,-25.359,5.885,-6.684,78.875263,-37.609,5.885,-6.684,74.805427,-37.609,-7.315,-6.684,99.676584,-25.359,-7.315,-6.684,103.74642,35,100
1,-25.359,37.335,-6.684,19.617847,-37.609,37.335,-6.684,15.548011,-37.609,24.135,-6.684,40.419168,-25.359,24.135,-6.684,44.489004,35,100
2,6.479,12.65,0.0,58.44304,-6.872,12.65,0.0,54.007417,-6.872,-12.63,0.0,101.639451,6.479,-12.63,0.0,106.075074,35,100


In [10]:
t_columns = ['t_1', 't_2', 't_3', 't_4']
df['min_t'] = df[t_columns].min(axis=1)

for t_col in t_columns:
    df[t_col] = df[t_col] - df['min_t']

df.drop('min_t', axis=1, inplace=True)

df.head(3)

Unnamed: 0,X_1,Y_1,Z_1,t_1,X_2,Y_2,Z_2,t_2,X_3,Y_3,Z_3,t_3,X_4,Y_4,Z_4,t_4,theta,phi
0,-25.359,5.885,-6.684,4.069836,-37.609,5.885,-6.684,0.0,-37.609,-7.315,-6.684,24.871157,-25.359,-7.315,-6.684,28.940993,35,100
1,-25.359,37.335,-6.684,4.069836,-37.609,37.335,-6.684,0.0,-37.609,24.135,-6.684,24.871157,-25.359,24.135,-6.684,28.940993,35,100
2,6.479,12.65,0.0,4.435623,-6.872,12.65,0.0,0.0,-6.872,-12.63,0.0,47.632034,6.479,-12.63,0.0,52.067657,35,100


In [11]:
for idx in range(len(df)):
    if random.random() < 0.5:
        t_col = random.choice(t_columns)
        df.at[idx, t_col] = np.nan

In [15]:
df.head(5)

Unnamed: 0,X_1,Y_1,Z_1,t_1,X_2,Y_2,Z_2,t_2,X_3,Y_3,Z_3,t_3,X_4,Y_4,Z_4,t_4,theta,phi
0,-25.359,5.885,-6.684,4.069836,-37.609,5.885,-6.684,0.0,-37.609,-7.315,-6.684,24.871157,-25.359,-7.315,-6.684,28.940993,35,100
1,-25.359,37.335,-6.684,4.069836,-37.609,37.335,-6.684,0.0,-37.609,24.135,-6.684,24.871157,-25.359,24.135,-6.684,28.940993,35,100
2,6.479,12.65,0.0,4.435623,-6.872,12.65,0.0,0.0,-6.872,-12.63,0.0,47.632034,6.479,-12.63,0.0,52.067657,35,100
3,37.369,10.956,-14.946,4.075883,22.469,10.956,-15.266,0.0,22.469,-3.952,-15.346,28.307926,37.369,-3.952,-15.076,32.520428,35,100
4,37.369,45.572,-16.166,3.975419,22.977,45.572,-16.461,0.0,22.977,28.482,-16.111,31.244273,37.367,28.482,-16.511,37.118043,35,100


In [22]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [16]:
X = df.drop(columns=['theta', 'phi']).values
y = df[['theta', 'phi']].values

In [17]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [18]:
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

In [34]:
mask_tensor = torch.isnan(X_tensor)

In [38]:
X_train, X_test, y_train, y_test,  mask_train, mask_test = train_test_split(
    X_tensor, y_tensor, mask_tensor, test_size=0.2, random_state=42)

In [40]:
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)
mask_train, mask_test = mask_train.to(device), mask_test.to(device)

In [48]:
print(mask_train.shape, X_train.shape, y_train.shape)

torch.Size([52142, 16]) torch.Size([52142, 16]) torch.Size([52142, 2])


In [43]:
class ThetaPhiPredictionModel(nn.Module):
    def __init__(self):
        super(ThetaPhiPredictionModel, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [44]:
model = ThetaPhiPredictionModel().to(device)

In [45]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [59]:
def train_model(model, X_train, y_train, mask_train, criterion, optimizer, epochs=3000):
    for epoch in tqdm(range(epochs), desc="Training", ncols=100, position=0, leave=True):
        model.train()
        optimizer.zero_grad()

        # Применяем маску для исключения NaN значений в X_train
        # Маска для строк, где нет NaN в X_train
        valid_mask = ~torch.isnan(X_train).any(dim=1)

        # Применяем маску на входные данные и целевые значения
        # Оставляем только строки без NaN в X_train
        X_train_valid = X_train[valid_mask]
        # Оставляем только соответствующие целевые значения
        y_train_valid = y_train[valid_mask]

        # Прогоняем данные через модель
        y_pred = model(X_train_valid)

        # Рассчитываем потери
        loss = criterion(y_pred, y_train_valid)

        loss.backward()  # Обратное распространение
        optimizer.step()  # Обновление параметров модели

        tqdm.write(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

In [60]:
train_model(model, X_train.to(device), y_train.to(device),
            mask_train.to(device), criterion, optimizer)

Training:   0%|                                                            | 0/3000 [00:00<?, ?it/s]

Epoch [1/3000], Loss: 22.5973
Epoch [2/3000], Loss: 22.5841
Epoch [3/3000], Loss: 22.5709
Epoch [4/3000], Loss: 22.5578
Epoch [5/3000], Loss: 22.5446
Epoch [6/3000], Loss: 22.5315
Epoch [7/3000], Loss: 22.5184
Epoch [8/3000], Loss: 22.5053
Epoch [9/3000], Loss: 22.4922
Epoch [10/3000], Loss: 22.4790
Epoch [11/3000], Loss: 22.4658
Epoch [12/3000], Loss: 22.4526
Epoch [13/3000], Loss: 22.4395
Epoch [14/3000], Loss: 22.4263
Epoch [15/3000], Loss: 22.4131
Epoch [16/3000], Loss: 22.3998
Epoch [17/3000], Loss: 22.3866
Epoch [18/3000], Loss: 22.3733
Epoch [19/3000], Loss: 22.3601
Epoch [20/3000], Loss: 22.3468
Epoch [21/3000], Loss: 22.3335
Epoch [22/3000], Loss: 22.3201
Epoch [23/3000], Loss: 22.3066
Epoch [24/3000], Loss: 22.2931
Epoch [25/3000], Loss: 22.2796
Epoch [26/3000], Loss: 22.2661
Epoch [27/3000], Loss: 22.2525
Epoch [28/3000], Loss: 22.2388
Epoch [29/3000], Loss: 22.2251
Epoch [30/3000], Loss: 22.2113
Epoch [31/3000], Loss: 22.1975
Epoch [32/3000], Loss: 22.1836
Epoch [33/3000], 

In [63]:
def test_model(model, X_test, y_test):
    model.eval()  # Переводим модель в режим оценки

    # Маска для строк без NaN в X_test
    valid_mask = ~torch.isnan(X_test).any(dim=1)

    # Применяем маску к тестовым данным и целевым значениям
    X_test_valid = X_test[valid_mask]
    y_test_valid = y_test[valid_mask]

    # Получаем предсказания модели
    with torch.no_grad():
        y_pred = model(X_test_valid)

    # Вычисляем отклонения
    mae = torch.mean(torch.abs(y_pred - y_test_valid))
    mse = torch.mean((y_pred - y_test_valid) ** 2)

    print(f"Test MAE: {mae.item():.4f}")
    print(f"Test MSE: {mse.item():.4f}")

In [None]:
test_model(model, X_test, y_test)

Test MAE: 1.3739
Test MSE: 3.4130


: 