In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Training 5 neural networks for $ Map_i $, i=1,...,5

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

from main import import_dataset_from_file
from torch.utils.data import Dataset, DataLoader
import torch

In [6]:
class RosneftDataset(Dataset):

    def __init__(self, X, y) -> None:
        self.X = X
        self.y = y

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx]), torch.tensor(self.y[idx]).unsqueeze(0)

    def __len__(self):
        return len(self.y)

In [7]:
import torch.nn as nn

In [8]:
class Map(nn.Module):

    def __init__(self, hidden_size=15) -> None:
        super().__init__()

        self.hidden_size = hidden_size

        self.linear = nn.Sequential(
            nn.Linear(in_features=2, out_features=hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size, out_features=hidden_size),
            nn.ReLU(),
            nn.Linear(in_features=hidden_size, out_features=1),
        )

    def forward(self, x):
        # input x: [BATCH_SIZE x 2]
        return self.linear(x)

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [10]:
from tqdm import tqdm


losses = None


def train(model, criterion, optimizer, epoches, tr_loader, val_loader):
    global losses
    losses = {
        'train': [],
        'val': [],
    }
    
    for epoch in range(epoches):

        # train
        model.train()
        train_loss = 0

        for X_batch, y_batch in tqdm(tr_loader):
            X_batch_cuda = X_batch.to(device)

            optimizer.zero_grad()
            y_pred = model(X_batch_cuda)
            loss = criterion(y_batch.to(device), y_pred)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()**0.5
        
        losses['train'].append(train_loss / len(tr_loader))
        
        # val
        model.eval()
        val_loss = 0

        for X_batch, y_batch in tqdm(val_loader):
            X_batch_cuda = X_batch.to(device)

            with torch.no_grad():
                y_pred = model(X_batch_cuda)
                loss = criterion(y_batch.to(device), y_pred)

            val_loss += loss.item()**0.5
        
        losses['val'].append(val_loss / len(val_loader))

        # show epoch results
        print(f'Epoch: {epoch+1}/{epoches}; Train loss: {losses["train"][-1]}; Val loss: {losses["val"][-1]}')

In [11]:
maps = [None] * 5
scalers = [None] * 5


def train_val_map(idx, epoches=1):
    # dataframe
    map_df = import_dataset_from_file(f"..\\Data\\Map_{idx+1}.txt")
    map_df

    # train_test_split
    cols = ['x', 'y']
    X_train, X_test, y_train, y_test = train_test_split(map_df[cols], map_df['z'], test_size=0.2, shuffle=True)
    
    # scaling
    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(X_train.values.astype(np.float32))
    scaled_test = scaler.transform(X_test.values.astype(np.float32))
    scalers[idx] = scaler

    # datasets and dataloaders
    train_dataset = RosneftDataset(scaled_train, y_train.values.astype(np.float32))
    test_dataset = RosneftDataset(scaled_test, y_test.values.astype(np.float32))

    train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=True)

    # model
    map = Map().to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(map.parameters(), lr=1e-3)

    # training
    train(map, criterion, optimizer, epoches, train_dataloader, test_dataloader)

    # validating
    pred = map(torch.tensor(scaled_test).to(device)).cpu().detach().numpy()
    mse = mean_squared_error(y_test, pred)
    print('Validation RMSE:', mse**0.5)

    # saving
    maps[idx] = map
    torch.save(map.state_dict(), f'MapNN_{idx+1}.saved')


In [12]:
EPOCHES = [1, 1, 3, 3, 1]

for i in range(5):
    print(f'Training Map{i+1}')
    train_val_map(i, epoches=EPOCHES[i])
    print()

Training Map1


100%|██████████| 7479/7479 [00:46<00:00, 161.15it/s]
100%|██████████| 1870/1870 [00:07<00:00, 248.77it/s]


Epoch: 1/1; Train loss: 0.014261415110524145; Val loss: 0.009616294063010893
Validation RMSE: 0.009674899164298853

Training Map2


100%|██████████| 7479/7479 [00:44<00:00, 166.39it/s]
100%|██████████| 1870/1870 [00:08<00:00, 227.61it/s]


Epoch: 1/1; Train loss: 0.012711351891788875; Val loss: 0.009529526262540212
Validation RMSE: 0.009550505721082696

Training Map3


100%|██████████| 7482/7482 [00:51<00:00, 146.23it/s]
100%|██████████| 1871/1871 [00:09<00:00, 200.60it/s]


Epoch: 1/3; Train loss: 3.4282093974818966; Val loss: 1.5677119505230415


100%|██████████| 7482/7482 [00:43<00:00, 171.47it/s]
100%|██████████| 1871/1871 [00:08<00:00, 227.37it/s]


Epoch: 2/3; Train loss: 1.5954701174238264; Val loss: 1.5548607467934412


100%|██████████| 7482/7482 [00:48<00:00, 153.44it/s]
100%|██████████| 1871/1871 [00:08<00:00, 221.68it/s]


Epoch: 3/3; Train loss: 1.5843055835204605; Val loss: 1.5623939138064686
Validation RMSE: 1.568614471680574

Training Map4


100%|██████████| 7482/7482 [00:47<00:00, 159.07it/s]
100%|██████████| 1871/1871 [00:08<00:00, 220.59it/s]


Epoch: 1/3; Train loss: 847.8536460769476; Val loss: 67.95809170323136


100%|██████████| 7482/7482 [00:48<00:00, 155.76it/s]
100%|██████████| 1871/1871 [00:08<00:00, 220.58it/s]


Epoch: 2/3; Train loss: 63.59396969481197; Val loss: 57.58228299401968


100%|██████████| 7482/7482 [00:46<00:00, 159.39it/s]
100%|██████████| 1871/1871 [00:08<00:00, 218.29it/s]


Epoch: 3/3; Train loss: 56.92251125684435; Val loss: 56.268387328874894
Validation RMSE: 56.656887378067466

Training Map5


100%|██████████| 7479/7479 [00:48<00:00, 155.73it/s]
100%|██████████| 1870/1870 [00:08<00:00, 210.64it/s]

Epoch: 1/1; Train loss: 0.05277778197244947; Val loss: 0.006609018227411521
Validation RMSE: 0.006624040922150767






In [13]:
for scaler in scalers:
    print(scaler.mean_, scaler.var_)

[57080.9767305  34365.70094526] [53556016.59630211 29055722.89106974]
[57082.08067108 34366.34404761] [53589788.07829565 29066824.58453535]
[57080.17631595 34369.18386232] [53560445.06731844 29078077.19462908]
[57080.99299975 34363.3525286 ] [53539940.95065048 29064751.62036899]
[57074.60889936 34367.25621712] [53551786.85135974 29067356.70088754]


## Predicting $ map_i $ for pairs (x, y)

In [14]:
from main import import_dataset_from_file, export_dataset_to_file

In [15]:
result_df = import_dataset_from_file('../Data/Result_schedule.txt')
result_df

Unnamed: 0,x,y,z
0,68239.21,24467.0,
1,68251.71,24467.0,
2,68264.21,24467.0,
3,68276.71,24467.0,
4,68289.21,24467.0,
...,...,...,...
1196585,53264.21,46942.0,
1196586,53276.71,46942.0,
1196587,53289.21,46942.0,
1196588,53301.71,46942.0,


In [17]:
xynp = result_df[['x', 'y']].values.astype(np.float32)

for i in range(5):
    xy = scalers[i].transform(xynp)
    xy_tensor = torch.tensor(xy)

    maps[i].eval()
    with torch.no_grad():
        map_pred = maps[i](xy_tensor).cpu().detach().numpy()
        
    result_df[f'map{i+1}'] = map_pred


In [18]:
result_df

Unnamed: 0,x,y,z,map1,map2,map3,map4,map5
0,68239.21,24467.0,,0.157425,0.120834,38.681908,4221.596191,1.672716
1,68251.71,24467.0,,0.157404,0.120826,38.688210,4221.678711,1.672740
2,68264.21,24467.0,,0.157384,0.120819,38.694515,4221.762207,1.672764
3,68276.71,24467.0,,0.157363,0.120811,38.700821,4221.844727,1.672788
4,68289.21,24467.0,,0.157343,0.120804,38.707130,4221.928223,1.672812
...,...,...,...,...,...,...,...,...
1196585,53264.21,46942.0,,0.152270,0.126343,37.392536,4168.096191,1.682565
1196586,53276.71,46942.0,,0.152260,0.126346,37.399055,4168.264648,1.682557
1196587,53289.21,46942.0,,0.152250,0.126348,37.405571,4168.434082,1.682550
1196588,53301.71,46942.0,,0.152241,0.126351,37.412086,4168.602539,1.682543


In [19]:
result_df.to_csv('NN_preresults.csv')