In [1]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

In [2]:
torch.cuda_version, torch.cuda.is_available()

('11.8', True)

In [3]:
rain = pd.read_csv('./rain_preprocessed.csv', sep=';')
rain.head(2)

Unnamed: 0,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,...,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow,Day,Month
0,0,13.4,22.9,0.6,5.468232,7.611178,0,44.0,0,1,...,-1.478788,-1.223882,8.0,4.50993,16.9,21.8,0,0,1,12
1,0,7.4,25.1,0.0,5.468232,7.611178,1,44.0,4,2,...,-1.047782,-1.118839,4.447461,4.50993,17.2,24.3,0,0,2,12


In [4]:
print(rain['RainTomorrow'].value_counts())

rain0 = rain[rain['RainTomorrow'] == 0]
rain1 = rain[rain['RainTomorrow'] == 1]

rain0 = resample(rain0,
                 replace=False,
                 n_samples=len(rain1),
                 random_state=73)

rain = pd.concat([rain1, rain0])
print(rain['RainTomorrow'].value_counts())

X_rain = rain.drop(columns=['RainTomorrow'])
y_rain = rain['RainTomorrow']
print(X_rain.shape, y_rain.shape)

X_rain_train, X_rain_test, y_rain_train, y_rain_test = train_test_split(X_rain, y_rain, test_size=0.33)

RainTomorrow
0    113583
1     31877
Name: count, dtype: int64
RainTomorrow
1    31877
0    31877
Name: count, dtype: int64
(63754, 23) (63754,)


In [5]:
class MyClassificationModel(nn.Module):
    def __init__(self):
        super(MyClassificationModel, self).__init__()

        self.seq = nn.Sequential(
            nn.Linear(23, 8),
            nn.Mish(),
            nn.Linear(8, 2),
            nn.Softmax(),
        )

    def forward(self, x):
        y = self.seq(x)
        return y

In [37]:
torch.manual_seed(73)
model = MyClassificationModel()#.to(device=torch.device('cuda'))
print(model)

MyClassificationModel(
  (seq): Sequential(
    (0): Linear(in_features=23, out_features=8, bias=True)
    (1): Mish()
    (2): Linear(in_features=8, out_features=2, bias=True)
    (3): Softmax(dim=None)
  )
)


In [27]:
all(param.is_cuda for param in model.parameters())

False

In [38]:
model = model.cuda()
all(param.is_cuda for param in model.parameters())

True

In [33]:
torch.cuda.memory_allocated(), torch.cuda.memory_reserved()

(21314048, 44040192)

In [39]:
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [40]:
# X = torch.from_numpy(np.array(X_rain_train)).float()
# y = torch.from_numpy(np.array(y_rain_train)).long()

X = torch.FloatTensor(np.array(X_rain_train)).cuda()
y = torch.LongTensor(np.array(y_rain_train)).cuda()

train_ds = TensorDataset(X, y)
train_dl = DataLoader(train_ds, batch_size=1024, shuffle=True)

In [41]:
import time
epochs = 9

training_time = 0
for epoch in range(epochs):
    s = time.monotonic()
    for x_b, y_b in train_dl:
        outputs = model(x_b)
        loss_value = loss(outputs, y_b)
        
        optimizer.zero_grad()
        loss_value.backward()
        optimizer.step()
    training_time += time.monotonic() - s

    print(f'Эпоха {epoch + 1}, Значение функции потерь: {loss_value.item()}')
    print(f'[INFO] Training time: {training_time} seconds.\n')

Эпоха 1, Значение функции потерь: 0.645328938961029
[INFO] Training time: 0.7820000000065193 seconds.

Эпоха 2, Значение функции потерь: 0.601635217666626
[INFO] Training time: 1.3599999999860302 seconds.

Эпоха 3, Значение функции потерь: 0.5944720506668091
[INFO] Training time: 2.0 seconds.

Эпоха 4, Значение функции потерь: 0.5665609240531921
[INFO] Training time: 2.5789999999979045 seconds.

Эпоха 5, Значение функции потерь: 0.5458889603614807
[INFO] Training time: 3.2969999999913853 seconds.

Эпоха 6, Значение функции потерь: 0.544800877571106
[INFO] Training time: 3.9070000000065193 seconds.

Эпоха 7, Значение функции потерь: 0.56392502784729
[INFO] Training time: 4.562999999994645 seconds.

Эпоха 8, Значение функции потерь: 0.5553288459777832
[INFO] Training time: 5.157000000006519 seconds.

Эпоха 9, Значение функции потерь: 0.5135165452957153
[INFO] Training time: 5.812999999994645 seconds.



In [42]:
X_test = torch.FloatTensor(np.array(X_rain_test)).cuda()
y_test = torch.LongTensor(np.array(y_rain_test)).cuda()

y_pred = model(X_test)

In [43]:
y_pred

tensor([[1.0869e-05, 9.9999e-01],
        [1.2542e-01, 8.7458e-01],
        [6.7592e-04, 9.9932e-01],
        ...,
        [9.2169e-01, 7.8315e-02],
        [1.6521e-02, 9.8348e-01],
        [4.6294e-01, 5.3706e-01]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [44]:
from sklearn.metrics import classification_report


print(classification_report(
    np.argmax(y_pred.cpu().detach().numpy(), axis=1),
    y_rain_test,
))

              precision    recall  f1-score   support

           0       0.76      0.75      0.76     10589
           1       0.75      0.75      0.75     10450

    accuracy                           0.75     21039
   macro avg       0.75      0.75      0.75     21039
weighted avg       0.75      0.75      0.75     21039

