In [None]:
!pip install utils

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting utils
  Downloading utils-1.0.1-py2.py3-none-any.whl (21 kB)
Installing collected packages: utils
Successfully installed utils-1.0.1


In [None]:
import torch
from torch.utils.data import Dataset
from torch.utils.data import random_split
import os
import torch.nn as nn
from argparse import ArgumentParser, Namespace
import enum
from pathlib import Path
from typing import Dict
from datetime import datetime
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import numpy as np
import torch.optim as optim
from torch.utils.data import DataLoader
from utils import *

In [None]:
class DSCOVRMagneticFieldToWindProtonDataset(Dataset):
    """Dataset of (x, y) tensor pairs read from per-year ``data_{year}.pt`` files.

    Every file is loaded with ``torch.load`` and must expose the keys ``"X"``
    and ``"Y"``. The per-year tensors are cast to float and concatenated along
    dimension 0, so dimension 0 is the sample axis used by ``__len__`` and
    ``__getitem__``.
    """

    def __init__(self, data_path, start_year, end_year):
        """Load every year from ``start_year`` to ``end_year`` (inclusive).

        Args:
            data_path: Directory that contains the ``data_{year}.pt`` files.
            start_year: First year to load.
            end_year: Last year to load (inclusive).
        """
        self.x = torch.tensor([]).float()
        self.y = torch.tensor([]).float()
        self.load_in(data_path, start_year, end_year)

    def load_in(self, data_path, start_year, end_year):
        """Append the tensors of each requested year to ``self.x`` / ``self.y``.

        Data already held by the instance is kept in front of the newly loaded
        years. An empty year range leaves both tensors empty.

        Args:
            data_path: Directory that contains the ``data_{year}.pt`` files.
            start_year: First year to load.
            end_year: Last year to load (inclusive).
        """
        # Collect all pieces and concatenate once. Calling torch.cat inside the
        # loop re-copies everything accumulated so far on every iteration.
        # The initial 1-D empty tensors are accepted by torch.cat alongside the
        # multi-dimensional yearly tensors.
        x_chunks = [self.x]
        y_chunks = [self.y]
        for year in range(start_year, end_year + 1):
            year_data = torch.load(os.path.join(data_path, f"data_{year}.pt"))
            x_chunks.append(year_data["X"].float())
            y_chunks.append(year_data["Y"].float())
        self.x = torch.cat(x_chunks, dim=0)
        self.y = torch.cat(y_chunks, dim=0)
        print("total x shape:", self.x.shape)
        print("total y shape:", self.y.shape)

    def __len__(self) -> int:
        """Return the number of samples (size of dimension 0 of ``self.x``)."""
        return self.x.shape[0]

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair at ``index``.

        The three-axis indexing requires both stored tensors to be 3-D.
        """
        return self.x[index, :, :], self.y[index, :, :]


In [None]:
class Seq2Seq(nn.Module):
    """Stacked GRU whose per-step outputs are mapped through a linear layer.

    Args:
        input_dim: Feature size of each input step.
        hidden_dim: Hidden size of the GRU.
        output_dim: Feature size of each output step.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        output_dim: int,
        num_layers: int,
    ) -> None:
        super().__init__()

        # The GRU is built with batch_first=True, so it takes the batch on
        # dimension 0 of its input.
        self.rnn = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        # Kept inside nn.Sequential so the parameter names stay
        # "dnn.0.weight" / "dnn.0.bias" in saved checkpoints.
        projection = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.dnn = nn.Sequential(projection)

    def forward(self, seq: torch.Tensor) -> torch.Tensor:
        """Run ``seq`` through the GRU and project every step's output."""
        # The GRU's final hidden state is not needed; only the per-step
        # outputs are projected.
        per_step_states, _ = self.rnn(seq)
        return self.dnn(per_step_states)

In [None]:
from torchmetrics.wrappers import multioutput

In [None]:
  # Train the Seq2Seq model on the 2021 data and checkpoint it to Google Drive
  # after every epoch.

  # Seed the global RNG so the random split, the weight initialisation and the
  # batch shuffling repeat across "Restart & Run All".
  torch.manual_seed(0)

  # NOTE(review): `dataset` is not read by any cell visible here (the dataset
  # class below loads the same file again); kept in case another cell uses it.
  dataset = torch.load("/content/drive/MyDrive/data_2021.pt")

  # The original cell instantiated `DSCOVRMagneticFieldToWindMagneticField`,
  # which no cell of this notebook defines (NameError on a fresh kernel).
  # Use the dataset class the notebook actually defines.
  d = DSCOVRMagneticFieldToWindProtonDataset('/content/drive/MyDrive', 2021, 2021)

  # 70 / 30 train-test split; the test part takes whatever the rounding leaves.
  data_len = len(d)
  train_ratio, test_ratio = 0.7, 0.3
  train_len = int(data_len * train_ratio)
  test_len = data_len - train_len
  if train_len == 0:
    # Without this check the epoch average below fails with ZeroDivisionError.
    raise ValueError("training split is empty; need more samples to train")
  train_dataset, test_dataset = random_split(d, [train_len, test_len])
  print(f"train,test dataset len: {len(train_dataset)}, {len(test_dataset)}")
  train_loader = DataLoader(train_dataset, 10000, shuffle=True)
  test_loader = DataLoader(test_dataset, 10000, shuffle=False)

  model = Seq2Seq(3, 3, 3, 3)
  # NOTE(review): lr=0.8 is unusually large for Adam; the value is left as is
  # and the plateau scheduler below is now allowed to reduce it.
  optimizer = torch.optim.Adam(model.parameters(), lr=0.8)
  scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=2, min_lr=5e-5)
  criterion = torch.nn.MSELoss()

  # Stop when the mean epoch loss changes by at most 0.001 between two
  # consecutive epochs, or after `max_epochs` as a safety net so the loop
  # cannot run forever if the loss keeps oscillating.
  max_epochs = 1000
  epoch = 0
  loss_train = 0
  prev_train = 100
  while abs(loss_train - prev_train) > 0.001 and epoch < max_epochs:
    model.train()
    prev_train = loss_train
    loss_train, iterations = 0, 0
    for x_prime, y in train_loader:
      outputs = model(x_prime)
      loss = criterion(outputs, y)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      iterations += 1
      loss_train += loss.item()
    loss_train /= iterations
    # ReduceLROnPlateau only acts when it is stepped with the monitored
    # metric; the original cell created the scheduler but never called it.
    scheduler.step(loss_train)
    epoch += 1
    print(f"train_loss: {loss_train:.4f}")
    torch.save(model.state_dict(), os.path.join("/content/drive/MyDrive", "dscovr_proton3.pt"))

total x_prime shape: torch.Size([8, 10000, 3])
total x shape: torch.Size([8, 10000, 3])
train,test dataset len: 5, 3
<torch.utils.data.dataloader.DataLoader object at 0x7fceb388e650>
train_loss: 2.2955
train_loss: 2.7485
train_loss: 2.4303
train_loss: 3.8339
train_loss: 1.7122
train_loss: 1.2457
train_loss: 1.6948
train_loss: 1.4540
train_loss: 1.1415
train_loss: 1.2264
train_loss: 1.4117
train_loss: 1.3211
train_loss: 1.1573
train_loss: 1.1737
train_loss: 1.2818
train_loss: 1.2750
train_loss: 1.1680
train_loss: 1.1396
train_loss: 1.2125
train_loss: 1.2285
train_loss: 1.1635
train_loss: 1.1389
train_loss: 1.1708
train_loss: 1.1845
train_loss: 1.1622
train_loss: 1.1396
train_loss: 1.1445
train_loss: 1.1611
train_loss: 1.1541
train_loss: 1.1358
train_loss: 1.1356


In [None]:
# Evaluate the saved checkpoint on the held-out split.
# Uses `test_loader` and `criterion` created by the training cell.
model = Seq2Seq(3,3,3,3)
model.load_state_dict(torch.load(os.path.join('/content/drive/MyDrive', "dscovr_proton3.pt")))
model.eval()

# An element counts as correct when |prediction - target| is below this value.
# The original two-branch test (`loss_test < 1.1 and err <= loss_test`, else
# `err < 1.4`) accepts exactly the same elements: its first branch can only
# accept errors below 1.1, which the second branch accepts as well.
tolerance = 1.4

loss_test, acc_test, iter_test, n_elements = 0, 0, 0, 0
batch_outputs, batch_targets = [], []
with torch.no_grad():
  for x_prime, y in test_loader:
    outputs = model(x_prime)
    loss = criterion(outputs, y)
    print(loss.item())
    iter_test += 1
    loss_test += loss.item()
    # Vectorised count replaces a Python loop over every tensor element.
    acc_test += ((outputs - y).abs() < tolerance).sum().item()
    n_elements += outputs.numel()
    batch_outputs.append(outputs)
    batch_targets.append(y)

if iter_test == 0:
  # Without this check the averages below fail with ZeroDivisionError.
  raise ValueError("test_loader produced no batches")

loss_test /= iter_test
# Normalise by the number of elements seen in all batches; the original
# divided by the size of the last batch only.
acc_test /= n_elements

# Flatten predictions and targets of every batch, not just the last one.
no = torch.cat(batch_outputs, dim=0).numpy()
ny = torch.cat(batch_targets, dim=0).numpy()
nof = no.flatten()
nyf = ny.flatten()
# np.correlate is a sliding dot product, not a correlation coefficient;
# np.corrcoef returns the Pearson coefficient matrix, [0, 1] is r(nof, nyf).
correlation = np.corrcoef(nof, nyf)[0, 1]
print(nyf)
print(nof)
print(f"accuracy:{acc_test*100:.2f},test_loss: {loss_test:.4f}")
print(f"correlation: {correlation:.4f}")

visualize_path = os.path.join('/content/drive/MyDrive', "visualize")
os.makedirs(visualize_path, exist_ok=True)

# NOTE(review): these two empty tensors are not filled or read in the visible
# cells; kept as the original left them.
predictions = torch.tensor([]).to('cpu')
truthy = torch.tensor([]).to('cpu')

1.0436975955963135
[-1.2606956  -1.2207489   1.9505613  ... -0.46414185 -2.2159753
  1.2354926 ]
[ 2.3952012  -3.9514227   1.6521806  ...  0.40122044 -1.6959336
  1.1675054 ]
accuracy:89.03,test_loss: 1.0437
