<a href="https://colab.research.google.com/github/pkonowrocki/Weather-prediction/blob/master/CloudsCodeComparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#boilerplate code
import subprocess
file_id = '1XfiON89EFCsw5zhtD4hq1hn5Z21vGzpG'
subprocess.run(['pip', 'install', 'PyDrive'])
subprocess.run(['apt-get', 'install', 'unzip'])
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import os
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
downloaded = drive.CreateFile({'id':file_id})
downloaded.GetContentFile('Dataset.zip')
if not os.path.exists('./Dataset'):
  subprocess.run(['unzip', './Dataset.zip'])
print(f'{"".join(["-" for _ in range(10)])}DATA READY{"".join(["-" for _ in range(10)])}')

#code
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from sklearn import preprocessing

if torch.cuda.is_available():
    device = torch.device('cuda:0')
    print('cuda')
else:
    device = torch.device('cpu')
    print('cpu')

def testNetwork(Xtest, Ytest, net, criterion):
  x = torch.Tensor(Xtest).to(device)
  output = net(x)
  loss = criterion(output, torch.Tensor(np.array([Ytest]).T).to(device))
  return torch.mean(loss).cpu().detach().numpy(), torch.std(loss).cpu().detach().numpy()

def trainNetwork(path, code, net = None, epochs = None, criterion = None, checkEvery = None, optimizer = None, val = 0.0, scaler = None, batch = 1, gradClipping = 0, seed = 0):
  torch.manual_seed(seed)
  np.random.seed(seed)

  Xtrain = np.genfromtxt(f'{path}/{code}-train-input.csv', delimiter=',')
  Ytrain = np.genfromtxt(f'{path}/{code}-train-output.csv', delimiter=',')
  Xtest = np.genfromtxt(f'{path}/{code}-test-input.csv', delimiter=',')
  Ytest = np.genfromtxt(f'{path}/{code}-test-output.csv', delimiter=',')
  if scaler is None:
    scaler = preprocessing.StandardScaler().fit(Xtrain)
  Xtrain = scaler.transform(Xtrain)
  Xtest = scaler.transform(Xtest)

  if not val == 0.0:
    n = int(len(Ytrain)*val)+1 if int(len(Ytrain)*val)+1 < len(Ytrain) else int(len(Ytrain)*val)
    idxs = np.random.choice(np.arange(0, len(Ytrain), 1), n)
    Xval = Xtrain[idxs]
    Yval = Ytrain[idxs]
    Xtrain = np.delete(Xtrain, idxs, axis=0)
    Ytrain = np.delete(Ytrain, idxs)
  else:
    Xval = None

  if epochs is None:
    epochs = 1000
  if checkEvery is None:
    checkEvery = 5
  if net is None:
    numInputs = Xtrain.shape[1]
    numOutputs = 1
    hiddenSize = 50
    net = nn.Sequential(
      nn.Linear(numInputs, 2*hiddenSize),
      nn.ELU(),
      nn.Linear(2*hiddenSize, hiddenSize),
      nn.ELU(),
      nn.Linear(hiddenSize, numOutputs)).to(device)
  if criterion is None:
    criterion = nn.MSELoss(reduction='none')
  if optimizer is None:
    optimizer = optim.Adadelta(net.parameters())

  net.to(device)

  if batch == 1:
    splitXtrain = Xtrain
    splitYtrain = Ytrain
  else:
    batch = int(len(Ytrain)/batch)
    splitXtrain = np.array_split(Xtrain, batch)
    splitYtrain = np.array_split(Ytrain, batch)

  for epoch in range(epochs):
    runningLoss = 0.0
    for i, x in enumerate(splitXtrain, 0):
      if np.any(np.isnan(x)):
        raise('NaN value in input')
      if np.any(np.isnan(splitYtrain[i])):
        raise('NaN value in target')
      optimizer.zero_grad()
      y = torch.Tensor(np.array([splitYtrain[i]]).T).to(device)
      x = torch.Tensor(x).to(device)
      output = net(x)
      if np.any(np.isnan(output.cpu().detach().numpy())):
        raise('NaN value in output')
      loss = torch.mean(criterion(output, y))
      loss.backward()
      if not gradClipping == 0:
        torch.nn.utils.clip_grad_norm_(net.parameters(), gradClipping)
      optimizer.step()
      runningLoss += loss.item()

    if epoch % checkEvery == checkEvery-1 or epoch == 0:
      if not Xval is None:
        lossVal, stdVal  = testNetwork(Xval, Yval, net, criterion)
      
      lossTest, stdTest = testNetwork(Xtest, Ytest, net, criterion)
      print(f'{epoch}, {runningLoss/len(splitYtrain)}, {lossTest}, {stdTest}{", " + str(lossVal) +", " + str(stdVal) if not Xval is None else ""}')
  
  lossVal, _ = testNetwork(Xtest, Ytest, net, criterion)
  print(f'Finally: {lossVal}')



for _ in range(100):
  seed = np.random.randint(np.iinfo(np.int32).max)
  print(f'Clouds Okta Wind Natural (seed: {seed})')
  trainNetwork('./Dataset/Temperature/Imputation', 'CloudsOktaWindNatural', val = 0.2, batch = 32, gradClipping = 2, seed = seed)
  print(f'Clouds OneHot Wind Natural (seed: {seed})')
  trainNetwork('./Dataset/Temperature/Imputation', 'CloudsOneHotWindNatural', val = 0.2, batch = 32, gradClipping = 2, seed = seed)

----------DATA READY----------
cuda
Clouds Okta Wind Natural (seed: 1852923047)
0, 33.95376178747899, 14.278973579406738, 22.32658576965332, 15.709663, 25.954372
4, 5.0482873244865525, 7.17915678024292, 14.109153747558594, 6.514617, 14.2115
9, 4.478553293089609, 6.949760437011719, 13.3834810256958, 6.3702283, 14.3336525
14, 4.197041241784354, 6.70111608505249, 13.185812950134277, 6.040023, 13.736758
19, 4.0302082816330165, 6.706127166748047, 12.828426361083984, 6.014402, 13.118506
24, 3.932388739650314, 6.834314346313477, 13.02103328704834, 5.9510937, 13.1642
29, 3.789182015174144, 6.755159378051758, 12.73604679107666, 5.949465, 12.74895
34, 3.6917991525256957, 6.369372367858887, 12.226123809814453, 5.6780424, 12.286836
39, 3.6002064896757537, 6.167762279510498, 12.12816047668457, 5.4856625, 11.980576
44, 3.459660556485524, 6.476086139678955, 12.419175148010254, 5.8750625, 12.467112
49, 3.4353583726206343, 6.396608352661133, 12.592206954956055, 5.73705, 12.622529
54, 3.3288733381032944