<a href="https://colab.research.google.com/github/mancunian1792/DS5220/blob/master/NeuralODE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip install prodict

In [0]:
!pip install git+https://github.com/rtqichen/torchdiffeq.git

In [0]:
!pip install -U -q PyDrive

In [0]:
import os
import argparse
import logging
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torchdiffeq import odeint as odeint
from prodict import Prodict

In [0]:
args = Prodict.from_dict({"network": "odenet","tol": 1e-3, "nepochs": 10000, "lr": 0.01, "batch_size": 128, "test_batch_size": 1, "gpu": 1})

Now, we need to import the data from google drive

First , importing and authenticating the drive

In [0]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
train_link = "https://drive.google.com/open?id=1m84l0jLwbTuFliZZ_J5IZCY6nZvGAvFN"
test_link = "https://drive.google.com/open?id=1sf3MNvKjUDJLLQU0fIrRk_BgCkXNMywR"
submission_link = "https://drive.google.com/open?id=1c7sGKW-nzMgJqd5HSHhRNWwUdhM24art"

train_fluff, trainId = train_link.split('=')
test_fluff , testId = test_link.split('=')
subm_fluff, subId = submission_link.split('=')


train_downloaded = drive.CreateFile({"id":trainId})
test_downloaded = drive.CreateFile({"id": testId})
subm_downloaded = drive.CreateFile({"id": subId})
train_downloaded.GetContentFile('train.csv')
test_downloaded.GetContentFile('test.csv')
subm_downloaded.GetContentFile('sample_submission.csv')

In [0]:
class DatasetSantander(Dataset):
    
    def __init__(self, file_path, transform=None, output_col = None, test = False):
        self.data = pd.read_csv(file_path)
        self.n = self.data.shape[0]
        self.transform = transform
        self.features = [x for x in self.data.columns if x.startswith("var")]
        if output_col and test == False:
            self.y = self.data[output_col].astype(np.float32).values.reshape(-1, 1)
        else:
            self.y =  np.zeros((self.n, 1))
        if self.features:
            self.cont_X = self.data[self.features].astype(np.float32).values
        else:
            self.cont_X = np.zeros((self.n, 1))
            
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        x = self.cont_X[index]
        y = self.y[index]

        if self.transform is not None:
            x = self.transform(x)
            
        return x, y

def get_data_loaders(batch_size=128, test_batch_size=1000):
    
    transform_train = torch.tensor

    transform_test = torch.tensor

    train_loader = DataLoader(
        DatasetSantander(file_path='train.csv', transform= transform_train, output_col="target"
    ), batch_size=batch_size, shuffle=True, num_workers=2, drop_last=True)

    train_eval_loader = DataLoader(
        DatasetSantander(file_path='train.csv', transform= transform_train, output_col= "target"
    ), batch_size=batch_size, shuffle=False, num_workers=2, drop_last=True)

    test_loader = DataLoader(
        DatasetSantander(file_path='test.csv', transform= transform_test, output_col="target",
    test = True), batch_size=test_batch_size, shuffle=False, num_workers=2, drop_last=True)

    return train_loader, train_eval_loader, test_loader





In [0]:
class ODEfunc(nn.Module):
    def __init__(self, H):
        super(ODEfunc, self).__init__()
        self.lin1 = nn.Linear(H, H)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(0.2)
        self.lin2 = nn.Linear(H, H)
        self.prelu = nn.PReLU(1)
        self.nfe = 0
    
    def forward(self,t,x):
        self.nfe += 1
        out = self.lin1(x)
        out = self.relu(out)
        out = self.dropout(out)
        out = self.lin2(out)
        out = self.prelu(out)
        return out


class ODEBlock(nn.Module):

    def __init__(self, odefunc):
        super(ODEBlock, self).__init__()
        self.odefunc = odefunc
        self.integration_time = torch.tensor([0, 1]).float()

    def forward(self, x):
        self.integration_time = self.integration_time.type_as(x)
        out = odeint(self.odefunc, x, self.integration_time, rtol=args.tol, atol=args.tol)
        return out[1]

    @property
    def nfe(self):
        return self.odefunc.nfe

    @nfe.setter
    def nfe(self, value):
        self.odefunc.nfe = value


class Flatten(nn.Module):

    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        shape = torch.prod(torch.tensor(x.shape[1:])).item()
        return x.view(-1, shape)


class RunningAverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self, momentum=0.99):
        self.momentum = momentum
        self.reset()

    def reset(self):
        self.val = None
        self.avg = 0

    def update(self, val):
        if self.val is None:
            self.avg = val
        else:
            self.avg = self.avg * self.momentum + val * (1 - self.momentum)
        self.val = val

def inf_generator(iterable):
    """Allows training with DataLoaders in a single infinite loop:
        for i, (x, y) in enumerate(inf_generator(train_loader)):
    """
    iterator = iterable.__iter__()
    while True:
        try:
            yield iterator.__next__()
        except StopIteration:
            iterator = iterable.__iter__()


def learning_rate_with_decay(batch_size, batch_denom, batches_per_epoch, boundary_epochs, decay_rates):
    initial_learning_rate = args.lr * batch_size / batch_denom

    boundaries = [int(batches_per_epoch * epoch) for epoch in boundary_epochs]
    vals = [initial_learning_rate * decay for decay in decay_rates]

    def learning_rate_fn(itr):
        lt = [itr < b for b in boundaries] + [True]
        i = np.argmax(lt)
        return vals[i]

    return learning_rate_fn


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)



In [0]:
device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')
print("What is device ? Is it none ?", device)
is_odenet = args.network == 'odenet'
N, D_in, H, D_out = 128, 200, 200, 1
feature_layers = [ODEBlock(ODEfunc(H))] if is_odenet else [ResBlock(128, 128) for _ in range(6)]
fc_layers = [nn.Linear(H, D_out), nn.Sigmoid()]
model = nn.Sequential(*feature_layers, *fc_layers).to("cuda")
print('Number of parameters: {}'.format(count_parameters(model)))
model.cuda()
criterion = nn.BCELoss().to(device.type)

train_loader, train_eval_loader, test_loader = get_data_loaders(args.batch_size, args.test_batch_size)
data_gen = inf_generator(train_loader)
batches_per_epoch = len(train_loader)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
best_acc = 0
for itr in range(args.nepochs):
    print("In iter ::", itr)
    optimizer.zero_grad()
    x, y = data_gen.__next__()
    x = x.to(device.type)
    y = y.to(device.type)
    logits = model(x)
    loss = criterion(logits, y)
    print("Loss is", loss)
    loss.backward()
    optimizer.step()
  


What is device ? Is it none ? cuda:1
Number of parameters: 80602
In iter :: 0


Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, b

Loss is tensor(0.9625, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 1
Loss is tensor(3.0221, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 2
Loss is tensor(2.3745, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 3
Loss is tensor(2.1587, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 4
Loss is tensor(2.8063, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 5
Loss is tensor(2.3745, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 6
Loss is tensor(2.3745, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 7
Loss is tensor(2.3745, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 8
Loss is tensor(3.4539, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 9
Loss is tensor(2.1587, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 10
Loss is tensor(2.3745, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
In iter :: 

In [0]:

+device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')
print("What is device ? Is it none ?", device)

In [0]:
import gc
gc.collect()

0

In [0]:
test_data = pd.read_csv('test.csv')
ID_Code = test_data["ID_code"]
test_x = pd.DataFrame(test_data.drop(["ID_code"], axis=1)).reset_index(drop=True)

In [0]:
from torch.autograd import Variable

In [0]:
test_x_torch = Variable(torch.from_numpy(test_x.values).float(), requires_grad = False).to("cuda")

In [0]:
pred = model(test_x_torch[0:100])

In [0]:
predictions = []
for i in range(0, len(test_loader)):
  pred = model(test_x_torch[i])
  predictions.append(pred.cpu().detach().numpy())
allP = flatten(predictions)
len(allP)
submissions = pd.read_csv('sample_submission.csv')
submissions['target'] = allP
submissions.to_csv('neuralode.csv')
print("All Done.")

200000

In [0]:
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU



In [0]:
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()

Gen RAM Free: 7.3 GB  | Proc size: 9.7 GB
GPU RAM Free: 13832MB | Used: 1247MB | Util   8% | Total 15079MB


In [0]:
def flatten(listOfLists):
    "Flatten one level of nesting"
    return list(chain.from_iterable(listOfLists))

In [0]:
from itertools import chain

In [0]:
from google.colab import files

In [0]:
files.download('neuralode.csv')