In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
import matplotlib.pyplot as plt


In [30]:
def add_noise(img,noise_type="gaussian"):
  
  row,col=28,28
  
  if noise_type=="gaussian":
    mean=0
    var=10
    sigma=var**.5
    noise=np.random.normal(-5.9,5.9,img.shape)
    noise=noise.reshape(row,col)
    img=img+noise
    return img

  if noise_type=="speckle":
    noise=np.random.randn(row,col)
    noise=noise.reshape(row,col)
    img=img+img*noise
    return img

In [31]:
class AutoEncoder(nn.Module):
    
    def __init__(self):
        super(AutoEncoder, self).__init__()
        
        #encoder
        self.e1 = nn.Linear(784,28)
        self.e2 = nn.Linear(28,250)
        self.e3 = nn.Linear()
        
        #Latent View
        self.lv = nn.Linear(250,10)
        
        #Decoder
        self.d1 = nn.Linear(10,250)
        self.d2 = nn.Linear(250,500)
        
        self.output_layer = nn.Linear(500,784)
        
    def forward(self,x):
        x = F.sigmoid(self.e1(x))
        x = F.sigmoid(self.e2(x))
        
        x = torch.sigmoid(self.lv(x))
        
        x = F.sigmoid(self.d1(x))
        x = F.sigmoid(self.d2(x))
        
        x = self.output_layer(x)
        return x

In [32]:
ae = AutoEncoder()
print(ae)

TypeError: __init__() missing 2 required positional arguments: 'in_features' and 'out_features'

In [33]:
import torch
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F


class CDAutoEncoder(nn.Module):
    """
    Convolutional denoising autoencoder layer for stacked autoencoders.
    This module is automatically trained when in model.training is True.

    Args:
        input_size: The number of features in the input
        output_size: The number of features to output
        stride: Stride of the convolutional layers.
    """
    def __init__(self, input_size, output_size, stride):
        super(CDAutoEncoder, self).__init__()

        self.forward_pass = nn.Sequential(
            nn.Conv2d(input_size, output_size, kernel_size=2, stride=stride, padding=0),
            nn.ReLU(),
        )
        self.backward_pass = nn.Sequential(
            nn.ConvTranspose2d(output_size, input_size, kernel_size=2, stride=2, padding=0), 
            nn.ReLU(),
        )

        self.criterion = nn.MSELoss()
        self.optimizer = torch.optim.SGD(self.parameters(), lr=0.1)

    def forward(self, x):
        # Train each autoencoder individually
        x = x.detach()
        # Add noise, but use the original lossless input as the target.
        x_noisy = x * (Variable(x.data.new(x.size()).normal_(0, 0.1)) > -.1).type_as(x)
        y = self.forward_pass(x_noisy)

        if self.training:
            x_reconstruct = self.backward_pass(y)
            #loss = self.criterion(x_reconstruct, Variable(x.data, requires_grad=False))
            loss = nn.CrossEntropyLoss()
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            
        return y.detach()

    def reconstruct(self, x):
        return self.backward_pass(x)


class StackedAutoEncoder(nn.Module):
    """
    A stacked autoencoder made from the convolutional denoising autoencoders above.
    Each autoencoder is trained independently and at the same time.
    """

    def __init__(self):
        super(StackedAutoEncoder, self).__init__()

        self.ae1 = CDAutoEncoder(500, 500, 500)
        self.ae2 = CDAutoEncoder(500, 500, 500)
        self.ae3 = CDAutoEncoder(500, 500, 500)

    def forward(self, x):
        a1 = self.ae1(x)
        a2 = self.ae2(a1)
        a3 = self.ae3(a2)

        if self.training:
            return a3

        else:
            return a3, self.reconstruct(a3)

    def reconstruct(self, x):
            a2_reconstruct = self.ae3.reconstruct(x)
            a1_reconstruct = self.ae2.reconstruct(a2_reconstruct)
            x_reconstruct = self.ae1.reconstruct(a1_reconstruct)
            return x_reconstruct


In [34]:
import pandas as pd
import torch
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler

class FeatureDataset(Dataset):
    
    def __init__(self, file_name):
        
        # read csv and load data from rows into variables
        file_out = pd.read_csv(file_name)
        x = file_out.iloc[1:200, 1:200].values
        y = file_out.iloc[1:200, 1:200].values
        
        # Feature Scaling 
        #sc = StandardScaler()
        #x_train = sc.fit_transform(x)
        
        x_train = x
        y_train = y
        
        # Convert to torch tensors
        self.X_train = torch.tensor(x_train)
        self.y_train = torch.tensor(y_train)
        
    def __getitem__(self, index):
        return self.X_train[idx], self.y_train[idx]

    def __len__(self):
        return len(self.y_train)

In [35]:
dataset = pd.read_csv('patient_vecs.csv')

Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,ETHNICITY,DIAGNOSIS,HAS_CHARTEVENTS_DATA,DRUG,FORMULARY_DRUG_CD,age,ICD9_CODE
0,2,['M'],[0],['ASIAN'],['NEWBORN'],[1],"['NEO*IV*Gentamicin', 'Syringe (Neonatal) *D5W...","['GENT10I', 'NEOSYRD5W', 'AMP500I', 'AMPVL']",[0],"['V3001', 'V053', 'V290']"
1,4,['F'],[0],['WHITE'],"['FEVER,DEHYDRATION,FAILURE TO THRIVE']",[1],"['Iso-Osmotic Dextrose', 'Insulin', 'Benzonata...","['VANCOBASE', 'GLAR100I', 'BENZ100', 'INSULIN'...",[47],"['042', '1363', '7994', '2763', '7907', '5715'..."
2,6,['F'],[0],['WHITE'],['CHRONIC RENAL FAILURE/SDA'],[1],"['Tacrolimus', 'Warfarin', 'Heparin Sodium', '...","['TACR1', 'WARF5', 'HEPAPREMIX', 'HEPBASE', 'F...",[65],"['40391', '4440', '9972', '2766', '2767', '285..."
3,8,['M'],[0],['WHITE'],['NEWBORN'],[1],"['Send 500mg Vial', 'NEO*IV*Gentamicin', 'NEO*...","['AMPVL', 'GENT10I', 'NAMP500I', 'NEOSYRD5W']",[0],"['V3001', '7706', '7746', 'V290', 'V502', 'V053']"
4,9,['M'],[1],['UNKNOWN/NOT SPECIFIED'],['HEMORRHAGIC CVA'],[1],"['SW', 'Labetalol HCl', 'Potassium Chloride', ...","['KCLBASE', 'LABE100I', 'KCL20P', 'D5W250', 'N...",[41],"['431', '5070', '4280', '5849', '2765', '4019']"


Unnamed: 0,SUBJECT_ID,GENDER,EXPIRE_FLAG,ETHNICITY,HAS_CHARTEVENTS_DATA,DRUG,FORMULARY_DRUG_CD,age,ICD9_CODE
0,2,['M'],[0],['ASIAN'],[1],"['NEO*IV*Gentamicin', 'Syringe (Neonatal) *D5W...","['GENT10I', 'NEOSYRD5W', 'AMP500I', 'AMPVL']",[0],"['V3001', 'V053', 'V290']"
1,4,['F'],[0],['WHITE'],[1],"['Iso-Osmotic Dextrose', 'Insulin', 'Benzonata...","['VANCOBASE', 'GLAR100I', 'BENZ100', 'INSULIN'...",[47],"['042', '1363', '7994', '2763', '7907', '5715'..."
2,6,['F'],[0],['WHITE'],[1],"['Tacrolimus', 'Warfarin', 'Heparin Sodium', '...","['TACR1', 'WARF5', 'HEPAPREMIX', 'HEPBASE', 'F...",[65],"['40391', '4440', '9972', '2766', '2767', '285..."
3,8,['M'],[0],['WHITE'],[1],"['Send 500mg Vial', 'NEO*IV*Gentamicin', 'NEO*...","['AMPVL', 'GENT10I', 'NAMP500I', 'NEOSYRD5W']",[0],"['V3001', '7706', '7746', 'V290', 'V502', 'V053']"
4,9,['M'],[1],['UNKNOWN/NOT SPECIFIED'],[1],"['SW', 'Labetalol HCl', 'Potassium Chloride', ...","['KCLBASE', 'LABE100I', 'KCL20P', 'D5W250', 'N...",[41],"['431', '5070', '4280', '5849', '2765', '4019']"


In [None]:
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8)

In [19]:
len(msk)

39363

In [20]:
for i in enumerate(dataloader):
        img, target = data
        target = Variable(target) #.cuda()
        img = Variable(img) #.cuda()
        features = model(img).detach()
        prediction = classifier(features.view(features.size(0), -1))
        loss = criterion(prediction, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pred = prediction.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

KeyError: Caught KeyError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/Users/nicenoize/opt/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 2646, in get_loc
    return self._engine.get_loc(key)
  File "pandas/_libs/index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1618, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1626, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 34709

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/nicenoize/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/Users/nicenoize/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/Users/nicenoize/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/Users/nicenoize/opt/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py", line 2800, in __getitem__
    indexer = self.columns.get_loc(key)
  File "/Users/nicenoize/opt/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 2648, in get_loc
    return self._engine.get_loc(self._maybe_cast_indexer(key))
  File "pandas/_libs/index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1618, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1626, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 34709


In [None]:
import os
import time

import torch
#import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
#from torchvision import transforms
#from torchvision.datasets import MNIST, CIFAR10
#from torchvision.utils import save_image

#from model import StackedAutoEncoder

num_epochs = 1000
batch_size = 128

#dataset = CIFAR10('../data/cifar10/', transform=img_transform)
#dataset = FeatureDataset('patient_vecs.csv')


dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8)

model = StackedAutoEncoder() #.cuda()

for epoch in range(num_epochs):
    if epoch % 10 == 0:
        # Test the quality of our features with a randomly initialzed linear classifier.
        classifier = nn.Linear(512 * 16, 10) #.cuda()
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

    model.train()
    total_time = time.time()
    correct = 0
    for i, data in enumerate(dataloader):
        img, target = data
        target = Variable(target) #.cuda()
        img = Variable(img) #.cuda()
        features = model(img).detach()
        prediction = classifier(features.view(features.size(0), -1))
        loss = criterion(prediction, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pred = prediction.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    total_time = time.time() - total_time

    model.eval()
    img, _ = data
    img = Variable(img) #.cuda()
    features, x_reconstructed = model(img)
    reconstruction_loss = torch.mean((x_reconstructed.data - img.data)**2)

    if epoch % 10 == 0:
        print("Saving epoch {}".format(epoch))
        orig = to_img(img.cpu().data)
        save_image(orig, './imgs/orig_{}.png'.format(epoch))
        pic = to_img(x_reconstructed.cpu().data)
        save_image(pic, './imgs/reconstruction_{}.png'.format(epoch))

    print("Epoch {} complete\tTime: {:.4f}s\t\tLoss: {:.4f}".format(epoch, total_time, reconstruction_loss))
    print("Feature Statistics\tMean: {:.4f}\t\tMax: {:.4f}\t\tSparsity: {:.4f}%".format(
        torch.mean(features.data), torch.max(features.data), torch.sum(features.data == 0.0)*100 / features.data.numel())
    )
    print("Linear classifier performance: {}/{} = {:.2f}%".format(correct, len(dataloader)*batch_size, 100*float(correct) / (len(dataloader)*batch_size)))
    print("="*80)

torch.save(model.state_dict(), './CDAE.pth')

In [6]:
for idx in enumerate(dataset):
    print(idx)

NameError: name 'dataset' is not defined

In [40]:
import torch
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler

class CustomDatasetFromCSV(Dataset):
    def __init__(self, csv_path, transform=None):
        self.data = pd.read_csv(csv_path)
         self.height = 48
        self.width = 48
        self.transform = transform

    def __getitem__(self, index):
        # This method should return only 1 sample and label 
        # (according to "index"), not the whole dataset
        # So probably something like this for you:
        pixel_sequence = self.data['pixels'][index]
        face = [int(pixel) for pixel in pixel_sequence.split(' ')]
        face = np.asarray(face).reshape(self.width, self.height)
        face = cv2.resize(face.astype('uint8'), (self.width, self.height))
        label = self.labels[index]

        return face, label

    def __len__(self):
        return len(self.labels)


dataset = CustomDatasetFromCSV('patient_vecs.csv')
batch_size = 16
validation_split = .2
shuffle_dataset = True
random_seed= 42

# Creating data indices for training and validation splits:
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))
if shuffle_dataset :
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

KeyError: 'emotion'