# **Linear Regression**

In [5]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Hyperparameters
input_size = 1
output_size = 1
number_epochs = 60
learning_rate = 0.001


# Toy dataset: 15 (x, y) pairs stored as float32 column vectors.
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168],
                    [9.779], [6.182], [7.59], [2.167], [7.042],
                    [10.791], [5.313], [7.997], [3.1]], dtype=np.float32)

y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573],
                    [3.366], [2.596], [2.53], [1.221], [2.827],
                    [3.465], [1.65], [2.904], [1.3]], dtype=np.float32)

# Linear regression model
model = nn.Linear(input_size, output_size)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# The training data never changes, so convert the numpy arrays to tensors
# once instead of on every epoch.
inputs = torch.from_numpy(x_train)
targets = torch.from_numpy(y_train)

# Train the model (every epoch uses the full dataset as one batch)
for epoch in range(number_epochs):
  # Forward pass
  outputs = model(inputs)
  loss = criterion(outputs, targets)

  # Backward propagation and parameter update
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  if (epoch + 1) % 5 == 0:
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, number_epochs, loss.item()))


# Plot the data and the fitted line.
# detach() has to be *called*: it returns a tensor cut off from the autograd
# graph, which is required before converting to numpy.
predicted = model(inputs).detach().numpy()
plt.plot(x_train, y_train, 'ro', label='Original data')
plt.plot(x_train, predicted, label='Fitted line')
plt.legend()
plt.show()


# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')










# **Logistic Regression**

In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms


# Hyperparameters
learning_rate = 0.001
number_epochs = 5
num_classes = 10
batch_size = 100
input_size = 28 * 28  # images are flattened to this length before the linear layer


# MNIST dataset (images and labels)
train_dataset = torchvision.datasets.MNIST(root='../../data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='../../data',
                                          train=False,
                                          transform=transforms.ToTensor(),
                                          download=True)


# Data loaders (input pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)


# Logistic regression model: one linear layer producing class scores
model = nn.Linear(input_size, num_classes)


# Loss and optimizer
# nn.CrossEntropyLoss() computes softmax internally, so the model outputs raw scores
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)

for epoch in range(number_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # Reshape images to (batch_size, input_size)
    images = images.reshape(-1, input_size)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i + 1) % 100 == 0:
      print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
            .format(epoch + 1, number_epochs, i + 1, total_step, loss.item()))

# Test the model
# In the test phase we don't need to compute gradients
with torch.no_grad():
  correct = 0
  total = 0
  for images, labels in test_loader:
    images = images.reshape(-1, input_size)
    outputs = model(images)
    # The index of the largest score is the predicted class
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    # .item() keeps `correct` a plain Python int so the accuracy prints as a number
    correct += (predicted == labels).sum().item()

  print('Accuracy of the model on the 10000 test images: {}%'.format(100 * correct / total))


# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')






# **Feedforward_Neural_Network**

In [15]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import torchvision
import torchvision.transforms as transforms



# Device configuration: use the GPU when one is available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
input_size = 28 * 28
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST dataset (images, labels); downloaded into ../../data when missing
train_data = torchvision.datasets.MNIST(
    '../../data', train=True, transform=transforms.ToTensor(), download=True)
test_data = torchvision.datasets.MNIST(
    '../../data', train=False, transform=transforms.ToTensor(), download=True)

# Data loaders (input pipeline): shuffle the training set only
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False)



# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
  """Two-layer perceptron: Linear -> ReLU -> Linear.

  Args:
    input_size: number of features per input sample.
    hidden_size: width of the hidden layer.
    num_classes: number of output scores per sample.
  """

  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return the raw class scores for `x`."""
    hidden = self.relu(self.fc1(x))
    return self.fc2(hidden)


# Build the feed-forward network and place it on the configured device
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# Train the model
Total_steps = len(train_loader)

for epoch in range(num_epochs):
  for step, (images, targets) in enumerate(train_loader, start=1):
    # Flatten every image to a vector and move the batch to the device
    inputs = images.reshape(-1, input_size).to(device)
    labels = targets.to(device)

    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, labels)

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress every 100 steps
    if step % 100 == 0:
      print('Epoch[{}/{}], Step[{}/{}], Loss: {:.4f}'
            .format(epoch + 1, num_epochs, step, Total_steps, loss.item()))


# Test the model
# Gradients are not needed for evaluation, so they are switched off to save memory
correct = 0
total = 0
with torch.no_grad():
  for images, labels in test_loader:
    images = images.reshape(-1, input_size).to(device)
    labels = labels.to(device)

    outputs = model(images)
    # torch.max over dim 1 returns (values, indices); the indices are the predicted classes
    predicted = torch.max(outputs.data, 1)[1]
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))


# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')



Epoch[1/5], Step[100/600], Loss: 0.4934
Epoch[1/5], Step[200/600], Loss: 0.3000
Epoch[1/5], Step[300/600], Loss: 0.3203
Epoch[1/5], Step[400/600], Loss: 0.1805
Epoch[1/5], Step[500/600], Loss: 0.0965
Epoch[1/5], Step[600/600], Loss: 0.1907
Epoch[2/5], Step[100/600], Loss: 0.0762
Epoch[2/5], Step[200/600], Loss: 0.0525
Epoch[2/5], Step[300/600], Loss: 0.1232
Epoch[2/5], Step[400/600], Loss: 0.0830
Epoch[2/5], Step[500/600], Loss: 0.2476
Epoch[2/5], Step[600/600], Loss: 0.0931
Epoch[3/5], Step[100/600], Loss: 0.0366
Epoch[3/5], Step[200/600], Loss: 0.0419
Epoch[3/5], Step[300/600], Loss: 0.1308
Epoch[3/5], Step[400/600], Loss: 0.1272
Epoch[3/5], Step[500/600], Loss: 0.0857
Epoch[3/5], Step[600/600], Loss: 0.1357
Epoch[4/5], Step[100/600], Loss: 0.0624
Epoch[4/5], Step[200/600], Loss: 0.0449
Epoch[4/5], Step[300/600], Loss: 0.0162
Epoch[4/5], Step[400/600], Loss: 0.0579
Epoch[4/5], Step[500/600], Loss: 0.1076
Epoch[4/5], Step[600/600], Loss: 0.0265
Epoch[5/5], Step[100/600], Loss: 0.0172


# **Convolutional Neural Network**

### Formula for output dimensions after convolution:


Output Size (H, W) = floor((Input Size (H, W) + 2 * Padding - Kernel Size) / Stride) + 1


### Formula for output dimensions after pooling:

Output Size (H, W) = floor((Input Size (H, W) - Kernel Size) / Stride) + 1

In [16]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


# Hyperparameters
learning_rate = 0.001
batch_size = 60
num_epochs = 5
num_classes = 10
input_size = 28 * 28
hidden_size = 100


# MNIST dataset
train_data = torchvision.datasets.MNIST(root='../../data',
                                        train=True,
                                        transform=transforms.ToTensor(),
                                        download=True)

# train=False selects the held-out test split; evaluating on the training
# split would report an over-optimistic accuracy.
test_data = torchvision.datasets.MNIST(root='../../data',
                                       train=False,
                                       transform=transforms.ToTensor(),
                                       download=True)

# Data loaders (input pipeline)
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)

test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)


# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
  """Two Conv-BatchNorm-ReLU-MaxPool stages followed by a linear classifier.

  The classifier expects 7*7*32 features, which is what the two stages
  produce for a 1x28x28 input: each convolution keeps the spatial size
  (kernel 5, padding 2) and each 2x2 max-pool halves it (28 -> 14 -> 7).

  Args:
    num_classes: number of output scores per image.
  """

  def __init__(self, num_classes=10):
    super(ConvNet, self).__init__()
    self.layer1 = nn.Sequential(
        nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(16),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))
    self.layer2 = nn.Sequential(
        nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
        # Batch-norm width must match the 32 channels produced by the conv above
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))
    self.fc = nn.Linear(7 * 7 * 32, num_classes)

  def forward(self, x):
    """Return class scores of shape (batch, num_classes) for image batch `x`."""
    out = self.layer1(x)
    out = self.layer2(out)
    # Flatten everything except the batch dimension for the linear layer
    out = out.reshape(out.size(0), -1)
    out = self.fc(out)
    return out


# The model has to exist before the optimizer is built, otherwise the
# optimizer is bound to the parameters of whatever `model` was defined earlier.
model = ConvNet(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# Train the model
total_steps = len(train_loader)

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # Move tensors to the configured device
    images = images.to(device)
    labels = labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i + 1) % 100 == 0:
      print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
            .format(epoch + 1, num_epochs, i + 1, total_steps, loss.item()))


# Test the model
# eval() switches layers such as batch normalization to inference behaviour
# (running statistics instead of per-batch statistics).
model.eval()
# In the test phase we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
  correct = 0
  total = 0

  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

  print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))


# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')


# **Deep Residual Network**

In [23]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms



# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
learning_rate = 0.001
batch_size = 100
num_epochs = 80

# Image preprocessing for training: random pad/flip/crop data augmentation
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])


# CIFAR-10 dataset
train_data = torchvision.datasets.CIFAR10(root='../../data',
                                          train=True,
                                          transform=transform,
                                          download=True)

# The test split is only converted to tensors: random augmentation would make
# the reported accuracy noisy and not comparable between runs.
test_data = torchvision.datasets.CIFAR10(root='../../data',
                                         train=False,
                                         transform=transforms.ToTensor(),
                                         download=True)


# Data loaders (input pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=batch_size,
                                           shuffle=True)

# Evaluation does not depend on sample order, so the test set is not shuffled
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                          batch_size=batch_size,
                                          shuffle=False)


#Residual Networks Model


# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
  """Return a bias-free 3x3 `nn.Conv2d` with padding 1.

  With kernel 3 and padding 1 the spatial size is preserved for stride 1
  and halved (for even sizes) for stride 2.

  Args:
    in_channels: number of input channels.
    out_channels: number of output channels.
    stride: convolution stride.
  """
  return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                   stride=stride, padding=1, bias=False)




# Residual block
class ResidualBlock(nn.Module):
  """Two conv3x3 + batch-norm layers with a skip connection around them.

  Args:
    in_channels: channels of the block input.
    out_channels: channels produced by both convolutions.
    stride: stride of the first convolution.
    downsample: optional module applied to the input before it is added to
      the convolution output; the caller supplies it when the input and
      output shapes differ.
  """

  def __init__(self, in_channels, out_channels, stride=1, downsample=None):
    super(ResidualBlock, self).__init__()
    self.conv1 = conv3x3(in_channels, out_channels, stride)
    self.bn1 = nn.BatchNorm2d(out_channels)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = conv3x3(out_channels, out_channels)
    self.bn2 = nn.BatchNorm2d(out_channels)
    self.downsample = downsample

  def forward(self, x):
    """Return relu(F(x) + shortcut(x)) where F is the two-conv branch."""
    residual = x
    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)
    out = self.conv2(out)
    out = self.bn2(out)
    # Explicit None check: the truth value of a module is not a reliable
    # "is present" test (an empty container module is falsy).
    if self.downsample is not None:
      residual = self.downsample(x)
    out += residual
    out = self.relu(out)
    return out

# ResNet
class ResNet(nn.Module):
  """Small ResNet: a stem convolution, three residual stages (16/32/64
  channels), average pooling and a linear classifier.

  Args:
    block: residual block class, called as
      `block(in_channels, out_channels, stride, downsample)`.
    layers: sequence of three ints, the number of blocks in each stage.
    num_classes: number of output scores per image.
  """

  def __init__(self, block, layers, num_classes=10):
    super(ResNet, self).__init__()
    self.in_channels = 16
    self.conv = conv3x3(3, 16)
    self.bn = nn.BatchNorm2d(16)
    self.relu = nn.ReLU(inplace=True)
    self.layer1 = self.make_layer(block, 16, layers[0])
    self.layer2 = self.make_layer(block, 32, layers[1], 2)
    self.layer3 = self.make_layer(block, 64, layers[2], 2)
    self.avg_pool = nn.AvgPool2d(8)
    self.fc = nn.Linear(64, num_classes)

  def make_layer(self, block, out_channels, blocks, stride=1):
    """Build one stage of `blocks` residual blocks as an `nn.Sequential`.

    Only the first block changes the channel count / stride; it receives a
    projection shortcut (`downsample`) when the input and output shapes
    differ. `self.in_channels` is updated so the next stage chains on.
    """
    downsample = None
    if (stride != 1) or (self.in_channels != out_channels):
      downsample = nn.Sequential(
          conv3x3(self.in_channels, out_channels, stride=stride),
          nn.BatchNorm2d(out_channels))
    # The stage is always built, whether or not a projection shortcut is needed.
    layers = [block(self.in_channels, out_channels, stride, downsample)]
    self.in_channels = out_channels
    layers.extend(block(out_channels, out_channels) for _ in range(1, blocks))
    return nn.Sequential(*layers)

  def forward(self, x):
    """Return class scores of shape (batch, num_classes) for image batch `x`."""
    out = self.conv(x)
    out = self.bn(out)
    out = self.relu(out)
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.avg_pool(out)
    # Flatten to (batch, features) for the linear classifier
    out = out.view(out.size(0), -1)
    out = self.fc(out)
    return out



# Build the network with two residual blocks per stage and move it to the device
model = ResNet(ResidualBlock, [2, 2, 2])
model = model.to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# Learning-rate update helper
def update_lr(optimizer, lr):
  """Set the learning rate of every parameter group of `optimizer` to `lr`."""
  for group in optimizer.param_groups:
    group.update(lr=lr)



# Train the model
total_step = len(train_loader)
curr_lr = learning_rate

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    images = images.to(device)
    labels = labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i + 1) % 100 == 0:
      print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
            .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

  # Decay the learning rate once every 20 epochs. This must sit at epoch
  # level: inside the batch loop it would divide the rate by 3 on every
  # step of the epoch and drive it to effectively zero.
  if (epoch + 1) % 20 == 0:
    curr_lr /= 3
    update_lr(optimizer, curr_lr)


# Test the model
# eval() makes batch-norm layers use their running statistics
model.eval()
with torch.no_grad():
  correct = 0
  total = 0
  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

  print('Accuracy of the model on the test images: {}%'.format(100 * correct / total))


# Save the model checkpoint
torch.save(model.state_dict(), 'resnet.ckpt')


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../../data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:11<00:00, 14.6MB/s]


Extracting ../../data/cifar-10-python.tar.gz to ../../data
Files already downloaded and verified


# **Recurrent Neural Network**

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
# Each image is reshaped later in this section to `sequence_length` steps
# of `input_size` features.
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 10
batch_size = 100
num_epochs = 2
learning_rate = 0.01


# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='../../root',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='../../root',
                                          train=False,
                                          transform=transforms.ToTensor(),
                                          download=True)

# Data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

# Named `test_loader` (lower case) because that is the name the evaluation loop iterates over
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)





# Recurrent neural network (many-to-one)
class RNN(nn.Module):
  """Stacked LSTM whose last time-step output is fed to a linear classifier.

  Args:
    input_size: number of features per time step.
    hidden_size: LSTM hidden state size.
    num_layers: number of stacked LSTM layers.
    num_classes: number of output scores per sequence.
  """

  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super(RNN, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return class scores (batch, num_classes) for `x` of shape (batch, seq, input_size)."""
    # Zero initial hidden and cell states, created on the input's own device
    # so the module does not depend on a global `device` variable.
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)
    c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)

    # Forward propagate the LSTM; out has shape (batch, seq, hidden_size)
    out, _ = self.lstm(x, (h0, c0))

    # Decode the hidden state of the last time step
    out = self.fc(out[:, -1, :])
    return out


model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)


# Loss and optimizer
# The loss module is kept under its own name (`criterion`) so it is not
# overwritten by the per-batch `loss` value inside the training loop.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
  for i, (images, targets) in enumerate(train_loader):
    # Reshape every image to a sequence and move the batch to the device
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = targets.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i + 1) % 100 == 0:
      print('Epochs[{}/{}], Step [{}/{}], Loss: {:.4f}'
            .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))


# Test the model
model.eval()

with torch.no_grad():
  correct = 0
  total = 0

  for images, targets in test_loader:
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = targets.to(device)
    outputs = model(images)

    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

  print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))


# Save the model
torch.save(model.state_dict(), 'model.ckpt')





# **Bidirectional Recurrent Neural Network**

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import pandas as pd




# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Hyperparameters
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_epochs = 2
batch_size = 70
learning_rate = 0.003
num_classes = 10


# Dataset
# The keyword is `transform` (singular); `transforms` is the name of the
# torchvision module used to build the value.
train_data = torchvision.datasets.MNIST(root='../../data',
                                        train=True,
                                        transform=transforms.ToTensor(),
                                        download=True)

test_data = torchvision.datasets.MNIST(root='../../data',
                                       train=False,
                                       transform=transforms.ToTensor(),
                                       download=True)

# Data loaders (input pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                          batch_size=batch_size,
                                          shuffle=False)




# Bidirectional recurrent neural network (many-to-one)
class BiRNN(nn.Module):
  """Stacked bidirectional LSTM followed by a linear classifier.

  Args:
    input_size: number of features per time step.
    hidden_size: LSTM hidden state size (per direction).
    num_layers: number of stacked LSTM layers.
    num_classes: number of output scores per sequence.
  """

  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super(BiRNN, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                        batch_first=True, bidirectional=True)
    # The LSTM output concatenates both directions, hence hidden_size * 2
    self.fc = nn.Linear(hidden_size * 2, num_classes)

  def forward(self, x):
    """Return class scores (batch, num_classes) for `x` of shape (batch, seq, input_size)."""
    # Zero initial states; the leading dimension is num_layers * 2 because
    # every layer has a forward and a backward direction. They are created
    # on the input's device so no global `device` is needed.
    h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size, device=x.device)
    c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size, device=x.device)

    # Forward propagate the LSTM; out has shape (batch, seq, hidden_size * 2)
    out, _ = self.lstm(x, (h0, c0))

    # Decode the output of the last time step
    out = self.fc(out[:, -1, :])
    return out


# Instantiate the model; without this the optimizer and the loops below
# would operate on whatever `model` an earlier section left behind.
model = BiRNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# Train the model
total_steps = len(train_loader)

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # Reshape every image to a sequence and move the batch to the device
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress every 100 steps
    if (i + 1) % 100 == 0:
      print('Epoch [{}/{}], Step [{}/{}], Loss {:.4f}'
            .format(epoch + 1, num_epochs, i + 1, total_steps, loss.item()))


# Test the model
model.eval()
with torch.no_grad():
  total = 0
  correct = 0

  for images, labels in test_loader:
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)
    outputs = model(images)

    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print('Test Accuracy of the model on 10000 test images: {}%'.format(100 * correct / total))

# Save the model. state_dict must be *called*; passing the bound method
# would pickle the method instead of the parameter dictionary.
torch.save(model.state_dict(), 'model.ckpt')








# **language_model**

In [None]:
import torch
import torch.nn as nn
import torchvision
from torch.nn.utils import clip_grad_norm_
from data_utils import Dictionary, Corpus

# Device configuration: GPU when available, CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Hyperparameters
embed_size = 128
hidden_size = 1024
num_layers = 1
num_epochs = 5
num_samples = 1000     # number of words to be sampled
batch_size = 20
seq_length = 30
learning_rate = 0.002


# Load the "Penn Treebank" dataset
corpus = Corpus()
ids = corpus.get_data('data/train.txt', batch_size)
vocab_size = len(corpus.dictionary)
# Number of full seq_length-long windows along dimension 1 of `ids`
num_batches = ids.size(1) // seq_length



# RNN based language model
class RNNLM(nn.Module):
  """Embedding -> LSTM -> linear decoder over the vocabulary.

  Args:
    vocab_size: number of distinct word ids (also the decoder output size).
    embed_size: embedding dimension.
    num_layers: number of stacked LSTM layers.
    hidden_size: LSTM hidden state size.
  """

  def __init__(self, vocab_size, embed_size, num_layers, hidden_size):
    super(RNNLM, self).__init__()
    self.embed = nn.Embedding(vocab_size, embed_size)
    self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
    # One score per vocabulary word
    self.linear = nn.Linear(hidden_size, vocab_size)

  def forward(self, x, h=None):
    """Run the model on word ids `x` of shape (batch, seq).

    Args:
      x: integer tensor of word ids.
      h: optional `(h0, c0)` initial LSTM states; when None the LSTM starts
        from zero states.

    Returns:
      `(out, (h, c))` where `out` has shape (batch * seq, vocab_size) and
      `(h, c)` are the final LSTM states.
    """
    # Embed word ids to vectors
    x = self.embed(x)

    # Forward propagate the LSTM
    out, (h, c) = self.lstm(x, h)

    # Reshape output to (batch_size * sequence_length, hidden_size)
    out = out.reshape(out.size(0) * out.size(1), out.size(2))

    # Decode hidden states of all time steps
    out = self.linear(out)

    return out, (h, c)


# Move the model to the same device the inputs and LSTM states are sent to;
# otherwise the forward pass fails with a device mismatch on GPU machines.
model = RNNLM(vocab_size, embed_size, num_layers, hidden_size).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Truncated backpropagation helper
def detach(states):
  """Return a list holding a detached copy of every tensor in `states`.

  The detached tensors carry no autograd history, so a backward pass that
  uses them stops there.
  """
  detached = []
  for state in states:
    detached.append(state.detach())
  return detached

# Train the model
for epoch in range(num_epochs):
  # Set initial hidden and cell states
  states = (torch.zeros(num_layers, batch_size, hidden_size).to(device),
            torch.zeros(num_layers, batch_size, hidden_size).to(device))

  for i in range(0, ids.size(1) - seq_length, seq_length):
    # Mini-batch inputs and targets: the targets are the same window of
    # `ids` shifted one position along dimension 1.
    inputs = ids[:, i:i + seq_length].to(device)
    targets = ids[:, (i + 1):(i + 1) + seq_length].to(device)

    # Forward pass. Detaching the carried-over states truncates
    # backpropagation at the window boundary.
    states = detach(states)
    outputs, states = model(inputs, states)
    loss = criterion(outputs, targets.reshape(-1))

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    # Clip the gradient norm to guard against exploding gradients
    clip_grad_norm_(model.parameters(), 0.5)
    optimizer.step()

    step = (i + 1) // seq_length

    if step % 100 == 0:
      # Perplexity is exp(loss)
      print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.2f}'
            .format(epoch + 1, num_epochs, step, num_batches,
                    loss.item(), torch.exp(loss).item()))


# Test the model: sample `num_samples` words and write them to sample.txt
with torch.no_grad(), open('sample.txt', 'w') as f:
  # Set initial hidden and cell states (batch size 1)
  state = (torch.zeros(num_layers, 1, hidden_size).to(device),
           torch.zeros(num_layers, 1, hidden_size).to(device))

  # Select the first word id uniformly at random
  prob = torch.ones(vocab_size)
  # Named `input_ids` so the builtin `input` is not shadowed
  input_ids = torch.multinomial(prob, num_samples=1).unsqueeze(1).to(device)

  for i in range(num_samples):
    # Forward propagate the RNN
    output, state = model(input_ids, state)

    # Sample a word id; exp() of the scores gives unnormalized weights,
    # which torch.multinomial accepts
    prob = output.exp()
    word_id = torch.multinomial(prob, num_samples=1).item()

    # Feed the sampled word back in as the next input (in-place fill)
    input_ids.fill_(word_id)

    # Write the word; '<eos>' ends the line
    word = corpus.dictionary.idx2word[word_id]
    word = '\n' if word == '<eos>' else word + ' '
    f.write(word)

    if (i + 1) % 100 == 0:
      print('Sampled [{}/{}] words and save to {}'.format(i + 1, num_samples, 'sample.txt'))


# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')


# **Generative Adversarial Network**

In [None]:
import torch.nn
import os
import torchvision
from torchvision.transforms import transforms
from torchvision.utils import save_image

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
learning_rate = 0.001
latent_size = 64
hidden_size = 256
image_size = 784
num_epochs = 200
batch_size = 100
sample_dir = 'samples'


# Create the sample directory if it does not exist
if not os.path.exists(sample_dir):
  os.makedirs(sample_dir)


# Image processing for single-channel (greyscale) images: maps pixel values
# from [0, 1] to [-1, 1]. Note std is the float 0.5; writing `[0,5]` would
# be the two-element list [0, 5].
# For 3-channel RGB images use mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])])


# MNIST dataset
mnist = torchvision.datasets.MNIST(root='../../data/',
                                   train=True,
                                   transform=transform,
                                   download=True)


# Data loader
data_loader = torch.utils.data.DataLoader(dataset=mnist,
                                          batch_size=batch_size,
                                          shuffle=True)


# Discriminator: maps a flattened image to a probability via a final Sigmoid
D = nn.Sequential(
    nn.Linear(image_size, hidden_size),
    nn.LeakyReLU(0.2),
    nn.Linear(hidden_size, hidden_size),
    nn.LeakyReLU(0.2),
    nn.Linear(hidden_size, 1),
    nn.Sigmoid())

# Generator: maps a latent vector to a flattened image; Tanh keeps the
# output in [-1, 1]
G = nn.Sequential(
    nn.Linear(latent_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, image_size),
    nn.Tanh())


# Device setting
D = D.to(device)
G = G.to(device)


# Binary cross-entropy loss and one optimizer per network
criterion = nn.BCELoss()
d_optimizer = torch.optim.Adam(D.parameters(), lr=0.0002)
g_optimizer = torch.optim.Adam(G.parameters(), lr=0.0002)


# De-normalization helper
def denorm(x):
  """Map `x` from [-1, 1] to [0, 1], clamping values that fall outside."""
  return ((x + 1) / 2).clamp(min=0, max=1)


def reset_grad():
  """Clear the gradients held by the discriminator and generator optimizers."""
  for opt in (d_optimizer, g_optimizer):
    opt.zero_grad()



# Start training
total_step = len(data_loader)
for epoch in range(num_epochs):
  for i, (images, _) in enumerate(data_loader):
    # Flatten every image to a vector
    images = images.reshape(batch_size, -1).to(device)

    # Labels used as targets for the BCE loss: 1 for real, 0 for fake
    real_labels = torch.ones(batch_size, 1).to(device)
    fake_labels = torch.zeros(batch_size, 1).to(device)

    # ================= Train the discriminator ================= #
    # BCE_Loss(x, y) = -y * log(D(x)) - (1 - y) * log(1 - D(x))
    # With real images the second term is zero because real_labels == 1
    outputs = D(images)
    d_loss_real = criterion(outputs, real_labels)
    real_score = outputs

    # With fake images the first term is zero because fake_labels == 0
    z = torch.randn(batch_size, latent_size).to(device)
    fake_images = G(z)
    outputs = D(fake_images)
    d_loss_fake = criterion(outputs, fake_labels)
    fake_score = outputs

    # Backprop and optimize
    d_loss = d_loss_real + d_loss_fake
    reset_grad()
    d_loss.backward()
    d_optimizer.step()

    # =================== Train the generator =================== #
    # A fresh forward pass is required here: the graph behind the previous
    # `outputs` was consumed by d_loss.backward() and D's weights have just
    # been updated.
    z = torch.randn(batch_size, latent_size).to(device)
    fake_images = G(z)
    outputs = D(fake_images)

    # Instead of minimizing log(1 - D(G(z))) we maximize log(D(G(z))),
    # i.e. score the fake images against the "real" labels
    g_loss = criterion(outputs, real_labels)

    # Backprop and optimize
    reset_grad()
    g_loss.backward()
    g_optimizer.step()

    if (i + 1) % 200 == 0:
      print('Epoch [{}/{}], Step [{}/{}], d_loss: {:.4f}, g_loss: {:.4f}, D(x): {:.2f}, D(G(z)): {:.2f}'
            .format(epoch + 1, num_epochs, i + 1, total_step, d_loss.item(), g_loss.item(),
                    real_score.mean().item(), fake_score.mean().item()))

  # Save one batch of real images, once, for visual comparison
  if (epoch + 1) == 1:
    images = images.reshape(images.size(0), 1, 28, 28)
    save_image(denorm(images), os.path.join(sample_dir, 'real_images.png'))

  # Save the images sampled in the last step of this epoch
  fake_images = fake_images.reshape(fake_images.size(0), 1, 28, 28)
  save_image(denorm(fake_images), os.path.join(sample_dir, 'fake_images-{}.png'.format(epoch + 1)))

# Save the model checkpoints
torch.save(G.state_dict(), 'G.ckpt')
torch.save(D.state_dict(), 'D.ckpt')


# **Variational Autoencoder**

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision.transforms import transforms
from torchvision.utils import save_image

#Device Configuration
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#Create a directory if not exist
# exist_ok=True turns the call into a no-op when the directory is already
# present, so no separate existence check is needed.
sample_dir = 'samples'
os.makedirs(sample_dir, exist_ok=True)


#Hyperparameters
image_size = 784          # one flattened 28 x 28 image
h_dim, z_dim = 400, 20    # same values as the VAE constructor defaults
num_epochs = 15
batch_size = 128
learning_rate = 0.001


# MNIST dataset
# Training split, downloaded into ../../data when missing; every image is
# converted to a tensor by ToTensor().
train_data = torchvision.datasets.MNIST(
    '../../data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

#DataLoader
# Serves the training split in shuffled mini-batches of `batch_size`.
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
)


# VAE model
class VAE(nn.Module):
  """Fully connected variational autoencoder.

  The encoder maps a flattened input to the mean and log-variance of an
  element-wise Gaussian over the latent code; the decoder maps a latent
  code back to the input size and squashes it into [0, 1] with a sigmoid.

  Args:
    image_size: number of features of one flattened input.
    h_dim: width of the hidden layer used by both encoder and decoder.
    z_dim: dimensionality of the latent code.
  """

  def __init__(self, image_size=784, h_dim=400, z_dim=20):
    super(VAE, self).__init__()
    self.fc1 = nn.Linear(image_size, h_dim)   # encoder: input -> hidden
    self.fc2 = nn.Linear(h_dim, z_dim)        # encoder head: mean
    self.fc3 = nn.Linear(h_dim, z_dim)        # encoder head: log-variance
    self.fc4 = nn.Linear(z_dim, h_dim)        # decoder: latent -> hidden
    self.fc5 = nn.Linear(h_dim, image_size)   # decoder: hidden -> output

  def encode(self, x):
    """Return ``(mu, log_var)`` of the latent distribution for ``x``."""
    h = F.relu(self.fc1(x))
    return self.fc2(h), self.fc3(h)

  def reparameterize(self, mu, log_var):
    """Draw ``mu + eps * std`` with ``eps`` sampled from a standard normal.

    Writing the sample this way keeps it differentiable with respect to
    ``mu`` and ``log_var``.
    """
    std = torch.exp(log_var / 2)   # exp(log(var) / 2) == sqrt(var)
    eps = torch.randn_like(std)
    return mu + eps * std

  def decode(self, z):
    """Map latent codes ``z`` to reconstructions with values in [0, 1]."""
    h = F.relu(self.fc4(z))
    # torch.sigmoid replaces F.sigmoid, which older PyTorch releases flag
    # as deprecated; the computed values are the same.
    return torch.sigmoid(self.fc5(h))

  def forward(self, x):
    """Encode ``x``, sample a latent code and decode it.

    Returns:
      ``(x_reconst, mu, log_var)``.
    """
    mu, log_var = self.encode(x)
    z = self.reparameterize(mu, log_var)
    x_reconst = self.decode(z)
    return x_reconst, mu, log_var


#Optimizer and Calling model
# The network is built from the module-level hyperparameters so it always
# agrees with the `image_size` used to flatten the inputs and the `z_dim`
# used for sampling below.
model = VAE(image_size=image_size, h_dim=h_dim, z_dim=z_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#Training the model
for epoch in range(num_epochs):

  for i, (x, _) in enumerate(train_loader):

    #Forward pass
    x = x.to(device).view(-1, image_size)
    x_reconst, mu, log_var = model(x)

    #Compute Reconstruction Loss and KL Divergence
    #For KL divergence, see Appendix B in VAE paper or http://yunjey47.tistory.com/43
    # reduction='sum' is the current spelling of the deprecated
    # size_average=False: the loss is summed over every element of the batch.
    reconst_loss = F.binary_cross_entropy(x_reconst, x, reduction='sum')
    kl_div = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())

    #Backprop and optimize
    loss = reconst_loss + kl_div
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i+1) % 10 == 0:
      print('Epoch [{}/{}], Step [{}/{}], Reconst Loss: {:.4f}, Kl Div: {:.4f}'
            .format(epoch+1, num_epochs, i+1, len(train_loader), reconst_loss.item(), kl_div.item()))

  # End of epoch: write sample and reconstruction grids without tracking
  # gradients.
  with torch.no_grad():

    #save the sampled images
    z = torch.randn(batch_size, z_dim).to(device)
    out = model.decode(z).view(-1, 1, 28, 28)
    save_image(out, os.path.join(sample_dir, 'sampled-{}.png'.format(epoch+1)))

    #save the reconstructed images
    # `x` is the last mini-batch of the epoch; inputs and reconstructions
    # are concatenated along the last (width) axis.
    out, _, _ = model(x)
    x_concat = torch.cat([x.view(-1, 1, 28, 28), out.view(-1, 1, 28, 28)], dim=3)
    save_image(x_concat, os.path.join(sample_dir, 'reconst-{}.png'.format(epoch+1)))



# **Neural Style Transfer**

In [None]:
from __future__ import division

import argparse

import numpy as np
import torch
import torch.nn as nn
import torchvision
from PIL import Image
from torchvision.transforms import transforms


#Device configuration
# Prefer the CUDA device when one is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

#Loading images
def load_image(image_path, transform = None, max_size = None, shape = None):
  """Load an image and convert it to a torch tensor on ``device``.

  Args:
    image_path: path handed to ``Image.open``.
    transform: callable applied to the PIL image; its result gets a leading
      axis through ``unsqueeze(0)``. The final ``.to(device)`` call needs
      a tensor, so in practice a transform that produces one is required.
    max_size: when given, the image is rescaled (keeping its aspect ratio)
      so that its longer side becomes ``max_size`` pixels.
    shape: when given, the image is resized to exactly this
      ``(width, height)`` pair (PIL order). Applied after ``max_size``.

  Returns:
    The transformed image moved to ``device``.
  """
  image = Image.open(image_path)

  if max_size:
    scale = max_size / max(image.size)
    # PIL expects a plain (width, height) pair of ints, not a float array.
    size = tuple(int(side * scale) for side in image.size)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image = image.resize(size, Image.LANCZOS)

  if shape:
    image = image.resize(tuple(shape), Image.LANCZOS)

  if transform:
    image = transform(image).unsqueeze(0)

  return image.to(device)


class VGGNet(nn.Module):
  """Pretrained VGG-19 feature stack that returns selected activations."""

  def __init__(self):
    """Select conv1_1 ~ conv5_1 activation maps."""
    super(VGGNet, self).__init__()
    # Names of the child layers of `vgg19().features` whose outputs are
    # collected by forward().
    self.select = ['0', '5', '10', '19', '28']
    self.vgg = torchvision.models.vgg19(pretrained = True).features

  def forward(self, x):
    """Extract multiple convolutional feature maps.

    Runs ``x`` through the layers of ``self.vgg`` in order and collects the
    output of every layer whose name is listed in ``self.select``.

    Returns:
      list of the selected activations, in layer order.
    """
    features = []
    for name, layer in self.vgg.named_children():
      x = layer(x)
      if name in self.select:
        features.append(x)

    return features


def main(config):
    """Run neural style transfer and periodically save the generated image.

    Starting from a copy of the content image, the pixels of ``target`` are
    optimised with Adam so that its VGG feature maps stay close to those of
    the content image while the Gram matrices of its feature maps approach
    those of the style image.

    Args:
        config: namespace providing ``content`` and ``style`` (image paths),
            ``max_size``, ``lr``, ``total_step``, ``log_step``,
            ``sample_step`` and ``style_weight``.
    """

    # Image preprocessing
    # VGGNet was trained on ImageNet where images are normalized by mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225].
    # We use the same normalization statistics here.
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize(mean = (0.485, 0.456, 0.406),
                                                         std = (0.229, 0.224, 0.225))])

    # Approximate inverse of the normalisation above (-mean/std, 1/std), used
    # to bring the optimised tensor back to displayable values before saving.
    denorm = transforms.Normalize((-2.12, -2.04, -1.80), (4.37, 4.46, 4.44))

    # Load content and style images
    # Make the style image same size as the content image. The content tensor
    # is laid out (1, C, H, W) while PIL's resize takes (width, height), so
    # the width (dim 3) has to be passed before the height (dim 2).
    content = load_image(config.content, transform, max_size=config.max_size)
    style = load_image(config.style, transform, shape=[content.size(3), content.size(2)])

    # Initialize a target image with the content image
    target = content.clone().requires_grad_(True)

    #Optimizer and calling the model
    # Only the target image is optimised; the step size comes from --lr.
    optimizer = torch.optim.Adam([target], lr=config.lr, betas=[0.5, 0.999])
    vgg = VGGNet().to(device).eval()

    # The content and style images never change, so their feature maps are
    # computed once, without gradient tracking, instead of on every step.
    with torch.no_grad():
        content_features = vgg(content)
        style_features = vgg(style)

    for step in range(config.total_step):

        # Extract multiple(5) conv feature vectors
        target_features = vgg(target)

        style_loss = 0
        content_loss = 0

        for f1, f2, f3 in zip(target_features, content_features, style_features):

            #Compute the content loss with target and content images
            content_loss += torch.mean((f1 - f2)**2)

            #Reshape Convolutional Feature Maps (batch size is 1)
            _, c, h, w = f1.size()
            f1 = f1.view(c, h*w)
            f3 = f3.view(c, h*w)

            #Compute the gram matrices of the target and style feature maps
            f1 = torch.mm(f1, f1.t())
            f3 = torch.mm(f3, f3.t())

            #Compute the style loss with target and style images
            # The parentheses matter: the mean is divided by c*h*w as a whole.
            style_loss += torch.mean((f1 - f3)**2) / (c*h*w)

        #Compute total loss, Back Propagation, and Optimize
        # Done once per step, after every layer has contributed to the
        # losses; calling backward() per layer would reuse a freed graph.
        loss = content_loss + config.style_weight * style_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (step+1) % config.log_step == 0:
            print('Step [{}/{}], Content Loss: {:.4f}, Style Loss: {:.4f}'
                  .format(step+1, config.total_step, content_loss.item(), style_loss.item()))

        if (step+1) % config.sample_step == 0:
            #save the generated image
            img = target.clone().squeeze()
            img = denorm(img).clamp_(0, 1)
            torchvision.utils.save_image(img, 'output-{}.png'.format(step+1))


if __name__ == "__main__":
    # (flag, type, default) of every command-line option, in registration
    # order.
    cli_options = (
        ('--content', str, 'png/content.png'),
        ('--style', str, 'png/style.png'),
        ('--max_size', int, 400),
        ('--total_step', int, 2000),
        ('--log_step', int, 10),
        ('--sample_step', int, 500),
        ('--style_weight', float, 100),
        ('--lr', float, 0.003),
    )

    parser = argparse.ArgumentParser()
    for flag, value_type, default_value in cli_options:
        parser.add_argument(flag, type=value_type, default=default_value)

    # Parse the command line, echo the resulting settings and start the run.
    config = parser.parse_args()
    print(config)
    main(config)


# **Image Captioning**

In [None]:
import torch.nn as nn
import torch
import torchvision.models as models
from torch.nn.utils.rnn import pack_padded_sequence

#define an Encoder to extract features from the images

class EncoderCNN(nn.Module):
  """Image encoder: ResNet-152 without its classifier, then a linear
  projection and batch normalisation to ``embed_size`` features."""

  def __init__(self, embed_size):
    """Load the pretrained ResNet-152 model and replace top fc layer.

    Args:
      embed_size: number of features of the returned image embedding.
    """
    super(EncoderCNN, self).__init__()
    resnet = models.resnet152(pretrained = True)

    # delete the last fc layer.
    modules = list(resnet.children())[:-1]
    self.resnet = nn.Sequential(*modules)
    self.linear = nn.Linear(resnet.fc.in_features, embed_size)
    self.bn = nn.BatchNorm1d(embed_size, momentum=0.01)

  # Defined at class level (not nested inside __init__) so that nn.Module
  # actually dispatches calls to it.
  def forward(self, images):
    """Extract feature vectors from input images.

    The ResNet part runs under ``torch.no_grad()``, so no gradients reach
    it; only ``self.linear`` and ``self.bn`` receive gradients.

    Returns:
      Tensor with one ``embed_size``-dimensional row per input image.
    """
    with torch.no_grad():
      features = self.resnet(images)

    # Flatten everything but the batch axis before the linear projection.
    features = features.reshape(features.size(0), -1)
    features = self.bn(self.linear(features))
    return features


class DecoderRNN(nn.Module):
  """LSTM caption decoder fed with an image feature vector followed by the
  embedded caption tokens."""

  def __init__(self, embed_size, hidden_size, vocab_size, num_layers, max_seq_length= 20):
    """Set the hyper-parameters and build the layers.

    Args:
      embed_size: size of the token embeddings (and of the image features).
      hidden_size: hidden size of the LSTM.
      vocab_size: number of tokens in the vocabulary.
      num_layers: number of stacked LSTM layers.
      max_seq_length: number of tokens generated by ``sample``.
    """
    super(DecoderRNN, self).__init__()
    self.embed = nn.Embedding(vocab_size, embed_size)
    self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
    self.linear = nn.Linear(hidden_size, vocab_size)
    self.max_seq_length = max_seq_length
    # Former (misspelled) attribute name, kept as an alias for existing code.
    self.max_seg_length = max_seq_length

  def forward(self, features, captions, lengths):
    """Decode image feature vectors and generates captions.

    Args:
      features: image features; they are inserted as the first time step,
        so they need one ``embed_size``-dimensional row per caption.
      captions: token ids, one row per caption.
      lengths: valid length of every sequence of the concatenated input,
        in the form expected by ``pack_padded_sequence``.

    Returns:
      Vocabulary scores for every packed time step, shaped
      ``(sum(lengths), vocab_size)``.
    """
    embeddings = self.embed(captions)
    # Prepend the image feature as the first element of every sequence.
    embeddings = torch.cat((features.unsqueeze(1), embeddings), 1)
    packed = pack_padded_sequence(embeddings, lengths, batch_first=True)
    hiddens, _ = self.lstm(packed)
    # hiddens is a PackedSequence; element 0 is its flat data tensor.
    outputs = self.linear(hiddens[0])
    return outputs

  def sample(self, features, states=None):
    """Generate captions for given image features using greedy search.

    Args:
      features: image features, one row per image.
      states: optional initial LSTM state.

    Returns:
      Tensor of token ids shaped ``(batch_size, max_seq_length)``.
    """
    sampled_ids = []
    inputs = features.unsqueeze(1)

    for _ in range(self.max_seq_length):
      hiddens, states = self.lstm(inputs, states)
      outputs = self.linear(hiddens.squeeze(1))
      # Greedy choice: keep the highest-scoring token and feed it back in.
      _, predicted = outputs.max(1)
      sampled_ids.append(predicted)
      inputs = self.embed(predicted)
      inputs = inputs.unsqueeze(1)

    sampled_ids = torch.stack(sampled_ids, 1)                # sampled_ids: (batch_size, max_seq_length)
    return sampled_ids



