In [1]:
import torch
import numpy as np

In [2]:
arr=np.array([1,2,3,4])
tensor=torch.from_numpy(arr)
print(type(tensor))
num=tensor.numpy()
print(type(num))

<class 'torch.Tensor'>
<class 'numpy.ndarray'>


In [3]:
import pandas as pd
series=pd.Series([1,2,4,5])
tensor=torch.from_numpy(series.values)
print(type(tensor))

<class 'torch.Tensor'>


In [4]:
# to return values from tensor
tensor[0].item()

1

In [5]:
#dot product
u=torch.tensor([1,3])
v=torch.tensor([4,3])
product=torch.dot(u,v)
product

tensor(13)

In [6]:
#linspace
torch.linspace(-2,2,steps=4)

tensor([-2.0000, -0.6667,  0.6667,  2.0000])

In [7]:
#Derivative of values set requires_grad parameter
x=torch.tensor(4,requires_grad=True,dtype=float)
y=x**2
y

tensor(16., dtype=torch.float64, grad_fn=<PowBackward0>)

In [8]:
#backward() for calculating derivative
# backward() stores dy/dx in x.grad (x was created with requires_grad=True);
# y itself is not modified, so the value displayed below is still x**2.
y.backward()
y

tensor(16., dtype=torch.float64, grad_fn=<PowBackward0>)

In [9]:
from torch.utils.data import Dataset
class toy_set(Dataset):
    """Toy dataset in which every sample is the pair (x=[2., 2.], y=[1.]).

    Args:
        length: number of samples held by the dataset.
        transform: optional callable applied to each ``(x, y)`` tuple on
            access; any falsy value (the default ``False``) disables it.
    """

    def __init__(self,length=100,transform=False):
        self.len=length
        self.transform=transform
        self.x=torch.full((length,2),2.0)
        self.y=torch.ones(length,1)

    def __len__(self):
        return self.len

    def __getitem__(self,index):
        pair=(self.x[index],self.y[index])
        # Only route the sample through the transform when one was supplied.
        return self.transform(pair) if self.transform else pair

In [10]:
class add_mult:
    """Callable transform: adds ``addx`` to x and multiplies y by ``muly``.

    The sample is indexed as ``(x, y)`` and a new 2-tuple is returned.
    """

    def __init__(self,addx=1,muly=1):
        self.addx,self.muly=addx,muly

    def __call__(self,sample):
        return sample[0]+self.addx,sample[1]*self.muly

In [11]:
from torchvision import transforms
#Apply multiple transform methods on dataset elements
multiple_t=transforms.Compose([add_mult(),add_mult()])

In [12]:
#torchvision has some inbuilt datasets which we can use
import torchvision.datasets as dsets
dataset=dsets.MNIST(root="./",download=True,transform=transforms.ToTensor()) #transform converts the image to a tensor
dataset

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./
    Split: Train
    StandardTransform
Transform: ToTensor()

In [13]:
#Linear regression model
from torch.nn import Linear
model=Linear(in_features=1,out_features=1)
print(model.parameters())
X=torch.tensor([[1.0]])
print(X.type())
yhat=model(X)
yhat

<generator object Module.parameters at 0x7f96a294a890>
torch.FloatTensor


tensor([[-0.5775]], grad_fn=<AddmmBackward0>)

In [20]:
# we generally create custom modules which inherits nn.Module to pack multiple models in a class
import torch.nn as nn

class LR(nn.Module):
    """Linear-regression module wrapping a single ``nn.Linear`` layer.

    Args:
        in_featrues: size of each input sample. The misspelled name is kept
            so that existing keyword callers keep working.
        out_features: size of each output sample.
    """
    def __init__(self,in_featrues=1,out_features=1):
        super(LR,self).__init__()
        # Honour the constructor arguments; the layer used to be hard-coded to 1x1.
        self.linear=nn.Linear(in_features=in_featrues,out_features=out_features)

    def forward(self,X):
        """Return the linear prediction for the batch ``X``."""
        out=self.linear(X)
        return out

    def criterion(self,yhat,y):
        """Return the mean squared error between ``yhat`` and ``y``."""
        return torch.mean((yhat-y)**2)

In [25]:
model=LR(1,1)
yhat=model.forward(X)


In [26]:
model.state_dict()

OrderedDict([('linear.weight', tensor([[0.6237]])),
             ('linear.bias', tensor([-0.5253]))])

In [27]:
w=torch.tensor(-15.0,requires_grad=True)
b=torch.tensor(10.0,requires_grad=True)
X=torch.arange(-3,3,0.1).view(-1,1)
lr=0.01
# print(X)
f=1*X-1
y=f+0.1*torch.randn(X.size())
# print(y)

In [30]:
# Manual gradient descent on the scalar parameters w and b.
# The prediction has to be built from w and b themselves; otherwise
# loss.backward() never populates w.grad / b.grad.
for epoch in range(15):
    yhat=w*X+b
    loss=torch.mean((yhat-y)**2)  # mean squared error
    loss.backward()
    # Update outside of autograd tracking, then clear the accumulated gradients.
    with torch.no_grad():
        w-=lr*w.grad
        b-=lr*b.grad
    w.grad.zero_()
    b.grad.zero_()

criterion=nn.MSELoss()


In [31]:
#linear regression in multiple dimension
import torch.nn as nn
from torch.nn import Linear 
torch.manual_seed(1)
model=Linear(in_features=2,out_features=1)
print(model.parameters)

<bound method Module.parameters of Linear(in_features=2, out_features=1, bias=True)>


In [32]:
# Two samples with two features each, matching Linear(in_features=2).
# (A dead 3-column assignment that was immediately overwritten has been removed.)
X=torch.tensor([[1.0,2.0],[2.0,5.0]])

yhat=model(X)
print(yhat)

tensor([[-0.3969],
        [-0.9689]], grad_fn=<AddmmBackward0>)


In [33]:
class MLR(nn.Module):
    """Multiple linear regression: one ``Linear`` layer from ``inn`` inputs to ``out`` outputs."""

    def __init__(self,inn=1,out=1):
        super().__init__()
        self.in_features,self.out_features=inn,out
        self.Linear=Linear(inn,out)

    def forward(self,X):
        """Apply the linear layer to the batch ``X``."""
        prediction=self.Linear(X)
        return prediction

In [34]:
model=MLR(2,1)
yhat=model.forward(X)
yhat

tensor([[-0.5754],
        [-2.2406]], grad_fn=<AddmmBackward0>)

In [35]:
from torch import nn,optim
import torch
from torch.utils.data import Dataset, DataLoader
class Data2d(Dataset):
    """Synthetic 20-sample regression set with two identical feature columns.

    Targets are ``x @ [[1], [1]] + 1`` plus Gaussian noise scaled by 0.1.
    """

    def __init__(self):
        grid=torch.arange(-1,1,0.1)
        self.x=torch.zeros(20,2)
        # Both feature columns hold the same grid of values.
        for column in range(2):
            self.x[:,column]=grid
        self.w=torch.tensor([[1.0],[1.0]])
        self.b=1
        self.f=torch.mm(self.x,self.w)+self.b
        noise=0.1*torch.randn((20,1))
        self.y=self.f+noise
        self.len=self.x.shape[0]

    def __len__(self):
        return self.len

    def __getitem__(self,idx):
        return self.x[idx],self.y[idx]

In [36]:
dataloader=Data2d()
criterion=nn.MSELoss()
trainloader=DataLoader(dataset=dataloader,batch_size=2)
model=MLR(2,1)
optimizer=optim.SGD(model.parameters(),lr=0.1)


In [37]:
# Mini-batch SGD: 10 passes over trainloader.
for epoch in range(10):
    for x,y in trainloader:
        yhat=model.forward(x)
        loss=criterion(yhat,y)
        optimizer.zero_grad() # reset the gradients accumulated by the previous step
        loss.backward()
        optimizer.step() # update the parameters from the freshly computed gradients

In [38]:
#Logistic regression
import torch
import torch.nn as nn
z=torch.arange(-100.0,100.0,1).view(-1,1)
sig=nn.Sigmoid()
y=sig(z)


In [42]:
# We use nn.Sequential package for logistic regression
#For logistic regression, we require a linear function followed by a sigmoid function,
#We can combine this in a sequential function
model=nn.Sequential(nn.Linear(1,1),nn.Sigmoid())
yhat=model(z)
# yhat

In [40]:
class LogisticRegression(nn.Module):
    """Logistic regression: a linear layer followed by a sigmoid.

    Args:
        inn: number of input features.
        out: number of output units.
    """
    def __init__(self,inn=1,out=1):
        # super() must name this class; it previously named MLR, which raises TypeError.
        super(LogisticRegression,self).__init__()
        self.in_features=inn
        self.out_features=out
        self.Linear=Linear(in_features=inn,out_features=out)

    def forward(self,X):
        """Return sigmoid(Linear(X)), i.e. values in (0, 1)."""
        # torch.sigmoid is the functional form; torch.Sigmoid does not exist.
        return torch.sigmoid(self.Linear(X))

In [43]:
import torch
# `import torch.nn as nn,optim` would try to import a top-level module named
# `optim`; both names live under the torch package.
from torch import nn,optim
import torchvision.transforms as transforms
import torchvision.datasets as dsets
# ToTensor converts each image to a tensor.
train_dataset=dsets.MNIST(root="./",download=True,train=True,transform=transforms.ToTensor())
val_dataset=dsets.MNIST(root="./",download=True,train=False,transform=transforms.ToTensor())


In [None]:
# train_dataset[0]


In [None]:
class Softmax(nn.Module):
    """Softmax-classifier body: a single ``Linear`` layer that returns raw scores."""

    def __init__(self,inn=1,out=1):
        super().__init__()
        self.in_features,self.out_features=inn,out
        self.Linear=Linear(inn,out)

    def forward(self,X):
        """Return the output of the linear layer for the batch ``X``."""
        logits=self.Linear(X)
        return logits

In [None]:
input_dim=28*28
output_dim=10
smodel=Softmax(input_dim,output_dim)

In [None]:
train_loader=torch.utils.data.DataLoader(dataset=train_dataset,batch_size=100)
val_loader=torch.utils.data.DataLoader(dataset=val_dataset,batch_size=500)
# Optimize the softmax classifier (smodel); `model` is an unrelated network
# left over from earlier cells, so stepping on it would never train smodel.
optimizer=optim.SGD(smodel.parameters(),lr=0.01)
criterion=nn.CrossEntropyLoss()


In [None]:
# CrossEntropyLoss applies log-softmax internally, so the model must hand it
# raw logits of shape (batch, classes) together with integer class labels.

n_epoch=10
for e in range(n_epoch):
    for x,y in train_loader:
        optimizer.zero_grad()
        z=smodel(x.view(-1,28*28))
        # Feed the logits (not their argmax) so the loss is differentiable.
        loss=criterion(z,y)
        loss.backward()
        optimizer.step()

    # Count correct predictions on the validation set for this epoch.
    correct=0
    with torch.no_grad():
        for x,y in val_loader:
            z=smodel(x.view(-1,28*28))
            _,yhat=torch.max(z,1) # second return value of max is the argmax over classes
            correct+=(yhat==y).sum().item()
# Accuracy of the last epoch: correct predictions / number of validation samples.
accuracy=correct/len(val_loader.dataset)
print(accuracy)

In [None]:
from torch import sigmoid 
class NN(nn.Module):
    """Two-layer network with sigmoid activations after both layers.

    Args:
        inn: number of input features.
        out: number of output units.
        hidden: width of the hidden layer (defaults to 1, the previously
            hard-coded value).
    """
    def __init__(self,inn,out,hidden=1):
        super(NN,self).__init__()
        self.hidden=hidden
        self.lin1=nn.Linear(inn,self.hidden)
        self.lin2=nn.Linear(self.hidden,out)

    def forward(self,x):
        """Return sigmoid(lin2(sigmoid(lin1(x))))."""
        x=sigmoid(self.lin1(x))
        x=sigmoid(self.lin2(x))
        return x

In [None]:
model=NN(1,1)
x=torch.tensor([[0.4]])
y=model.forward(x)
y

In [None]:
model=nn.Sequential(
    nn.Linear(1,2),nn.Sigmoid(),nn.Linear(2,4),nn.Sigmoid()
    )

In [None]:
import torch
a=[1,2,3]
t_a=torch.tensor(a,dtype=torch.int32)
print(t_a.shape)
import numpy as np
b=np.array([4,5,6],dtype=np.int32)
t_b=torch.from_numpy(b)
print(type(t_b))
t_ones=torch.ones(2,3)
print(t_ones)
t_rand=torch.rand(3,4)
print(t_rand)

In [None]:
print(type(t_a),t_a.dtype)
t_a_new=t_a.to(torch.int64)
print(t_a_new.dtype)

In [None]:
#linear regression in multiple dimension
import torch.nn as nn
from torch.nn import Linear 
torch.manual_seed(1)
model=Linear(in_features=2,out_features=1)
print(model.parameters)
model=nn.Sequential(
    nn.Linear(2,1),
    nn.ReLU()
)
print(model[0].weight)
nn.init.xavier_normal_(model[0].weight)
print(model[0].weight)
print(model)

In [None]:
class NoisyLayer(nn.Module):
    """Linear layer that adds zero-mean Gaussian noise to its input when training.

    Args:
        input_size: number of input features.
        output_size: number of output features.
        noise_dev: standard deviation of the injected noise.
    """

    def __init__(self,input_size,output_size,noise_dev=0.1):
        super().__init__()
        # Weights are Xavier-normal initialized, the bias starts at zero.
        self.w=nn.Parameter(torch.Tensor(input_size,output_size))
        nn.init.xavier_normal_(self.w)
        b=torch.zeros(output_size)
        print("bias: ",b)
        self.b=nn.Parameter(b)
        self.noise_dev=noise_dev

    def forward(self,x,training=True):
        """Return ``x @ w + b``; the input is perturbed first when ``training`` is true."""
        if not training:
            return torch.mm(x,self.w)+self.b
        noise=torch.normal(0.0,self.noise_dev,x.shape)
        return torch.mm(x+noise,self.w)+self.b
    
    

In [None]:
torch.manual_seed(1)
noisy_layer=NoisyLayer(4,2)
x=torch.zeros((1,4))
print(noisy_layer.forward(x,training=True))
print(noisy_layer.forward(x,training=True))
print(noisy_layer.forward(x,training=False))

In [None]:
import pandas as pd
url="http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
df=pd.read_csv(url,na_values="?",skipinitialspace=True,
              sep=" ",comment="\t",
               names=['Mgp','Cylinders','Displacement','Horsepower','Weight','Acceleration','Model','Origin'])

In [None]:
df.head(4)

In [None]:
df.shape

In [None]:
df.dropna(inplace=True)
df.reset_index(drop=True,inplace=True)
df.shape

In [None]:
df.head(2)

In [None]:
import sklearn
import numpy as np
from sklearn.model_selection import train_test_split
df_train, df_test = sklearn.model_selection.train_test_split(df, train_size=0.8, random_state=1)

In [None]:
df_stats=df_train.describe().transpose()
df_stats

In [None]:
numeric_column_names=df.select_dtypes(include=np.number).columns.tolist()
numeric_column_names
numeric_column_names = ['Cylinders', 'Displacement','Horsepower', 'Weight','Acceleration']

In [None]:
# Standardize the numeric feature columns with the training-set mean/std
# (taken from df_stats); the same statistics are applied to the test split.
df_train_norm,df_test_norm=df_train.copy(),df_test.copy()
for cols in numeric_column_names:
    m=df_stats.loc[cols,'mean']
    std=df_stats.loc[cols,'std']
    # Store the result: Series.apply returns a new Series, and the original
    # code discarded it, leaving both *_norm frames un-normalized.
    df_train_norm[cols]=(df_train[cols]-m)/std
    df_test_norm[cols]=(df_test[cols]-m)/std
df_train_norm.head(5)

In [None]:
df_train_norm.iloc[203]

In [None]:
import torch
boundaries=torch.tensor([73,76,79])
v=torch.tensor(df_train_norm['Model'].values)
df_train_norm['ModelYearBucketed']=torch.bucketize(v,boundaries,right=True)

In [None]:
v=torch.tensor(df_test_norm['Model'].values)
df_test_norm['ModelYearBucketed']=torch.bucketize(v,boundaries,right=True)

In [None]:
numeric_column_names.append("ModelYearBucketed")

In [None]:
from torch.utils.data import Dataset
class TensorDataset(Dataset):
    """Dataset pairing a feature tensor ``x`` with a target tensor ``y``.

    Both tensors are indexed along their first dimension, one sample per row.
    """
    def __init__(self,x,y):
        self.x=x
        self.y=y
        # The sample count is the first dimension (rows); shape[1] is the
        # feature count and made the DataLoader see only that many samples.
        self.len=self.x.shape[0]

    def __len__(self):
        return self.len

    def __getitem__(self,index):
        return self.x[index],self.y[index]

In [None]:
from torch.nn.functional import one_hot
total_origin=len(set(df_train['Origin']))
# num_classes is fixed explicitly so the train and test encodings always have
# the same width, even if one split happens to miss a category.
origin_encoded=one_hot(torch.from_numpy(df_train_norm["Origin"].values)%total_origin,num_classes=total_origin)
x_train_numeric=torch.tensor(df_train_norm[numeric_column_names].values)
x_train=torch.cat([x_train_numeric,origin_encoded],1).float()
origin_encoded=one_hot(torch.from_numpy(df_test_norm["Origin"].values)%total_origin,num_classes=total_origin)
x_test_numeric=torch.tensor(df_test_norm[numeric_column_names].values)
x_test=torch.cat([x_test_numeric,origin_encoded],1).float()
# Regression targets.
y_train=torch.tensor(df_train_norm['Mgp'].values).float()
y_test=torch.tensor(df_test_norm['Mgp'].values).float()


In [None]:
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
train_ds=TensorDataset(x_train,y_train)
batch_size=8
torch.manual_seed(1)
train_dl=DataLoader(train_ds,batch_size,shuffle=True)
hidden_units=[8,4]
input_size=x_train.shape[1]
all_layers=[]
for h in hidden_units:
    layer=nn.Linear(input_size,h)
    all_layers.append(layer)
    all_layers.append(nn.ReLU())
    input_size=h
all_layers.append(nn.Linear(input_size,1))
model=nn.Sequential(*all_layers)
print(model.parameters)

In [None]:
loss_fn=nn.MSELoss()
optimizer=torch.optim.SGD(model.parameters(),lr=0.001)
epochs=1000

In [None]:

for e in range(epochs):
    loss_hist_train=0
    for x_batch,y_batch in train_dl:
        # Drop the trailing singleton dimension so predictions have the same
        # shape as y_batch; a (batch,1) vs (batch,) pair would be broadcast
        # inside MSELoss and produce the wrong loss.
        y_pred=model(x_batch)[:,0]
        loss=loss_fn(y_pred,y_batch)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        loss_hist_train+=loss.item()
    # Report the mean per-batch loss every 20 epochs.
    if e % 20==0:
        print(f'Epoch {e} Loss {loss_hist_train/len(train_dl):.4f}')

In [None]:
with torch.no_grad():
    pred = model(x_test.float())[:, 0]
    loss = loss_fn(pred, y_test)
    print(f'Test MSE: {loss.item():.4f}')
    print(f'Test MAE: {nn.L1Loss()(pred, y_test).item():.4f}')

In [None]:
#RNN model from scratch
import torch
import torch.nn as nn
torch.manual_seed(1)
model=nn.RNN(input_size=5,hidden_size=2,num_layers=1,batch_first=True)
print(model.weight_ih_l0)
w_xh=model.weight_ih_l0
w_hh=model.weight_hh_l0
b_xh=model.bias_ih_l0
b_hh=model.bias_hh_l0
print(w_xh.shape,w_hh.shape,b_xh.shape,b_hh.shape)

In [None]:
x_seq=torch.tensor([[1]*5,[2]*5,[3]*5]).float()
output,hn=model(torch.reshape(x_seq,(1,3,5))) # (1,3,5)=(batch_size,sequence(rows),no_features(columns))
print(x_seq[0].shape)

In [None]:
#manually computing
out_man=[]
for i in range(3):
    xt=torch.reshape(x_seq[i],(1,5))
    print(f'At timestep {i} input={xt.numpy()}')
    ht=torch.matmul(xt,torch.transpose(w_xh,0,1))+b_xh
    print("Hidden at ",i,": ",ht.detach().numpy())
    if i>0:
        prev_h=out_man[i-1]
    else:
        prev_h=torch.zeros(ht.shape)
    ot=ht+torch.matmul(prev_h,torch.transpose(w_hh,0,1))+b_hh
    ot=torch.tanh(ot)
    out_man.append(ot)
    print("Output at ",i,": ",ot.detach().numpy())

In [None]:
!pip uninstall -y torchtext 
!pip install --no-cache-dir torchtext==0.13.0
from torchtext.datasets import IMDB
train_data=IMDB(split='train')
test_data=IMDB(split='test')



In [None]:
from torch.utils.data.dataset import random_split
torch.manual_seed(1)
# The later cells refer to these splits as train_dataset / valid_dataset.
train_dataset,valid_dataset=random_split(list(train_data),[20000,5000])

In [None]:
#Find unique tokens
import re
from collections import Counter, OrderedDict
def tokenizer(text):
    """Split a review into lowercase word tokens plus any emoticons.

    HTML-like tags are stripped, emoticons are collected from the lowercased
    text, every run of non-word characters becomes a single space, and the
    emoticons (with the '-' nose removed) are appended after the words.

    Args:
        text: raw input string.

    Returns:
        List of token strings.
    """
    text = re.sub(r'<[^>]*>', '', text)
    emoticons = re.findall(
        r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', text.lower()
    )
    text = re.sub(r'[\W]+', ' ', text.lower()) +\
        ' '.join(emoticons).replace('-', '')
    tokenized = text.split()
    return tokenized
        

In [None]:
token_counts=Counter()
for label,line in train_dataset:
    tokens=tokenizer(line)
    token_counts.update(tokens)
print('Vocab-size:', len(token_counts))   

In [None]:
# Import under an alias so the factory function is not shadowed by the
# resulting `vocab` object (which made the cell fail when re-run).
from torchtext.vocab import vocab as build_vocab
# Most frequent tokens first; `key` must be passed by keyword to sorted().
sorted_by_freq=sorted(token_counts.items(),key=lambda x:x[1],reverse=True)
ordered_dict=OrderedDict(sorted_by_freq)
vocab=build_vocab(ordered_dict)
vocab.insert_token("<pad>",0)
vocab.insert_token("<unk>",1)
# Unknown tokens map to "<unk>" instead of raising on lookup.
vocab.set_default_index(1)



In [None]:
def text_pipeline(text):
    """Tokenize ``text`` and look up each token's integer id in ``vocab``."""
    return [vocab[token] for token in tokenizer(text)]


def label_pipeline(label):
    """Map the label 'pos' to 1 and any other label to 0."""
    return 1 if label=='pos' else 0

In [None]:
def collate_batch(batch):
    """Turn a list of ``(label, text)`` pairs into batch tensors.

    Each text is encoded with ``text_pipeline`` and each label with
    ``label_pipeline``; the encoded sequences are padded to a common length.

    Returns:
        Tuple ``(padded_text_list, label_list, lengths)`` where ``lengths``
        holds the unpadded length of every sequence.
    """
    # Single pass so both pipelines are invoked once per sample, in order.
    encoded = [
        (label_pipeline(_label), torch.tensor(text_pipeline(_text), dtype=torch.int64))
        for _label, _text in batch
    ]
    sequences = [seq for _, seq in encoded]
    label_list = torch.tensor([lbl for lbl, _ in encoded])
    lengths = torch.tensor([seq.size(0) for seq in sequences])
    padded_text_list = nn.utils.rnn.pad_sequence(sequences, batch_first=True)
    return padded_text_list, label_list, lengths

In [None]:
from torch.utils.data import DataLoader
dataloader = DataLoader(train_dataset, batch_size=4,shuffle=False, collate_fn=collate_batch)

In [None]:
batch_size = 32
train_dl = DataLoader(train_dataset, batch_size=batch_size,shuffle=True, collate_fn=collate_batch)
valid_dl = DataLoader(valid_dataset, batch_size=batch_size,shuffle=False, collate_fn=collate_batch)
# The test split was loaded as `test_data`; no `test_dataset` exists in this notebook.
test_dl = DataLoader(test_data, batch_size=batch_size,shuffle=False, collate_fn=collate_batch)

In [None]:
embedding=nn.Embedding(
    num_embeddings=10, # Number of distinct indices the table can look up
    embedding_dim=3, # Size of embedding features
    padding_idx=0 # Index 0 is reserved for padding
    )

In [None]:
class RNN(nn.Module):
    """Sequence classifier: embedding -> LSTM -> two linear layers -> sigmoid.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: size of each embedding vector.
        rnn_hidden_size: hidden size of the LSTM.
        fc_hidden_size: width of the hidden fully connected layer.
    """
    def __init__(self, vocab_size,embed_dim,rnn_hidden_size,fc_hidden_size):
        super().__init__()
        self.embedding=nn.Embedding(vocab_size,embedding_dim=embed_dim,padding_idx=0)

        self.rnn = nn.LSTM(embed_dim, rnn_hidden_size,batch_first=True)
        self.fc1=nn.Linear(rnn_hidden_size,fc_hidden_size)
        self.relu1=nn.ReLU()
        # Second linear layer; it used to overwrite fc1, dropping the hidden layer.
        self.fc2=nn.Linear(fc_hidden_size,1)
        self.sigmoid=nn.Sigmoid()

    def forward(self, text,lengths):
        """Return one sigmoid score per sequence, shape (batch, 1).

        Args:
            text: integer token ids of shape (batch, seq_len); 0 is padding.
            lengths: unpadded length of each sequence, shape (batch,).
        """
        out=self.embedding(text)
        # Pack so the LSTM skips padded positions; lengths must live on the CPU.
        out=nn.utils.rnn.pack_padded_sequence(
            out,lengths.cpu(),enforce_sorted=False,batch_first=True)
        _,(hidden,_)=self.rnn(out)
        # Final hidden state of the last LSTM layer summarizes each sequence.
        out=hidden[-1,:,:]
        out=self.relu1(self.fc1(out))
        return self.sigmoid(self.fc2(out))

# Smoke test with a small vocabulary: 5 sequences of 3 token ids each.
model = RNN(vocab_size=64, embed_dim=32, rnn_hidden_size=16, fc_hidden_size=16)
print(model)
demo_text=torch.randint(1,64,(5,3))
demo_lengths=torch.full((5,),3)
model(demo_text,demo_lengths)