In [1]:
#https://pytorch.org/docs/stable/nn.functional.html

In [None]:
#transpose1d 

#applies a 1D transposed convolution to an input signal

import torch 
import torch.nn.functional as F 

inputs = torch.randn(2,16,50) 
weights = torch.randn(16,33,5)

bias = torch.rand(33)

output = F.conv_transpose1d(
    input = inputs,
    weight = weights,
    bias = bias,
    stride = 2,
    padding = 1,
    output_padding=1,
    groups=1,
    dilation=1
)

print('Input shape:',inputs.shape)
print('Weight shape:', weights.shape)
print('Bias shape:', bias.shape)
print('Output shape:', output.shape)

Input shape: torch.Size([2, 16, 50])
Weight shape: torch.Size([16, 33, 5])
Bias shape: torch.Size([33])
Output shape: torch.Size([2, 33, 102])


In [2]:
#transpose2d

#applies 2d transposed convolution to an input image. 
# Transposed convolutions are commonly used in tasks like upsampling image generation and semantic segmentations

import torch 
import torch.nn as nn

# batch_size = 1, in_channels = 4 , h = 5 w =5
inputs = torch.randn(1,4,5,5)

# in_channel = 4 , out_channels = 8 kernel_height = 3, kernel_width = 3
weights = torch.randn( 4, 8, 3,3)
bias = torch.randn(8)

output = F.conv_transpose2d(
    input = inputs,
    weight = weights,
    bias = bias,
    stride = 2,
    padding = 1,
    output_padding = 1,
    groups = 1,
    dilation =1
)

print('Input shape:',inputs.shape)
print('Weight shape:',weights.shape)
print('Bias shape:',bias.shape)
print('Output shape:', output.shape)


Input shape: torch.Size([1, 4, 5, 5])
Weight shape: torch.Size([4, 8, 3, 3])
Bias shape: torch.Size([8])
Output shape: torch.Size([1, 8, 10, 10])


In [3]:
#transpose3d 

#function applies a 3d transposed convolution to a 3d tensor
#transposed convolutions in 3d are commonly used in tasks like volumetric upsamping ,medical imagin and video processing

import torch 
import torch.nn.functional as F

# define input tensor
    # batch_size = 2 , in_channels = 16, depth = 50, kernel_height =3, kernel_widht = 3
inputs = torch.randn(2,16,50,10,20) 

#define weight tensor
    #in_channels = 16, out_channels =33, kernel_depth=3, kernel_height=3, kernel_width =3
weights = torch.randn(16,33,3,3,3)

bias = torch.randn(33) #define optional bias tensor (Out_channel = 33)

output = F.conv_transpose3d(
    input=inputs,
    weight=weights,
    bias = bias,
    stride =2, #stride of the conv
    padding = 1,
    output_padding =1,
    groups = 1,
    dilation =1 #dilation rate
)

print('Input shaoe:',inputs.shape)
print('Weight shape:',weights.shape)
print('Bias shape:', bias.shape)
print('Output shape:', output.shape)

Input shaoe: torch.Size([2, 16, 50, 10, 20])
Weight shape: torch.Size([16, 33, 3, 3, 3])
Bias shape: torch.Size([33])
Output shape: torch.Size([2, 33, 100, 20, 40])


In [None]:
#http://turbolearn.ai/

In [None]:
#unfold 

# function extracts sliding local bloacls from batched input tensor
# this is commonly used in tasks like convolutional neural networks where you need to extract small regions of an image for processing

import torch 
import torch.nn.functional as F 

#define a 4d input
    # batch_size = 1 , channels =3, height = 4, width =4
input_tensor = torch.arange(48, dtype=torch.float32).reshape(1,3,4,4)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:\n', input_tensor)

#parameters for the unfold operation
kernel_size = (2,2)
stride = 1
padding = 0
dilation = 1

unfolded = F.unfold(input=input_tensor, kernel_size = kernel_size, stride=stride, padding=padding, dilation=dilation)

#print the unfolder tensor
print('\n Unfolder Tensor Shape:',unfolded.shape)
print('Unfolded Tensor:\n', unfolded)

#fold
#Optinaly , fold the tensor back to its original shape
#it aggragtes overlapping patches using summation by default
folded = F.fold(input=unfolded, output_size=(4,4),kernel_size=kernel_size, padding=padding, dilation=dilation)
print('\nFolded Tensor Shape:', folded.shape)
print('Folded Tensor:\n', folded)

Input Tensor Shape: torch.Size([1, 3, 4, 4])
Input Tensor:
 tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]],

         [[16., 17., 18., 19.],
          [20., 21., 22., 23.],
          [24., 25., 26., 27.],
          [28., 29., 30., 31.]],

         [[32., 33., 34., 35.],
          [36., 37., 38., 39.],
          [40., 41., 42., 43.],
          [44., 45., 46., 47.]]]])

 Unfolder Tensor Shape: torch.Size([1, 12, 9])
Unfolded Tensor:
 tensor([[[ 0.,  1.,  2.,  4.,  5.,  6.,  8.,  9., 10.],
         [ 1.,  2.,  3.,  5.,  6.,  7.,  9., 10., 11.],
         [ 4.,  5.,  6.,  8.,  9., 10., 12., 13., 14.],
         [ 5.,  6.,  7.,  9., 10., 11., 13., 14., 15.],
         [16., 17., 18., 20., 21., 22., 24., 25., 26.],
         [17., 18., 19., 21., 22., 23., 25., 26., 27.],
         [20., 21., 22., 24., 25., 26., 28., 29., 30.],
         [21., 22., 23., 25., 26., 27., 29., 30., 31.],
         [32., 33., 34., 36., 37., 

In [6]:
#avg_pool1d

# applies 1d average pooling over an input singal
# used in tasks like time series analysis or audio processing 

import torch
import torch.nn.functional as F 

#define a 3d input tensor 
    #batch_size = 1, channels = 1, length =7
input_tensor = torch.tensor([[1,2,3,4,5,6,7]], dtype = torch.float32)

print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:\n',input_tensor)

kernel_size = 3,
stride = 2
padding = 0
ceil_mode = False
count_include_pad = True 

#aaply 1d average pooling 
output = F.avg_pool1d(
    input = input_tensor,
    kernel_size = kernel_size,
    stride = stride ,
    padding = padding,
    ceil_mode = ceil_mode,
    count_include_pad = count_include_pad
)

print('\nOutput Tensor Shape:', output.shape)
print('Output Tensor:\n', output)

Input Tensor Shape: torch.Size([1, 7])
Input Tensor:
 tensor([[1., 2., 3., 4., 5., 6., 7.]])

Output Tensor Shape: torch.Size([1, 3])
Output Tensor:
 tensor([[2., 4., 6.]])


In [11]:
#torch.nn.functional.avg_pool2d 

# function applies 2d average pooling over an input image or feature map
# commonly used in CNN for downsampling

import torch 
import torch.nn.functional as F

#define 4d input tensor
    #batch_size =1, channels=1, height=6, width=6
input_tensor = torch.arange(36, dtype=torch.float32).reshape(1, 1, 6, 6)
print("Input Tensor Shape:", input_tensor.shape)
print("Input Tensor:\n", input_tensor)

# Parameters for the avg_pool2d operation
kernel_size = (2, 2)  # Size of the pooling window
stride = (2, 2)       # Stride of the pooling window
padding = 0           # Padding added to the input
ceil_mode = False     # Use floor instead of ceil to compute output size
count_include_pad = True  # Include zero-padding in averaging calculation
divisor_override = None   # Use the default divisor (pooling region size)

# Apply 2D average pooling
output = F.avg_pool2d(
    input=input_tensor,
    kernel_size=kernel_size,
    stride=stride,
    padding=padding,
    ceil_mode=ceil_mode,
    count_include_pad=count_include_pad,
    divisor_override=divisor_override
)

# Print the output tensor
print("\nOutput Tensor Shape:", output.shape)
print("Output Tensor:\n", output)



Input Tensor Shape: torch.Size([1, 1, 6, 6])
Input Tensor:
 tensor([[[[ 0.,  1.,  2.,  3.,  4.,  5.],
          [ 6.,  7.,  8.,  9., 10., 11.],
          [12., 13., 14., 15., 16., 17.],
          [18., 19., 20., 21., 22., 23.],
          [24., 25., 26., 27., 28., 29.],
          [30., 31., 32., 33., 34., 35.]]]])

Output Tensor Shape: torch.Size([1, 1, 3, 3])
Output Tensor:
 tensor([[[[ 3.5000,  5.5000,  7.5000],
          [15.5000, 17.5000, 19.5000],
          [27.5000, 29.5000, 31.5000]]]])


In [5]:

#torch.nn.functional.avg_pool3d

# apples 3d average pooling over a volumetirc input tensor
# commonly used in tasks like medical imaging

import torch
import torch.nn.functional as F 

input_tensor = torch.arange(64, dtype=torch.float32).reshape(1,1,4,4,4)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:', input_tensor)

#parameters for the avg_pool2d operation
kernel_size = (2,2,2) #size of the pooling window
stride = (2,2,2) #stride of the pooling window 
padding = 0 #padding added to the input
ceil_mode = False  #use floor instead of ceil to compute output size
count_include_pad = True #include zero-padding in averaging calculation
divisor_override = None  #use the default divisor (pooling region size)

#apply 3d average pooling
output = F.avg_pool3d(
    input = input_tensor,
    kernel_size=kernel_size,
    stride=stride,
    padding=padding,
    ceil_mode=ceil_mode,
    count_include_pad=count_include_pad,
    divisor_override=divisor_override
)

#print the output tensor
print('\n OUtput Tensor Shape:', output.shape)
print('Output Tensor:', output)

# For each pooling window, the average of the values inside the window is computed.
# The output shape depends on the input shape, kernel size, stride, and padding.


Input Tensor Shape: torch.Size([1, 1, 4, 4, 4])
Input Tensor: tensor([[[[[ 0.,  1.,  2.,  3.],
           [ 4.,  5.,  6.,  7.],
           [ 8.,  9., 10., 11.],
           [12., 13., 14., 15.]],

          [[16., 17., 18., 19.],
           [20., 21., 22., 23.],
           [24., 25., 26., 27.],
           [28., 29., 30., 31.]],

          [[32., 33., 34., 35.],
           [36., 37., 38., 39.],
           [40., 41., 42., 43.],
           [44., 45., 46., 47.]],

          [[48., 49., 50., 51.],
           [52., 53., 54., 55.],
           [56., 57., 58., 59.],
           [60., 61., 62., 63.]]]]])

 OUtput Tensor Shape: torch.Size([1, 1, 2, 2, 2])
Output Tensor: tensor([[[[[10.5000, 12.5000],
           [18.5000, 20.5000]],

          [[42.5000, 44.5000],
           [50.5000, 52.5000]]]]])


In [None]:
#torch.nn.functional.max_pool1d

# apples 1d max pooling over an input signal, which is commonly used in task like time series analysis or audio processing
# max pooling extracts the maximum value from each sliding window
# reducint the dimensionality of the input while preserving important features

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[1,3,5,6,9,2,4]], dtype=torch.float32)
print('Input Tensor Shape:',input_tensor.shape)
print('Input Tensor:\n', input_tensor)

kernel_size = 3
stride = 2
padding = 0
dilation = 1
ceil_mode = False 
return_indices = True 

#apply 1d max pooling
output, indices = F.max_pool1d(
    input=input_tensor,
    kernel_size=kernel_size,
    stride=stride,
    padding=padding,
    dilation=dilation,
    ceil_model=ceil_mode,
    return_indices=return_indices
)

#print the output tensor and indices
print('\nOutput Tensor Shape:',output.shape)
print('Output Tensor (Max Values):\n',output)
print('Indices of Max Values:\n',indices)

Input Tensor Shape: torch.Size([1, 1, 4, 4, 4])
Input Tensor:
 tensor([[[[[ 0.,  1.,  2.,  3.],
           [ 4.,  5.,  6.,  7.],
           [ 8.,  9., 10., 11.],
           [12., 13., 14., 15.]],

          [[16., 17., 18., 19.],
           [20., 21., 22., 23.],
           [24., 25., 26., 27.],
           [28., 29., 30., 31.]],

          [[32., 33., 34., 35.],
           [36., 37., 38., 39.],
           [40., 41., 42., 43.],
           [44., 45., 46., 47.]],

          [[48., 49., 50., 51.],
           [52., 53., 54., 55.],
           [56., 57., 58., 59.],
           [60., 61., 62., 63.]]]]])

Output Tensor Shape: torch.Size([1, 1, 2, 2, 2])
Output Tensor:
 tensor([[[[[10.5000, 12.5000],
           [18.5000, 20.5000]],

          [[42.5000, 44.5000],
           [50.5000, 52.5000]]]]])


In [1]:
#torch.nn.functional.max_pool2d

# fuction apples 2d max pooling over an input tensor which is commonly used in CNN for downsampling feature mapes while preserving the most significant features
import torch 
import torch.nn.functional as F

input_tensor = torch.arange(36,dtype=torch.float32).reshape(1,1,6,6)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:', input_tensor)

#parameters for the max_pool2d operation
kernel_size = (2,2)
stride = (2,2)
padding = 0
dilation = 1
ceil_mode = False 
return_indices = True

output, indices = F.max_pool2d(
    input=input_tensor,
    kernel_size=kernel_size,
    stride=stride,
    padding=padding,
    dilation=dilation,
    ceil_mode=ceil_mode,
    return_indices=return_indices
)

print('\nOutput Tensor Shape:',output.shape)
print('Output Tensor (Max values):\n',output)
print('Indices of Max Values:\n',indices)

# for each pooling window, the maxium value is extracted 
# if return_indices=True, the indices of the maximum values are also returned

Input Tensor Shape: torch.Size([1, 1, 6, 6])
Input Tensor: tensor([[[[ 0.,  1.,  2.,  3.,  4.,  5.],
          [ 6.,  7.,  8.,  9., 10., 11.],
          [12., 13., 14., 15., 16., 17.],
          [18., 19., 20., 21., 22., 23.],
          [24., 25., 26., 27., 28., 29.],
          [30., 31., 32., 33., 34., 35.]]]])

Output Tensor Shape: torch.Size([1, 1, 3, 3])
Output Tensor (Max values):
 tensor([[[[ 7.,  9., 11.],
          [19., 21., 23.],
          [31., 33., 35.]]]])
Indices of Max Values:
 tensor([[[[ 7,  9, 11],
          [19, 21, 23],
          [31, 33, 35]]]])


In [1]:
#max_pool3d

# function applies 3d max pooling over a volumetric input tensor which iscommonly used in tasks like medical imaging or video processing. max pooling extracts the maxium value from each sliding window reducing the dimensionality of the input while preserving important faetures

import torch 
import torch.nn.functional as F 
#define a 5D input tensor
    # batch_size = 1, channels = 1, depth = 4, height = 4, width = 4
input_tensor = torch.arange(64, dtype=torch.float32).reshape(1,1,4,4,4)
print('Input Tensor Shape:',input_tensor.shape)
print('Input Tensor:\n', input_tensor)

#parameters for the max_pool3d operation
kernel_size = (2,2,2) #size of the poolingwindow
stride = (2,2,2) #stride of the pooling window
padding= 0
dilation = 1
ceil_mode = False 
return_indices = True

output , indices = F.max_pool3d(
    input=input_tensor,
    kernel_size = kernel_size,
    stride=stride,
    padding=padding,
    dilation=dilation,
    ceil_mode=ceil_mode,
    return_indices=return_indices
)

print('\n Output Tensor Shape:', output.shape)
print('Output Tensor (Max Values):\n',output)
print('Indices of Max Values:\n',indices)


Input Tensor Shape: torch.Size([1, 1, 4, 4, 4])
Input Tensor:
 tensor([[[[[ 0.,  1.,  2.,  3.],
           [ 4.,  5.,  6.,  7.],
           [ 8.,  9., 10., 11.],
           [12., 13., 14., 15.]],

          [[16., 17., 18., 19.],
           [20., 21., 22., 23.],
           [24., 25., 26., 27.],
           [28., 29., 30., 31.]],

          [[32., 33., 34., 35.],
           [36., 37., 38., 39.],
           [40., 41., 42., 43.],
           [44., 45., 46., 47.]],

          [[48., 49., 50., 51.],
           [52., 53., 54., 55.],
           [56., 57., 58., 59.],
           [60., 61., 62., 63.]]]]])

 Output Tensor Shape: torch.Size([1, 1, 2, 2, 2])
Output Tensor (Max Values):
 tensor([[[[[21., 23.],
           [29., 31.]],

          [[53., 55.],
           [61., 63.]]]]])
Indices of Max Values:
 tensor([[[[[21, 23],
           [29, 31]],

          [[53, 55],
           [61, 63]]]]])


In [2]:
#max_unpool1d,
#perform the inverse operation of max_pool1d, reconstructing the orignal input tensor from the pooled output and indices returned by max_pool1d
# requre outou from a previos=is max_pool1d operation , including both the pooled vauues and the indices of the maximum values

import torch 
import torch.nn.functional as F 

#define a 3d input tensor
input_tensor = torch.tensor([[1,3,5,7,9,2,4]], dtype=torch.float32)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:\n',input_tensor)

#parameters for max_pool1d
kernel_size = 3
stride = 2
padding = 0
return_indices = True 

pooled_output, indices = F.max_pool1d(
    input=input_tensor,
    kernel_size=kernel_size,
    stride=stride,
    padding=padding,
    return_indices=return_indices
)

print('\nPooled Output Shape:',pooled_output.shape)
print('Pooled Output:\n',pooled_output)
print('Indices of Max Values:\n', indices)

output_size = (1,1,7)

unpooled_output = F.max_unpool1d(
    input = pooled_output,
    indices = indices,
    kernel_size=kernel_size,
    stride=stride,
    padding=padding,
    output_size=output_size
)


print('\n Unnpooled Output Shape:', unpooled_output.shape)
print('Unpooled Output:\n', unpooled_output)


Input Tensor Shape: torch.Size([1, 7])
Input Tensor:
 tensor([[1., 3., 5., 7., 9., 2., 4.]])

Pooled Output Shape: torch.Size([1, 3])
Pooled Output:
 tensor([[5., 9., 9.]])
Indices of Max Values:
 tensor([[2, 4, 4]])

 Unnpooled Output Shape: torch.Size([1, 7])
Unpooled Output:
 tensor([[0., 0., 5., 0., 9., 0., 0.]])


In [3]:
#max_unpool2d

# this function performs the inver operation of max_pool2d, reconstructing the original input tensor form the pooled output and the indices returned by max_pool2d

import torch 
import torch.nn.functional as F

input_tensor = torch.arange(36, dtype=torch.float32).reshape(1,1,6,6)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:\n', input_tensor)

kernel_size = (2,2)
stride = (2,2)
padding = 0
return_indices = True 

pooled_output, indices = F.max_pool2d(
    input = input_tensor,
    kernel_size = kernel_size,
    stride =stride,
    padding = padding,
    return_indices=return_indices
)

print('\nPooled Output Shape:', pooled_output.shape)
print('Pooled Output:\n',pooled_output)
print('Indices of Max Values:\n',indices)

output_size = (1,1,6,6)

#apply 2d max unpooling
unpooled_output = F.max_unpool2d(
    input = pooled_output,
    indices = indices,
    kernel_size = kernel_size,
    stride=stride,
    padding=padding,
    output_size=output_size
)

print('\nUnpooled Output Shape:', unpooled_output.shape)
print('Unpooled Output:\n', unpooled_output)


Input Tensor Shape: torch.Size([1, 1, 6, 6])
Input Tensor:
 tensor([[[[ 0.,  1.,  2.,  3.,  4.,  5.],
          [ 6.,  7.,  8.,  9., 10., 11.],
          [12., 13., 14., 15., 16., 17.],
          [18., 19., 20., 21., 22., 23.],
          [24., 25., 26., 27., 28., 29.],
          [30., 31., 32., 33., 34., 35.]]]])

Pooled Output Shape: torch.Size([1, 1, 3, 3])
Pooled Output:
 tensor([[[[ 7.,  9., 11.],
          [19., 21., 23.],
          [31., 33., 35.]]]])
Indices of Max Values:
 tensor([[[[ 7,  9, 11],
          [19, 21, 23],
          [31, 33, 35]]]])

Unpooled Output Shape: torch.Size([1, 1, 6, 6])
Unpooled Output:
 tensor([[[[ 0.,  0.,  0.,  0.,  0.,  0.],
          [ 0.,  7.,  0.,  9.,  0., 11.],
          [ 0.,  0.,  0.,  0.,  0.,  0.],
          [ 0., 19.,  0., 21.,  0., 23.],
          [ 0.,  0.,  0.,  0.,  0.,  0.],
          [ 0., 31.,  0., 33.,  0., 35.]]]])


In [4]:
#max_unpool3d 

# inverse operation of max_pool3d, reconstructing input tensor form the pooled output and indics retuned by max_pool3d
# function requires the outpu of a prevois max_pool3d, including both the pooled values and indices of the maximum values.

import torch 
import torch.nn.functional as F

input_tensor = torch.arange(64, dtype=torch.float32).reshape(1,1,4,4,4)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:\n', input_tensor)

kernel_size = (2,2,2)
stride = (2,2,2)
padding = 0
return_indices = True 

pooled_output, indices = F.max_pool3d(
    input = input_tensor,
    kernel_size = kernel_size,
    stride =stride,
    padding =padding,
    return_indices=return_indices
)


print('\nPooled Output SHape:', pooled_output.shape)
print('Pooled Output:\n', pooled_output)
print('Indices of Max Values:\n', indices)

output_size = (1,1,4,4,4)

unpooled_output = F.max_unpool3d(
    input=pooled_output,
    indices=indices,
    kernel_size=kernel_size,
    stride=stride,
    padding=padding,
    output_size=output_size
)

print('\nUnpooled Output Shape:', unpooled_output.shape)
print('Unpooled Output:\n', unpooled_output)

Input Tensor Shape: torch.Size([1, 1, 4, 4, 4])
Input Tensor:
 tensor([[[[[ 0.,  1.,  2.,  3.],
           [ 4.,  5.,  6.,  7.],
           [ 8.,  9., 10., 11.],
           [12., 13., 14., 15.]],

          [[16., 17., 18., 19.],
           [20., 21., 22., 23.],
           [24., 25., 26., 27.],
           [28., 29., 30., 31.]],

          [[32., 33., 34., 35.],
           [36., 37., 38., 39.],
           [40., 41., 42., 43.],
           [44., 45., 46., 47.]],

          [[48., 49., 50., 51.],
           [52., 53., 54., 55.],
           [56., 57., 58., 59.],
           [60., 61., 62., 63.]]]]])

Pooled Output SHape: torch.Size([1, 1, 2, 2, 2])
Pooled Output:
 tensor([[[[[21., 23.],
           [29., 31.]],

          [[53., 55.],
           [61., 63.]]]]])
Indices of Max Values:
 tensor([[[[[21, 23],
           [29, 31]],

          [[53, 55],
           [61, 63]]]]])

Unpooled Output Shape: torch.Size([1, 1, 4, 4, 4])
Unpooled Output:
 tensor([[[[[ 0.,  0.,  0.,  0.],
           [ 0.,  

In [None]:
#1p_pool1d 

#function applies 1d power-average pooling operation over an input signal.
# the 'power-average' is computed as the Lp-norm of the values in each pooling window.
import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[[1.0,2.0,3.0,4.0,5.0,6.0]]],dtype=torch.float32)
print('Input tensor shape:', input_tensor.shape)
print('input tensor:\n', input_tensor)

norm_type = 2.0
kernel_size = 3
stride = 2 
ceil_mode = False 

output = F.lp_pool1d(
    input = input_tensor,
    norm_type=norm_type,
    kernel_size=kernel_size,
    stride=stride,
    ceil_mode=ceil_mode
)


print('\nOutput Tensor Shape:', output.shape)
print('Output Tensor"\n', output)

Input tensor shape: torch.Size([1, 1, 6])
input tensor:
 tensor([[[1., 2., 3., 4., 5., 6.]]])

Output Tensor Shape: torch.Size([1, 1, 2])
Output Tensor"
 tensor([[[3.7417, 7.0711]]])


In [3]:
#1l_pool2d 
# function applies a 2d power-average pooling opration over an input tensor 
# the power-average is computed as the Lp norm of the value sin each pooling wind

import torch 
import torch.nn.functional as F 

input_tensor = torch.arange(36, dtype=torch.float32).reshape(1,1,6,6)
print('input tensor shape',input_tensor.shape)
print('input tensor', input_tensor)

norm_type = 2.0 
kernel_size = (2,2)
stride = (2,2)
ceil_mode = False 

output = F.lp_pool2d(
    input = input_tensor,
    norm_type = norm_type,
    kernel_size = kernel_size,
    stride = stride, 
    ceil_mode = ceil_mode
)

print('\nOutput Tensor Shape:', output.shape)
print('Output Tensor:\n', output)

input tensor shape torch.Size([1, 1, 6, 6])
input tensor tensor([[[[ 0.,  1.,  2.,  3.,  4.,  5.],
          [ 6.,  7.,  8.,  9., 10., 11.],
          [12., 13., 14., 15., 16., 17.],
          [18., 19., 20., 21., 22., 23.],
          [24., 25., 26., 27., 28., 29.],
          [30., 31., 32., 33., 34., 35.]]]])

Output Tensor Shape: torch.Size([1, 1, 3, 3])
Output Tensor:
 tensor([[[[ 9.2736, 12.5698, 16.1864],
          [31.5911, 35.5246, 39.4715],
          [55.3353, 59.3127, 63.2930]]]])


In [4]:
#lp_pool3d
# function applies a 3d power-average pooling operation over an input tensor.
# the power-average is computed as the Lp-norm of the values in each pooling window.

import torch 
import torch.nn.functional as F 

input_tensor = torch.arange(64, dtype=torch.float32).reshape(1,1,4,4,4)
print('Input Tensor Shape',input_tensor.shape)
print('Input Tensor:', input_tensor)

norm_type = 2.0 
kernel_size = (2,2,2)
stride = (2,2,2)
ceil_mode = False 

output = F.lp_pool3d(
    input = input_tensor,
    norm_type = norm_type,
    kernel_size = kernel_size,
    ceil_mode = ceil_mode
)

print('\n Output Tensor Shape:', output.shape)
print('Output Tensor:', output)

Input Tensor Shape torch.Size([1, 1, 4, 4, 4])
Input Tensor: tensor([[[[[ 0.,  1.,  2.,  3.],
           [ 4.,  5.,  6.,  7.],
           [ 8.,  9., 10., 11.],
           [12., 13., 14., 15.]],

          [[16., 17., 18., 19.],
           [20., 21., 22., 23.],
           [24., 25., 26., 27.],
           [28., 29., 30., 31.]],

          [[32., 33., 34., 35.],
           [36., 37., 38., 39.],
           [40., 41., 42., 43.],
           [44., 45., 46., 47.]],

          [[48., 49., 50., 51.],
           [52., 53., 54., 55.],
           [56., 57., 58., 59.],
           [60., 61., 62., 63.]]]]])

 Output Tensor Shape: torch.Size([1, 1, 2, 2, 2])
Output Tensor: tensor([[[[[ 37.7889,  42.3792],
           [ 57.3062,  62.5140]],

          [[122.4582, 128.0156],
           [144.7342, 150.3197]]]]])


In [5]:
# adaptive_max_pool1d 
# function applies 1d adaptive max pooling which dynamically adjusts the size of the pooling window to produce a fixed output size. It is particularly useful when you need a specific output size regardless of the input size 

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[[1.0,2.0,3.0,4.0,5.0,6.0,7.0]]], dtype=torch.float32)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:\n', input_tensor)

output_size = 3
return_indices = True

output, indices = F.adaptive_max_pool1d(
    input = input_tensor,
    output_size = output_size, 
    return_indices = return_indices
)

print('\nOutput Tensor Shape:', output.shape)
print('Output Tensor (Max Values):\n', output)
print('Indices of Max Values:\n', indices)



Input Tensor Shape: torch.Size([1, 1, 7])
Input Tensor:
 tensor([[[1., 2., 3., 4., 5., 6., 7.]]])

Output Tensor Shape: torch.Size([1, 1, 3])
Output Tensor (Max Values):
 tensor([[[3., 5., 7.]]])
Indices of Max Values:
 tensor([[[2, 4, 6]]])


In [None]:
#max_pool2d 
# applies 2d adaptive max pooling, dynamically adjusts the size of the pooling windows to produce 
# a fixed output size , it is particularly useful when you need a specific output size regardless of the input size

import torch 
import torch.nn.functional as F 

input_tensor = torch.arange(36, dtype=torch.float32).reshape(1,1,6,6)
print('Input Tensor Shape', input_tensor.shape)
print('Input Tensor', input_tensor)

output_size = (3,3)
return_indices = True 

output, indices = F.adaptive_max_pool2d(
    input = input_tensor,
    output_size = output_size,
    return_indices = return_indices
)

print('\nOutput Tensor Shape:',output.shape)
print('Output Tensor (Max Values)',output)
print('Indices of Max Values:\n',indices)

Input Tensor Shape torch.Size([1, 1, 6, 6])
Input Tensor tensor([[[[ 0.,  1.,  2.,  3.,  4.,  5.],
          [ 6.,  7.,  8.,  9., 10., 11.],
          [12., 13., 14., 15., 16., 17.],
          [18., 19., 20., 21., 22., 23.],
          [24., 25., 26., 27., 28., 29.],
          [30., 31., 32., 33., 34., 35.]]]])

Output Tensor Shape: torch.Size([1, 1, 3, 3])
Output Tensor (Max Values) tensor([[[[ 7.,  9., 11.],
          [19., 21., 23.],
          [31., 33., 35.]]]])
Indices of Max Values:
 tensor([[[[ 7,  9, 11],
          [19, 21, 23],
          [31, 33, 35]]]])


In [9]:
#adaptive_max_pool3d 
# function applies 3d adaptive max pooling, 
# dynamically adjust the size of the pooling windows to produce a fixed output size. 
# it particularly useful when yo need a specific output size regardless the input size 

import torch 
import torch.nn.functional as F 

input_tensor = torch.arange(64,dtype=torch.float32).reshape(1,1,4,4,4)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:\n', input_tensor)

output_size = (2,2,2)
return_indices = True 

output, indices = F.adaptive_max_pool3d(
    input = input_tensor,
    output_size = output_size,
    return_indices = return_indices 
)
print('\n Output Tensor Shape:', output.shape)
print('Output Tensor (Max Values):\n', output)
print('Indices of Max Values:\n', indices)

Input Tensor Shape: torch.Size([1, 1, 4, 4, 4])
Input Tensor:
 tensor([[[[[ 0.,  1.,  2.,  3.],
           [ 4.,  5.,  6.,  7.],
           [ 8.,  9., 10., 11.],
           [12., 13., 14., 15.]],

          [[16., 17., 18., 19.],
           [20., 21., 22., 23.],
           [24., 25., 26., 27.],
           [28., 29., 30., 31.]],

          [[32., 33., 34., 35.],
           [36., 37., 38., 39.],
           [40., 41., 42., 43.],
           [44., 45., 46., 47.]],

          [[48., 49., 50., 51.],
           [52., 53., 54., 55.],
           [56., 57., 58., 59.],
           [60., 61., 62., 63.]]]]])

 Output Tensor Shape: torch.Size([1, 1, 2, 2, 2])
Output Tensor (Max Values):
 tensor([[[[[21., 23.],
           [29., 31.]],

          [[53., 55.],
           [61., 63.]]]]])
Indices of Max Values:
 tensor([[[[[21, 23],
           [29, 31]],

          [[53, 55],
           [61, 63]]]]])


In [10]:
#adapative_avg_pool2d

#function applies 2d adaptive average pooling, dynamically adjust the size of the pooling windows to produce fixed outut size.
# it sizes it computes the average value wihtint each region mkaing it useful for downsampling while spatiol 

import torch 
import torch.nn.functional as F 

input_tensor = torch.arange(36, dtype=torch.float32).reshape(1,1,6,6)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:\n', input_tensor)

output_size = (3,3)

output = F.adaptive_avg_pool2d(
    input = input_tensor,
    output_size=output_size
)

print('\nOutput Tensor Shape:', output.shape)
print('Output')

Input Tensor Shape: torch.Size([1, 1, 6, 6])
Input Tensor:
 tensor([[[[ 0.,  1.,  2.,  3.,  4.,  5.],
          [ 6.,  7.,  8.,  9., 10., 11.],
          [12., 13., 14., 15., 16., 17.],
          [18., 19., 20., 21., 22., 23.],
          [24., 25., 26., 27., 28., 29.],
          [30., 31., 32., 33., 34., 35.]]]])

Output Tensor Shape: torch.Size([1, 1, 3, 3])
Output


In [11]:
# adaptive_avg_pool3d 

# function applies 3d adaptive average pooling, which dynmically adjusts the size of the poooling windows to produce a dixed output size. 
# it computes the average value within each region , making it useful for downsampling volunteric data while preserving spatioal information.

import torch 
import torch.nn.functional as F 

input_tensor = torch.arange(64, dtype=torch.float32).reshape(1,1,4,4,4)
print('Input Tensor Shape', input_tensor.shape)
print('Input Tensor:', input_tensor)

output_size = (2,2,2)

output = F.adaptive_avg_pool3d(
    input = input_tensor,
    output_size=output_size
)

print('\n Output Tensor Shape:', output.shape)
print('\n Output Tensor (Average Values):', output)

Input Tensor Shape torch.Size([1, 1, 4, 4, 4])
Input Tensor: tensor([[[[[ 0.,  1.,  2.,  3.],
           [ 4.,  5.,  6.,  7.],
           [ 8.,  9., 10., 11.],
           [12., 13., 14., 15.]],

          [[16., 17., 18., 19.],
           [20., 21., 22., 23.],
           [24., 25., 26., 27.],
           [28., 29., 30., 31.]],

          [[32., 33., 34., 35.],
           [36., 37., 38., 39.],
           [40., 41., 42., 43.],
           [44., 45., 46., 47.]],

          [[48., 49., 50., 51.],
           [52., 53., 54., 55.],
           [56., 57., 58., 59.],
           [60., 61., 62., 63.]]]]])

 Output Tensor Shape: torch.Size([1, 1, 2, 2, 2])

 Output Tensor (Average Values): tensor([[[[[10.5000, 12.5000],
           [18.5000, 20.5000]],

          [[42.5000, 44.5000],
           [50.5000, 52.5000]]]]])


In [12]:
#max_pool2d 

# function applies 2d fractional max pooling whcih downsamples the input by selecting a random subset of pooling regions 

import torch 
import torch.nn.functional as F 

input_tensor = torch.arange(64, dtype=torch.float32).reshape(1,1,8,8)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor', input_tensor)

kernel_size = 3 
output_size = (4,4)
return_indices = True 

output , indices = F.fractional_max_pool2d(
    input = input_tensor,
    kernel_size = kernel_size, 
    output_size = output_size, 
    return_indices = return_indices
)

print('\nOutput Tensor Shape:', output.shape)
print('Output Tensor (Max Values):\n', output)
print('Indices of Max Values\n', indices)

Input Tensor Shape: torch.Size([1, 1, 8, 8])
Input Tensor tensor([[[[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11., 12., 13., 14., 15.],
          [16., 17., 18., 19., 20., 21., 22., 23.],
          [24., 25., 26., 27., 28., 29., 30., 31.],
          [32., 33., 34., 35., 36., 37., 38., 39.],
          [40., 41., 42., 43., 44., 45., 46., 47.],
          [48., 49., 50., 51., 52., 53., 54., 55.],
          [56., 57., 58., 59., 60., 61., 62., 63.]]]])

Output Tensor Shape: torch.Size([1, 1, 4, 4])
Output Tensor (Max Values):
 tensor([[[[18., 19., 21., 23.],
          [34., 35., 37., 39.],
          [42., 43., 45., 47.],
          [58., 59., 61., 63.]]]])
Indices of Max Values
 tensor([[[[18, 19, 21, 23],
          [34, 35, 37, 39],
          [42, 43, 45, 47],
          [58, 59, 61, 63]]]])


In [13]:
#max_pool3d

import torch 
import torch.nn.functional as F 

input_tensor = torch.arange(512, dtype=torch.float32).reshape(1,1,8,8,8)
print('Input Tensor Shape:', input_tensor.shape)
print('Input Tensor:\n', input_tensor)

kernel_size = 3
output_size = (4,4,4)
return_indices = True 

output, indices = F.fractional_max_pool3d(
    input = input_tensor,
    kernel_size = kernel_size,
    output_size = output_size,
    return_indices = return_indices 
)

print('\n Output Tensor Shape:', output.shape)
print('Output Tensor (Max Values)\n', output)
print('Indices of Max Values:\n',indices)

Input Tensor Shape: torch.Size([1, 1, 8, 8, 8])
Input Tensor:
 tensor([[[[[  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.],
           [  8.,   9.,  10.,  11.,  12.,  13.,  14.,  15.],
           [ 16.,  17.,  18.,  19.,  20.,  21.,  22.,  23.],
           [ 24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.],
           [ 32.,  33.,  34.,  35.,  36.,  37.,  38.,  39.],
           [ 40.,  41.,  42.,  43.,  44.,  45.,  46.,  47.],
           [ 48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.],
           [ 56.,  57.,  58.,  59.,  60.,  61.,  62.,  63.]],

          [[ 64.,  65.,  66.,  67.,  68.,  69.,  70.,  71.],
           [ 72.,  73.,  74.,  75.,  76.,  77.,  78.,  79.],
           [ 80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.],
           [ 88.,  89.,  90.,  91.,  92.,  93.,  94.,  95.],
           [ 96.,  97.,  98.,  99., 100., 101., 102., 103.],
           [104., 105., 106., 107., 108., 109., 110., 111.],
           [112., 113., 114., 115., 116., 117., 118., 119.],
           [120., 12

In [18]:
#scaled_dot_product_attention

# computes scaled dot-product attention 
# which core operation transformer-based models 
# it supports features like attention masking, dropout and causal attension.
import torch
import torch.nn.functional as F

# Define query, key, and value tensors
batch_size = 2
num_heads = 4
seq_len_query = 8
seq_len_key_value = 10
embedding_dim = 64

query = torch.rand(batch_size, num_heads, seq_len_query, embedding_dim)  # Shape: (N, Hq, L, E)
key = torch.rand(batch_size, num_heads, seq_len_key_value, embedding_dim)  # Shape: (N, H, S, E)
value = torch.rand(batch_size, num_heads, seq_len_key_value, embedding_dim)  # Shape: (N, H, S, Ev)

# Optional attention mask (float mask that adds bias to attention scores)
attn_mask = torch.tril(torch.ones(seq_len_query, seq_len_key_value))  # Lower triangular mask
attn_mask = attn_mask.masked_fill(attn_mask == 0, float("-inf")).masked_fill(attn_mask == 1, 0.0)

# Parameters for scaled_dot_product_attention
dropout_p = 0.1  # Dropout probability
is_causal = False  # Whether to apply causal masking
scale = None  # Scaling factor (default is 1 / sqrt(embedding_dim))
enable_gqa = False  # Enable Grouped Query Attention (experimental)

# Apply scaled dot-product attention
attention_output = F.scaled_dot_product_attention(
    query=query,
    key=key,
    value=value,
    attn_mask=attn_mask,
    dropout_p=dropout_p,
    is_causal=is_causal,
    scale=scale,
    enable_gqa=enable_gqa
)

# Print the output tensor
print("Attention Output Shape:", attention_output.shape)
print("Attention Output:\n", attention_output)


Attention Output Shape: torch.Size([2, 4, 8, 64])
Attention Output:
 tensor([[[[0.2649, 0.4282, 0.9530,  ..., 0.6633, 0.2831, 0.6720],
          [0.6438, 0.5081, 0.8578,  ..., 0.7205, 0.1772, 0.8333],
          [0.4574, 0.6657, 0.6882,  ..., 0.7768, 0.4474, 0.9025],
          ...,
          [0.5056, 0.4177, 0.4656,  ..., 0.5272, 0.3969, 0.6563],
          [0.7042, 0.5366, 0.5024,  ..., 0.4751, 0.5923, 0.7479],
          [0.6596, 0.5777, 0.4738,  ..., 0.5001, 0.5570, 0.6923]],

         [[1.0778, 0.5960, 0.7437,  ..., 0.3600, 0.9837, 0.7020],
          [0.2299, 0.4553, 0.0534,  ..., 0.1464, 0.0436, 0.2850],
          [0.5496, 0.5532, 0.4163,  ..., 0.3606, 0.6794, 0.5596],
          ...,
          [0.2894, 0.4028, 0.4763,  ..., 0.4075, 0.4890, 0.4804],
          [0.4760, 0.3388, 0.4006,  ..., 0.3396, 0.4056, 0.4513],
          [0.5099, 0.3814, 0.4191,  ..., 0.4421, 0.4335, 0.4065]],

         [[0.6909, 0.0219, 0.7772,  ..., 0.5716, 0.5740, 1.0567],
          [0.7070, 0.0311, 0.3964,  ...

In [19]:
#threshold 

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[-1.0 , 2.0, -3.0], [4.0, -5.0, 6.0]])
print('Input Tensor:\n', input_tensor)

threshold = 0.0 
value = 0.0 
inplace = False 

output_tensor = F.threshold(input = input_tensor, threshold=threshold, value =value, inplace=inplace)

print('\nOutput Tensor:\n',output_tensor)

Input Tensor:
 tensor([[-1.,  2., -3.],
        [ 4., -5.,  6.]])

Output Tensor:
 tensor([[0., 2., 0.],
        [4., 0., 6.]])


In [20]:


import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[-1.0, 2.0, -3.0],[4.0, -5.0, 6.0]])
print('Original Input Tensor:\n', input_tensor)

threshold = 0.0
value = 0.0 

F.threshold_(input=input_tensor, threshold=threshold, value=value)

print('\n Modified Input Tensor (In-Place):\n', input_tensor)




Original Input Tensor:
 tensor([[-1.,  2., -3.],
        [ 4., -5.,  6.]])

 Modified Input Tensor (In-Place):
 tensor([[0., 2., 0.],
        [4., 0., 6.]])


In [21]:
#relu 

# this function applies the rectified linear unit (ReLU)

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[-1.0, 2.0, -3.0],[4.0, -5.0, 6.0]])
print('Input Tensor:\n', input_tensor)

output_tensor = F.relu(input=input_tensor, inplace = False)
print('\nOutput Tensor(ReLU):\n', output_tensor)

Input Tensor:
 tensor([[-1.,  2., -3.],
        [ 4., -5.,  6.]])

Output Tensor(ReLU):
 tensor([[0., 2., 0.],
        [4., 0., 6.]])


In [23]:
#relu_

import torch 
import torch.nn.functional as F 
input_tensor = torch.tensor([[-1.0, 2.0, -3.0],[4.0, -5.0, 6.0]])
print('Original Input Tensor:\n', input_tensor)
F.relu_(input=input_tensor)
print('\nModified Input Tensor (ReLU In-Place):\n', input_tensor)

Original Input Tensor:
 tensor([[-1.,  2., -3.],
        [ 4., -5.,  6.]])

Modified Input Tensor (ReLU In-Place):
 tensor([[0., 2., 0.],
        [4., 0., 6.]])


In [3]:
#hardtanh
# applies hardtanh function element-wise
# hardtanh is an activation function used for neural networks, it is cheaper and more computatinally efficient version of the tanh activation
# hardtanh function clamps all values in the tensor to a specified range [min_val, max_val] 
# value beelow min_val are set to min_val and values above max-val are set to max_val

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[-2.0,-0.5,0.0],[0.5,1.5,3.0]])
print('Input Tensor:\n', input_tensor)

min_val = -1.0 
max_val = 1.0
inplace = False 

output_tensor = F.hardtanh(input=input_tensor, min_val=min_val,max_val=max_val,inplace=inplace)

print('\nOutput Tensor (HardTanh):\n', output_tensor)


##value clamping
#clamping values to a specific range is useful in scenarios where you want to restrict the output or intermediate results of computations to a predefined range
# help prevent numerical instability or ensure that outputs meet certain constraints

#simulated activations from a layer
activations = torch.tensor([[-5.0,-2.0,0.0],[1.0,3.0,10.0]])

#clamp activations to the range
clamped_activations = F.hardtanh(activations, min_val=-1.0, max_val=1.0)
print('Oirignal Activations:\n', activations)
print('\nClamped Activations:\n', clamped_activations)

#Gradient Stability
# gradients can someines explode (becomde very large) or vanis become very small.applying a clapming operation like hartanh can help stabilize gradeints by restricint their magnitde.


gradients = torch.tensor([[-10.0, -5.0, 0.0],[-1.0, 5.0, 20.0]])

clipped_gradients = torch.clamp(gradients, min=-1.0, max=1.0)

print('Original Gradietns:\n',gradients)
print('\nClupped Gradients:\n',clipped_gradients)

#in-place hardtanh
activations = torch.tensor([[-5.0, -2.0, 0.0],[1.0, 3.0, 10.0]])

print('Original Activations:\n', activations)
F.hardtanh_(activations, min_val=-1.0, max_val=1.0)
print('\nModified Activations (In-Place):\n',activations)

"""
Value Clamping Restricts values to a specific range
Gradient Stability Limits gradients to a safe range to prevent exploding or vanishing gradients
In-Place HardTanh Modifies the input tensor directly to save memory
"""

Input Tensor:
 tensor([[-2.0000, -0.5000,  0.0000],
        [ 0.5000,  1.5000,  3.0000]])

Output Tensor (HardTanh):
 tensor([[-1.0000, -0.5000,  0.0000],
        [ 0.5000,  1.0000,  1.0000]])
Oirignal Activations:
 tensor([[-5., -2.,  0.],
        [ 1.,  3., 10.]])

Clamped Activations:
 tensor([[-1., -1.,  0.],
        [ 1.,  1.,  1.]])
Original Gradietns:
 tensor([[-10.,  -5.,   0.],
        [ -1.,   5.,  20.]])

Clupped Gradients:
 tensor([[-1., -1.,  0.],
        [-1.,  1.,  1.]])
Original Activations:
 tensor([[-5., -2.,  0.],
        [ 1.,  3., 10.]])

Modified Activations (In-Place):
 tensor([[-1., -1.,  0.],
        [ 1.,  1.,  1.]])


'\nValue Clamping Restricts values to a specific range\nGradient Stability Limits gradients to a safe range to prevent exploding or vanishing gradients\nIn-Place HardTanh Modifies the input tensor directly to save memory\n'

In [4]:
#hardswish , applies hardswish activation function element-wise to the input tensor.
# hwardswish is a computationally efficient approximation of the swish activation function
# coonly used in lightweight nuerak netrks like mobilenetv3

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[-4.0, -2.0, 0.0],[1.0,3.0,5.0]])

print('input tensor:\n', input_tensor)

output_tensor = F.hardswish(input=input_tensor, inplace=False)
print('\nOutput Tensor (HardSwish):\n',output_tensor)



input tensor:
 tensor([[-4., -2.,  0.],
        [ 1.,  3.,  5.]])

Output Tensor (HardSwish):
 tensor([[-0.0000, -0.3333,  0.0000],
        [ 0.6667,  3.0000,  5.0000]])


In [9]:
#here are hardswihc with MobileNetV3Block


import torch 
import torch.nn as nn 
import torch.nn.functional as F 

class MobileNetV3Block(nn.Module):
    def __init__(self, in_channels, out_channels, expansion_factor=6, kernel_size=3, stride=1):
        super(MobileNetV3Block, self).__init__()

        #expandions phase
        hidden_dim = int(in_channels * expansion_factor)
        self.use_residual = (stride == 1 and in_channels == out_channels)

        layers = []

        if expansion_factor != 1:
            layers.append(nn.Conv2d(in_channels, hidden_dim,kernel_size=1,bias=False))
            layers.append(nn.BatchNorm2d(hidden_dim))
            layers.append(nn.Hardswish())

        layers.append(nn.Conv2d(hidden_dim, hidden_dim, kernel_size=1, bias =False ))
        layers.append(nn.BatchNorm2d(hidden_dim))
        layers.append(nn.Hardswish())

        #pointwise convolution (projection)
        layers.append(nn.Conv2d(hidden_dim, out_channels,kernel_size=1, bias =False))
        layers.append(nn.BatchNorm2d(out_channels))

        self.conv = nn.Sequential(*layers)
    def forward(self,x):
        if self.use_residual:
            return x + self.conv(x)
        else:
            return self.conv(x)

input_tensor = torch.randn(1,16,32,32)
mobilenet_block = MobileNetV3Block(in_channels=16, out_channels=16, expansion_factor=6, kernel_size=3, stride=1)
output_tensor = mobilenet_block(input_tensor)
print('Output Tensor Shape:', output_tensor.shape)


# MobileNetV3 Block Structure :
# The block follows the structure of a typical MobileNetV3 bottleneck block:
# Expansion Phase : A pointwise convolution increases the number of channels by a factor (expansion_factor), followed by BatchNorm and HardSwish activation.
# Depthwise Convolution : A depthwise convolution applies spatial filtering, followed by BatchNorm and HardSwish activation.
# Projection Phase : Another pointwise convolution reduces the number of channels back to out_channels, followed by BatchNorm.
# HardSwish Activation :
# HardSwish is applied after both the expansion and depthwise convolution phases to introduce non-linearity efficiently.
# Residual Connection :
# If the input and output dimensions match (in_channels == out_channels and stride == 1), a residual connection is added to improve gradient flow.
# Input Tensor :
# A dummy input tensor of shape (1, 16, 32, 32) is passed through the block.
# Output Tensor :
# The output tensor has the same shape as the input tensor due to the residual connection.

Output Tensor Shape: torch.Size([1, 16, 32, 32])


In [1]:
#relu6 activattion function element-wise to the input tensor.
#relu6 clamps all values in the tensor to the range [0,6] specifically
# * values less than 0 and are set to 0 
# * values greater than 6 are set to 6
# values within range [0,6] remained unchanged

import torch 
import torch.nn.functional as F

input_tensor = torch.tensor([[-2.0,3.0,8.0],[5.0,-1.0,6.0]])
print('Input Tensor:\n',input_tensor)

#apply relu6 
output_tensor = F.relu6(input=input_tensor, inplace=False)
print('\nOutput Tensor (Relu6):\n', output_tensor)

Input Tensor:
 tensor([[-2.,  3.,  8.],
        [ 5., -1.,  6.]])

Output Tensor (Relu6):
 tensor([[0., 3., 6.],
        [5., 0., 6.]])


In [3]:
#elu 
# applies exponential linear unit (elu) 
# elu activation function element-wise to the input tensor

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[-2.0, -1.0, 0.0], [1.0, 2.0, 3.0]])
print('input tensor:\n', input_tensor)

alpha = 1.0
inplace = False 

output_tensor = F.elu(input=input_tensor, alpha=alpha, inplace=inplace)

print('\nOutput Tensur (ELU):\n',output_tensor)

# F.elu_(input=input_tensor, alpha=alpha) #apply elu activation in-place


input tensor:
 tensor([[-2., -1.,  0.],
        [ 1.,  2.,  3.]])

Output Tensur (ELU):
 tensor([[-0.8647, -0.6321,  0.0000],
        [ 1.0000,  2.0000,  3.0000]])


In [4]:
#selu

# this function applies the Scaled Explonential Linear Unit (SELU) activation function element-wise to the input tensor.
# SLUE is a self normalizing activation function that helps maintain mean and variance stability during training, making it particularly useful in deep nueral networks


import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[-2.0, -1.0, 0.0], [1.0, 2.0, 3.0]])
print('input tensor:\n', input_tensor)

inplace = False

output_tensor = F.selu(input=input_tensor, inplace=inplace)
print('output tensor (selu):\n', output_tensor)

input tensor:
 tensor([[-2., -1.,  0.],
        [ 1.,  2.,  3.]])
output tensor (selu):
 tensor([[-1.5202, -1.1113,  0.0000],
        [ 1.0507,  2.1014,  3.1521]])


In [5]:
#celu 
# this function applies the continously differentiable exponential linear unit (celu) activation function element-wise to the input tensor.
import torch 
import torch.nn.functional as F 
input_tensor = torch.tensor([[-2.0, -1.0, 0.0], [1.0, 2.0, 3.0]])
print('input tensor:\n', input_tensor)

alpha = 1.0
inplace = False 

output_tensor = F.celu(input= input_tensor, alpha=alpha, inplace=inplace)
print('\noutput tensor (celu):\n', output_tensor)

input tensor:
 tensor([[-2., -1.,  0.],
        [ 1.,  2.,  3.]])

output tensor (celu):
 tensor([[-0.8647, -0.6321,  0.0000],
        [ 1.0000,  2.0000,  3.0000]])


In [9]:
#leaky-relu
# function applies the leaky rectified linear unit activation function element-wise to the input tensor
# leaky relu is a variant of relu that allows a small, non-zero gradient for negetive inputs, controlled by the negative_slope parameter


import torch
import torch.nn.functional as F 

input_tensor = torch.tensor([[-2.0, -1.0, 0.0],[1.0, 2.0, 3.0]])
print('input tensor:\n',input_tensor)

negative_slope = 0.01 
inplace = False 

output_tensor = F.leaky_relu(input=input_tensor, negative_slope=negative_slope, inplace=inplace)

print('output tensor (leaky ReLU):\n',output_tensor)

input tensor:
 tensor([[-2., -1.,  0.],
        [ 1.,  2.,  3.]])
output tensor (leaky ReLU):
 tensor([[-0.0200, -0.0100,  0.0000],
        [ 1.0000,  2.0000,  3.0000]])


In [10]:
#parametric rectified linear unit (PReLU)
# activation function element-wise to the input tensor.
# unlike leaky relu, PReLUL allows the slope for negative inputs (weight) to be a learnable parameter which can adapt during training 

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[-2.0,-1.0,0.0],[1.0,2.0,3.0]])
print('input tensor:\n', input_tensor)

#define the learnable weight parameter for PReLU
#weight can be a scaler or a 1-D tensor matching the number of input chanels
weight = torch.tensor(0.25)
output_tensor = F.prelu(input=input_tensor, weight=weight)

#print the output tensor
print('\nOuput Tensor (PReLU)\n', output_tensor)


input tensor:
 tensor([[-2., -1.,  0.],
        [ 1.,  2.,  3.]])

Ouput Tensor (PReLU)
 tensor([[-0.5000, -0.2500,  0.0000],
        [ 1.0000,  2.0000,  3.0000]])


In [12]:
#rrelu
# randomized leaky rectified linear unit (rrelu) activtaion function element-wise to the input tensor
# RReLU introduces randomness in the slope for negative inputs during training , which can help with generalization
# a is a randmly sampled from a unifrom distribution [lower, upper] during training, during inference a is fixed o the average of [lower, upper]

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[-2.0,-2.0,0.0],[1.0,2.0,3.0]])
print('input tensor\n',input_tensor)

lower = 1./8
upper = 1./3
training = True 
inplace = False 

output_tensor = F.rrelu(input=input_tensor, lower=lower, upper=upper, training=training, inplace=inplace)
print('\noutput tensor (RReLU):\n', output_tensor)

input tensor
 tensor([[-2., -2.,  0.],
        [ 1.,  2.,  3.]])

output tensor (RReLU):
 tensor([[-0.6247, -0.3263,  0.0000],
        [ 1.0000,  2.0000,  3.0000]])


In [14]:
#glu (gated linear unit)
# splits the input tensor along a specified dimension into two parts (a and b) applies the sigmoid function to b and computs the lement-wisr product a and a(b)

import torch 
import torch.nn.functional as F 

input_tensor = torch.randn(2,8)
print('input tensor:\n', input_tensor)

dim = -1
output_tensor = F.glu(input=input_tensor, dim=dim)

print('\output tensor (glu):\n', output_tensor)

input tensor:
 tensor([[ 0.7922, -1.1832, -0.2774, -1.9988,  0.0403,  0.0956, -1.7005, -0.1549],
        [ 1.4411, -0.7484, -0.8632,  1.1396,  1.0872, -0.6204, -0.6409,  0.7428]])
\output tensor (glu):
 tensor([[ 0.4041, -0.6199, -0.0428, -0.9222],
        [ 1.0777, -0.2617, -0.2978,  0.7722]])


In [None]:
#gelu 
#(gaussian error linear unit)
#activation function element-wise to the input tensor
# GELU is smooth non-linear activation function that is widely used in modern deep learning models

import torch
import torch.nn.functional as F 

input_tensor = torch.tensor([-2.0,-1.0,0.0,1.0,2.0])
print('input tensor:\n',input_tensor)

output_exact = F.gelu(input=input_tensor, approximate='none')
output_approximate = F.gelu(input=input_tensor, approximate='tanh')

print('output tensor (exact GELU):\n',output_exact)
print('\noutput tensor (approximate GELU):\n',output_approximate)

input tensor:
 tensor([-2., -1.,  0.,  1.,  2.])
output tensor (exact GELU):
 tensor([-0.0455, -0.1587,  0.0000,  0.8413,  1.9545])

output tensor (approximate GELU):
 tensor([-0.0454, -0.1588,  0.0000,  0.8412,  1.9546])


In [2]:
#logsigmoid
# function applies the logsigmoid activation functoin element-wise to the input tensor.
# the logsigmoid function is coonly used in probabilitstic models and binary classfication tasks , particularly when working with log probabilits

import torch 
import torch.nn.functional as F 

input_tenspr = torch.tensor([-2.0,-1.0,0.00,1.0,2.0])
print('input tensor:\n', input_tensor)
output_tensor = F.logsigmoid(input=input_tensor)
print('\noutput tensor (LogSigmoid):\n', output_tensor)

input tensor:
 tensor([-2., -1.,  0.,  1.,  2.])

output tensor (LogSigmoid):
 tensor([-2.1269, -1.3133, -0.6931, -0.3133, -0.1269])


In [5]:
#hardshrink 
# this function applies the Hard Shrinkage function function element-wise to the input tensor .
# where lamd in the function is threshold parameter that determines the range
import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([-2.0,-1.0,-0.4,0.0,0.4,1.0,2.0])
lambd= 0.5
output_tensor = F.hardshrink(input=input_tensor, lambd=lambd)
output_tensor

tensor([-2., -1.,  0.,  0.,  0.,  1.,  2.])

In [7]:
#tanshrink
# applies the element-wise tanhshrink function

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([-2.0,-1.0,-0.4,0.0,0.4,1.0,2.0])
output_tensor = F.tanhshrink(input=input_tensor)
output_tensor

tensor([-1.0360, -0.2384, -0.0201,  0.0000,  0.0201,  0.2384,  1.0360])

In [9]:
#softsign 
# function applies the softsign activation function element-wise to the input tensor
# where x is the absolute value of x . softsign is a smooth , non-linear activation function that squashes the input into range (-1,1)

import torch 
import torch.nn.functional as F 
input_tensor = torch.tensor([-2.0, -1.0, 0.0, 1.0, 2.0])
output_tensor = F.softsign(input=input_tensor)
output_tensor

tensor([-0.6667, -0.5000,  0.0000,  0.5000,  0.6667])

In [11]:
#softplus
# applies element-wise 
# for numerical stability the implementaion reverts to the linear function when input X B > threshold. 

import torch 
import torch.nn.functional as F 
input_tensor = torch.tensor([-2.0,-1.0,0.0,1.0,2.0])
output_tensor = F.softplus(input=input_tensor)
output_tensor

tensor([0.1269, 0.3133, 0.6931, 1.3133, 2.1269])

In [15]:
#softmin 
# this function applies the softmin function essentially the negative of the softmax function
# softmin function normalizes the input tensor along a specified dimension such that the output values sum to 1 
# it is often used in scenarios where you want to compute probabilites or weights inversely propotional to the input values

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[1.0,2.0,3.0],[4.0,5.0,6.0]])
dim = 1
output_tensor = F.softmin(input=input_tensor,dim=dim)
output_tensor

tensor([[0.6652, 0.2447, 0.0900],
        [0.6652, 0.2447, 0.0900]])

In [1]:
#softmax 
#apples softmax activation function
# softmax normalizes the input tensor along a specified dimension such that the output values lie in the range [0,1] and sum to 1
import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[2.0,1.0,0.5],[3.0,2.0,1.0]])
dim = 1
output_tensor = F.softmax(input=input_tensor,dim=dim)
#verify each slice sums to 1
output_tensor.sum(dim=dim)

tensor([1.0000, 1.0000])

In [4]:
#soft shrinkage (softshrink)
# function element-wise to the input tensor.
# lambd , threshold parameter that determines the range values to be shrunk to zero

import torch 
import torch.nn.functional as F

input_tensor = torch.tensor([-2.0,-1.0,-0.4,0.0,0.4,1.0,2.0])
lambd = 0.5 
output_tensor = F.softshrink(input=input_tensor, lambd=lambd)
output_tensor

tensor([-1.5000, -0.5000, -0.0000,  0.0000,  0.0000,  0.5000,  1.5000])

In [5]:
#gumble_softmax
# gumble_softmax function samples form the Gumble-Softmax distribution
# which is a continous relaxation of the categorical distribution.
# its commonly used in scenarios where you need differentiable sampling

import torch
import torch.nn.functional as F 
logits = torch.randn(5,3)
tau = 1.0
hard = False 
dim = -1
soft_samples = F.gumbel_softmax(logits=logits, tau=tau, hard=hard, dim=dim) #apply gumble-softmax with soft sampling
print('\nSoft Samples (Probability Distribution):\n',soft_samples)
hard_samples=F.gumbel_softmax(logits=logits,tau=tau,hard=True,dim=dim)
print('\nhard samples (one-hode encoded):\n', hard_samples)

#verify that hard samples are one-hot encoded
print('\nsum along dimension (should be 1 for hard samples):\n',hard_samples.sum(dim=dim))


Soft Samples (Probability Distribution):
 tensor([[2.6780e-01, 4.7145e-01, 2.6075e-01],
        [1.1713e-01, 3.7062e-02, 8.4581e-01],
        [3.5186e-02, 1.2928e-01, 8.3553e-01],
        [2.0046e-04, 2.7033e-02, 9.7277e-01],
        [1.2448e-01, 3.4024e-01, 5.3528e-01]])

hard samples (one-hode encoded):
 tensor([[0., 0., 1.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 0., 1.],
        [0., 0., 1.]])

sum along dimension (should be 1 for hard samples):
 tensor([1., 1., 1., 1., 1.])


In [10]:
#log_softmax 
# function applies the logirithm of the softmax function in a single step,
# which is both computationally efficient and numericalls stable.
#commonly used in combinetion with loss functions like torch.nnn.NLLLoss (negative log likelihood loss) for classication tasks

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[2.0,1.0,0.5],[3.0,2.0,1.0]])
dim =1
output_tensor = F.log_softmax(input=input_tensor,dim=dim)
print('\noutput tensor (log probabilities):\n',output_tensor)
softmax_probabilities = torch.exp(output_tensor)
print('\nexponentiated output (softmax probabilites):\n', softmax_probabilities)

#verify that each slice sums to 1
print('\n sum along dimension 1 (should be approximately 1):\n',softmax_probabilities.sum(dim=dim))

#NLLoss
logits = torch.tensor([[2.0, 1.0, 0.5], [3.0, 2.0, 1.0]])  # Shape: (batch_size=2, num_classes=3)
labels = torch.tensor([0,2])
dim = 1
log_probs = F.log_softmax(logits,dim=dim)
print('Log Probabilities (log_softmax):\n',log_probs)
loss = F.nll_loss(log_probs, labels)
print('\nNLL Loss:\n',loss)


output tensor (log probabilities):
 tensor([[-0.4644, -1.4644, -1.9644],
        [-0.4076, -1.4076, -2.4076]])

exponentiated output (softmax probabilites):
 tensor([[0.6285, 0.2312, 0.1402],
        [0.6652, 0.2447, 0.0900]])

 sum along dimension 1 (should be approximately 1):
 tensor([1.0000, 1.0000])
Log Probabilities (log_softmax):
 tensor([[-0.4644, -1.4644, -1.9644],
        [-0.4076, -1.4076, -2.4076]])

NLL Loss:
 tensor(1.4360)


In [1]:
#tanh 

# this function applies the hyperbolic tangent (tanh) activation function 

# the output of the tanh function lies in the range (-1,1), making it a popular choiec for activation function in neural networks

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([-2.0,-1.0,0.0,1.0,2.0])
output_tensor = F.tanh(input=input_tensor)
output_tensor


tensor([-0.9640, -0.7616,  0.0000,  0.7616,  0.9640])

In [3]:
#sigmoid

# applies sigmoid activation finction elemet-wise to the input tensor
# outut sigmoid lies in the range (0,1) making it a popular choice for tasks where probabilities or normalized values are required

import torch 
import torch.nn.functional as F
input_tensor = torch.tensor([-2.0,-1.0,0.0,1.0,2.0])
output_tensor = F.sigmoid(input=input_tensor)
output_tensor

tensor([0.1192, 0.2689, 0.5000, 0.7311, 0.8808])

In [4]:
#hardsigmoid 
# hardsigmoid function is a piecewise linear approximation of the sigmoid unction
# hardsigmoid functions is computationally efficent because it avoids exepnsive exponential calculations and is often used in lightweight neural networks

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([-4.0, -3.0, -1.0, 0.0, 1.0, 3.0, 4.0])
output_tensor = F.hardsigmoid(input=input_tensor, inplace = False)
output_tensor

tensor([0.0000, 0.0000, 0.3333, 0.5000, 0.6667, 1.0000, 1.0000])

In [5]:
#silu 
# function applies the sigmoid linear unit activation function elemetn-wise to the input tensor 
# the silu function is also known as swish activation funcion
# the silu function comvines the input x with its signmoid activation making it a smooth non-monotonic actiation function that has beedn shown to ikmprove performance in deep nerual neural networks

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([-2.0,-1.0,0.0,1.0,2.0])
output_tensor = F.silu(input=input_tensor, inplace =False)
output_tensor

tensor([-0.2384, -0.2689,  0.0000,  0.7311,  1.7616])

In [6]:
#mish

# mish(x) = x .tanh(Softplux(x))
# softplus(x) = log(1 + exp(x))
# tanh(x) is the hyperbolic tangent function 

# mish is smooth , non-monotonic activation function that has been shown to improve performance in deep neural netowrks compared to other acatoin funcions like ReLU or swish 

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([-2.0,-1.0,0.0,1.0,2.0])
output_tensor = F.mish(input=input_tensor, inplace =False)
output_tensor

tensor([-0.2525, -0.3034,  0.0000,  0.8651,  1.9440])

In [7]:
#batch_norm
# applies batchnormlization to the input tensor.
# batch normalization normalizes the input across a batch of ata for each channel , stbilizing and accelrating training in deep neural networks

import torch 
import torch.nn.functional as F 

input_tensor = torch.randn(2,3,4,4,4)# batch_size, channels, height, width
running_mean = torch.zeros(3)
running_var = torch.zeros(3)

weight = torch.ones(3) #scale parameter gamma
bias = torch.zeros(3) #scale parameter (beta)

#apply batch normalization
training = True #whether the model is in training mode
momentum = 0.1 # momentum for updating running statistics
eps = 1e-5 #small constant for numerical stability

output_tensor = F.batch_norm(
    input=input_tensor,
    running_mean=running_mean,
    running_var=running_var,
    bias = bias,
    training=training,
    momentum=momentum,
    eps=eps
)

print('input tensor:', input_tensor)
print('output tensor',output_tensor)

print('updated running mean:\n', running_mean)
print('updated running variance:\n',running_var)

input tensor: tensor([[[[[ 0.2939, -0.7838, -0.3633,  1.0398],
           [-0.5306,  0.3425, -0.2094,  2.0015],
           [-1.0057, -0.4197, -1.0050,  0.7385],
           [ 0.5214,  0.5199, -0.3382,  0.3056]],

          [[ 0.3278,  1.3689, -0.3543,  0.1538],
           [-1.3689,  0.8954,  1.0587, -0.3418],
           [ 0.2685, -0.8686,  0.3122,  0.3304],
           [-0.4221, -0.3994,  0.4599, -0.1123]],

          [[-0.0404, -1.1547, -0.6198, -0.3274],
           [-0.9721,  1.5516,  2.3822, -0.1542],
           [ 1.2160, -0.8965,  1.1996, -1.1597],
           [-0.2350,  0.5723, -0.4735, -1.4145]],

          [[ 1.4348,  1.3779, -0.8737, -1.1406],
           [-0.1624,  0.0335,  1.5107,  1.1748],
           [ 0.5517, -0.5191, -1.3490, -0.2447],
           [ 1.0799, -0.1226,  0.2073, -0.0922]]],


         [[[ 0.5044,  0.6221, -1.0633,  1.4891],
           [ 0.9370,  0.6921, -0.0542, -0.8889],
           [ 0.0923,  1.7893, -0.0672, -1.0229],
           [-0.2375,  0.3284, -0.4782,  0.156

In [10]:
#group_norm
# function applies group normalization to the input tensor
# group normalization devides the channels of the input into groups and normalizes the alues withint each group
# useful in scenarions where batch sizes are small or when batch normlization is not suitable

import torch 
import torch.nn.functional as F 

input_tensor = torch.randn(2,6,4,4) #batch_size, channels, height , width
num_groups = 3
weight = torch.ones(6)
bias = torch.zeros(6)

eps = 1e-5
output_tensor = F.group_norm(
    input = input_tensor,
    bias = bias ,
    num_groups = num_groups,
    weight = weight,
    eps = eps
)

print('input tensor\n',input_tensor)
print('\noutput tensor\n',output_tensor)

input tensor
 tensor([[[[-0.9950,  0.3942,  1.2846,  1.2953],
          [ 1.1153,  0.0882, -0.0327, -0.1738],
          [ 0.9755,  0.2276,  0.7907,  0.0203],
          [-0.8627, -0.4963,  0.8088, -1.0085]],

         [[ 0.0282,  0.3838, -0.3063, -1.9295],
          [-1.1972, -1.6479,  0.2804, -1.7825],
          [-0.5665,  1.4817,  0.8787, -2.2114],
          [ 1.3869, -1.4940,  0.2617, -0.4820]],

         [[-0.1168,  0.0420,  1.0844, -0.2193],
          [ 0.1458,  0.7663,  0.4907, -2.0230],
          [-2.4124, -0.6406,  1.4944, -1.3344],
          [-0.9236,  0.7850,  0.2408, -0.5281]],

         [[-1.1853, -1.4353, -0.9999, -0.3563],
          [ 1.1517, -1.2574, -1.4784, -0.5392],
          [ 1.7553,  0.5330,  0.3643, -0.0301],
          [-0.0555,  1.2492,  1.6412,  1.9839]],

         [[-1.1844, -0.6003,  0.1958, -0.0630],
          [ 0.6549, -0.1772,  0.5408, -1.1960],
          [-0.2786, -1.2881, -1.0027,  1.3418],
          [-1.4563, -1.1602,  1.4336, -1.7977]],

         [[-0.68

In [15]:
#instance_norm
# Instance Normalization , normalize sample independently across its spatial dimensions for each channel.
# commonly used in tasks like style transfer where normzalization within individual sampes is more appropriate than across a batch

import torch 
import torch.nn.functional as F 

inout_tensor = torch.randn(2,3,4,4)
running_mean = None 
running_var = None 
weight = torch.ones(3)
bias = torch.zeros(3)
use_input_stats = True 
momentum = 0.1
eps = 1e-5

output_tensor = F.instance_norm(
    input=input_tensor,
    running_mean=running_mean,
    running_var=running_var,
    weight=weight,
    bias=bias,
    use_input_stats=use_input_stats,
    momentum=momentum,
    eps=eps
)
print('input tensor:\n', input_tensor)
print('\noutput tensor:\n',output_tensor)

input tensor:
 tensor([[[[ 0.4429,  0.0564, -0.8291,  1.1433],
          [-0.6939, -0.7697, -1.4194,  0.2286],
          [-1.4104, -0.1500,  0.2020, -1.0591],
          [ 0.3582, -0.1656,  0.2051, -1.1076]],

         [[ 0.5590, -0.9749, -2.4325, -0.0278],
          [ 0.6364,  1.4038,  1.1722, -0.2091],
          [-0.0190,  2.5039, -0.5707, -1.3462],
          [ 0.1076,  1.9312,  0.6722,  0.5899]],

         [[ 0.1542, -0.6337,  0.1058,  0.3186],
          [ 0.9883, -0.1966, -0.2011,  0.7814],
          [-0.2530,  0.2210, -0.9094, -0.3013],
          [-0.8912,  1.2809,  0.5716,  3.4703]]],


        [[[ 0.7105, -0.7458, -0.8224,  0.4514],
          [ 0.3341, -0.7917, -1.1918, -2.2718],
          [ 1.2324,  0.7007, -0.3812,  0.6503],
          [-0.8143, -0.4616,  0.5654, -0.6793]],

         [[ 0.0203,  0.8249, -0.6871,  2.0697],
          [-1.0145,  0.1541,  2.2623,  0.7888],
          [-1.2894, -0.0743,  1.2140,  0.1584],
          [-0.2534,  1.0432,  0.2157, -0.1156]],

         [[ 0

In [23]:
#layer_norm
# layer normalization , normalizes teh last few dimensions of the input tensor, dimension except the batch dimension.
# it computes the mean and variance over these dimension for each sample independently making it indeendent of btach size
import torch
import torch.nn.functional as F

# Define an input tensor (batch_size=2, features=4, height=3, width=3)
input_tensor = torch.randn(2, 4, 3, 3)  # Shape: (batch_size, features, height, width)

# Specify the dimensions to normalize (last two dimensions: height and width)
normalized_shape = (3, 3)  # Normalize over the last two dimensions (height, width)

# Define learnable parameters (optional)
weight = torch.ones(3 * 3).reshape(normalized_shape)  # Scale parameter (gamma), one value per normalized element
bias = torch.zeros(3 * 3).reshape(normalized_shape)   # Shift parameter (beta), one value per normalized element

# Apply Layer Normalization
eps = 1e-5  # Small constant for numerical stability
output_tensor = F.layer_norm(
    input=input_tensor,
    normalized_shape=normalized_shape,
    weight=weight,
    bias=bias,
    eps=eps
)

# Print the output tensor
print("Input Tensor:\n", input_tensor)
print("\nOutput Tensor (Layer Normalized):\n", output_tensor)

Input Tensor:
 tensor([[[[-1.3399, -0.5054, -0.2237],
          [ 1.0391, -0.8920,  0.5449],
          [ 0.4306, -0.5703, -1.0075]],

         [[ 0.6220,  0.6217, -1.3201],
          [ 2.2047,  1.0517,  0.6067],
          [ 0.4296,  1.9671,  0.3063]],

         [[ 1.6000,  2.6749, -0.6814],
          [-0.1662,  0.5952,  2.2210],
          [-0.4480,  0.4725,  2.3571]],

         [[ 0.5503, -0.6633, -0.4572],
          [ 0.8336,  0.2932, -0.1070],
          [-0.7641,  1.1423, -0.7377]]],


        [[[ 0.7553, -1.3207, -0.3951],
          [ 0.3857,  0.3825,  1.8887],
          [-1.6279, -0.9746, -0.0910]],

         [[ 0.0063, -0.1847,  0.3210],
          [ 2.1754, -1.0228,  0.1505],
          [-0.5886, -0.3755,  1.2899]],

         [[ 0.4226, -1.7317, -0.3964],
          [ 0.4475, -0.2443, -0.8554],
          [-0.3420,  0.7340,  0.0734]],

         [[ 1.4618,  1.1563,  1.0729],
          [-0.0700,  0.8824,  0.6557],
          [-0.3382, -0.0596,  1.3355]]]])

Output Tensor (Layer Normaliz

In [28]:
#local_response_norm

#LRN normalizes each pixel across nearby channels (feature maps) and is commonly used in covolutional neural networks (CNNs), particularly in older architectures like AlesNet.

import torch 
import torch.nn.functional as F 
input_tensor = torch.randn(2,4,3,3)
size =3
alpha = 0.0001
beta = 0.75 
k = 0.1

output_tensor = F.local_response_norm(
    input=input_tensor,
    size=size,
    alpha=alpha,
    beta=beta,
    k=k
)

print('input:\n',input_tensor)
print('\noutput:\n',output_tensor)

input:
 tensor([[[[ 2.0667e-01, -1.0599e-01, -2.4010e+00],
          [ 1.7566e+00, -1.4337e+00,  1.7986e+00],
          [ 2.5884e-01,  9.1898e-01,  4.5527e-01]],

         [[ 1.2623e-02, -8.4083e-01,  1.3972e+00],
          [-5.5456e-01, -5.8515e-01,  1.6556e+00],
          [-5.2487e-01, -1.4630e+00,  4.9386e-02]],

         [[-1.2961e+00,  6.3021e-01,  1.2197e-01],
          [-6.3145e-01,  7.6239e-02, -2.3349e-01],
          [ 6.1112e-01, -2.8929e-01, -1.2086e+00]],

         [[ 9.8071e-01, -1.1314e+00, -5.0179e-01],
          [ 4.4344e-01,  5.2481e-01, -1.7056e+00],
          [-2.2077e+00,  1.6969e+00,  3.7873e-01]]],


        [[[ 7.4024e-01, -2.1024e-01,  1.4448e+00],
          [ 2.1806e-01,  9.4254e-01, -1.7191e+00],
          [-8.0877e-02, -1.0069e-03,  7.7004e-01]],

         [[-1.1582e+00, -1.8372e-01, -1.6108e-03],
          [ 8.0797e-01, -7.6964e-01, -1.1793e-01],
          [-8.5453e-01, -9.9000e-02, -1.6620e+00]],

         [[-6.1338e-01, -8.1826e-01, -2.0167e-01],
         

In [30]:
#rms_nrom
#applies root mean square normalization
#normalize input tensor by dividing it by the root mean sqyare of its value over the specified dimensions. 
#RMSNorm is a lightweight normalization technoque that does not compute a mean or require learnable bias parameters , making it computationally efficient

import torch 
import torch.nn.functional as F 

input_tensor = torch.randn(2,4,3,3)
normalized_shape = (3,3) # normalize over the last two dimensions
weight = torch.ones(3*3).reshape(normalized_shape)
eps = 1e-6 # small constant for numerical stability

#apply rms norm
output_tensor = F.rms_norm(
    input=input_tensor,
    normalized_shape=normalized_shape,
    weight=weight,
    eps=eps
)

print('input tensor:\n', input_tensor)
print('\noutput tensor (rms normalized):\n',output_tensor)


input tensor:
 tensor([[[[ 0.1922, -0.3542, -0.0786],
          [-0.6105, -0.2181, -0.2393],
          [-0.4442, -1.9372,  1.3535]],

         [[ 0.0453,  1.2244,  0.9363],
          [ 0.7094,  0.9371,  1.0721],
          [-1.2750,  1.0188,  0.2003]],

         [[-0.7419, -1.9866,  0.2004],
          [ 0.7826,  1.0550,  1.4504],
          [-0.2918, -1.5483,  2.3967]],

         [[-1.1690, -0.5543, -1.0750],
          [ 0.1287, -0.3554, -0.3362],
          [-0.0775,  0.0403,  1.4425]]],


        [[[ 0.4881,  1.2022, -0.6943],
          [ 0.4945, -0.6746, -1.6062],
          [ 0.7551,  0.8657,  0.0080]],

         [[ 0.3827,  1.7225, -0.0801],
          [ 1.9365,  1.2369,  0.3345],
          [-0.1362, -2.0459,  0.6690]],

         [[-0.6671, -0.2857,  1.5563],
          [ 1.8626, -0.2039, -0.9786],
          [ 0.2453,  1.0418,  0.4889]],

         [[ 0.2505, -0.3716,  1.2659],
          [-0.7632,  0.6880,  0.1976],
          [ 0.7440, -0.8440,  0.2968]]]])

output tensor (rms normalized

In [31]:
#normalize 
# function performs Lp normalization of the input tensor over a specified dimension.
# it normalizes each vector along the specified dimension by deviding it by its L[ norm

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([[3.0,4.0,5.0,6.0],[1.0,2.0,3.0,4.0]])
print('input tensor:\n',input_tensor)

# parameter for normalization
p = 2.0 # L2 nrom (euclidean norm)
dim = 1 #normalize along dimension 1 (feautres)
eps = 1e-12 

normalized_tensor = F.normalize(input=input_tensor,p=p,eps=eps,dim=dim)
print('normalized tenspr:\n',normalized_tensor)

#verify the l2 norm of each row is approximately 1
print('\nL2 Norm of Elach Row',torch.norm(normalized_tensor,p=2,dim=dim))


input tensor:
 tensor([[3., 4., 5., 6.],
        [1., 2., 3., 4.]])
normalized tenspr:
 tensor([[0.3235, 0.4313, 0.5392, 0.6470],
        [0.1826, 0.3651, 0.5477, 0.7303]])

L2 Norm of Each Row tensor([1.0000, 1.0000])


In [32]:
#linaer 
# this function applies linear transformation to the input tensor
#operatin is commonly used in neural network for full =y connected layres

import torch
import torch.nn.functional as F 

input_tensor = torch.randn(2,4)
weight_matrix = torch.randn(3,4)
bias_vector = torch.randn(3)
output_tensor = F.linear(input=input_tensor, weight=weight_matrix, bias = bias_vector)

print("Input Tensor:\n", input_tensor)
print("\nWeight Matrix:\n", weight_matrix)
print("\nBias Vector:\n", bias_vector)
print("\nOutput Tensor (Linear Transformation):\n", output_tensor)


Input Tensor:
 tensor([[-1.0893, -0.3072, -0.6570, -0.6048],
        [-1.9077,  1.0850,  0.6637, -1.1992]])

Weight Matrix:
 tensor([[-2.1312, -1.9938,  0.6720, -1.2265],
        [ 1.0330,  0.4254, -0.7774,  0.1815],
        [ 0.1109, -0.0977, -2.3333, -0.9830]])

Bias Vector:
 tensor([0.7982, 0.0718, 0.2012])

Output Tensor (Linear Transformation):
 tensor([[ 4.0324, -0.7831,  2.2380],
        [ 4.6174, -2.1708, -0.4861]])


In [33]:
#bilinear
# operation is useful in tasks like bilinear pooling where interactions between two sets of features are modeled

import torch 
import torch.nn.functional as F 

input1 = torch.randn(2,4)
input2= torch.randn(2,3)

weight = torch.randn(5,4,3)
bias = torch.randn(5)
output_tensor = F.bilinear(input1=input1, input2=input2, weight=weight, bias=bias)

print("Input Tensor 1:\n", input1)
print("\nInput Tensor 2:\n", input2)
print("\nWeight Tensor:\n", weight)
print("\nBias Tensor:\n", bias)
print("\nOutput Tensor (Bilinear Transformation):\n", output_tensor)


Input Tensor 1:
 tensor([[-0.3954, -0.9825,  0.6825, -0.5252],
        [-2.1812, -0.5727,  2.0892, -0.2917]])

Input Tensor 2:
 tensor([[-1.0951, -2.5553, -0.2692],
        [-0.7462,  1.1191,  1.1455]])

Weight Tensor:
 tensor([[[-0.0739,  0.3247, -0.6910],
         [ 0.8234, -1.1381,  1.0265],
         [ 1.2938, -0.8794,  0.2502],
         [ 0.5947,  1.0882, -0.3124]],

        [[-0.7460, -0.6583,  1.2932],
         [ 1.6261, -0.0850, -1.4893],
         [-1.3521,  0.0175, -0.1038],
         [ 0.5686, -1.3340,  1.8783]],

        [[ 0.5009,  1.0569, -0.8461],
         [-1.4141, -0.5980,  0.2224],
         [-0.8126,  1.5270, -0.3843],
         [ 0.1493, -0.1760,  1.2149]],

        [[ 0.1895,  1.2277, -1.8962],
         [ 1.5365, -1.7857,  0.3528],
         [ 1.1865, -0.2864, -0.4948],
         [ 0.1859, -0.2852,  0.9319]],

        [[-0.1267, -1.0174, -1.2453],
         [ 0.3359,  1.0251,  1.5602],
         [-0.6472,  1.2854, -1.8520],
         [-0.0937,  0.8802, -0.5476]]])

Bias Tens

In [4]:
#dropout
#regulirization technique that randomly zeroes soem elements of the input tensor during training with a specified probability p/
#dropout helps prevent overfitting by reducing co-adaption between beurons

import torch 
import torch.nn.functional as F 

# input_tensor = torch.tensor([2.0,5.0,-1.0,6.0,3.0])
input_tensor = torch.ones(2,5)

training =  True
inplace = False 
p = 0.5 #probability of zeroing and element

output_tensor = F.dropout(
    input=input_tensor,
    training=training,
    p=p,
    inplace=inplace,
)
print('output tensor (after dropout):\n',output_tensor)

#each eement has a 50% chance of being zeroed.
# for example if thes econd and fourth elements are zerored, in the first row the reaming elements are scled by (1/(1-0.5) = 2)

output tensor (after dropout):
 tensor([[2., 2., 2., 2., 2.],
        [0., 0., 2., 2., 2.]])


In [7]:
# alpha_dropout
# variant of dropout designed to work with self-normalizing neural networks.
# e.g using SELU activation function
# alpha dropout ensures that the mean and variance of the activations remain unchanged, even after droout is applied

import torch 
import torch.nn.functional as F 

input_tensor = torch.randn(2,6)

#can add
# if self.training: output.F.alpha_dropout()
output_tensor = F.alpha_dropout(
    input=input_tensor,
    p=0.5,# probability of zeroing and element
    training=True,
    inplace=False
)
print('output tensor:\n',output_tensor)

#custom dropout probability, experiment with different probabilities
# in-place operation, perform alpha dropout in-place ot save money
# batch processing, apply aplha dropout to batches of data for efficient computations

output tensor:
 tensor([[-0.7792, -0.7792, -0.7792, -0.7792,  0.3448,  3.1374],
        [-0.7792,  1.1091,  0.4981, -0.7792, -0.7792,  1.3498]])


In [12]:
#feature_alpha_dropout
# function applies Feature Alpha Dropout, a variant of droout that randmoly masks entire feauture channels.
# variant of dropout that randomly masks entire feature channles
# it is designed to work with self-normalizing networks
# ensures that the mean and variance of the activations remain unchanged

import torch
import torch.nn as nn  
import torch.nn.functional as F 

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel,self).__init__()

    def forward(self,input_tensor):
        if self.training:
            output = F.feature_alpha_dropout(
                 p = 0.5,
                training = True,
                inplace = False,
                input=input_tensor
            )
        else:
            output = input_tensor
        return output

input_tensor = torch.randn(2,5)
model = MyModel()
model.train()
output_train = model(input_tensor)
print('output tensor (training mode)\n', output_tensor)

model.eval()
output_eval = model(input_tensor)
print('output tensor (evaluation mode):\n',output_eval)

output tensor (training mode)
 tensor([[-0.7792, -0.7792, -0.7792, -0.7792,  0.3448,  3.1374],
        [-0.7792,  1.1091,  0.4981, -0.7792, -0.7792,  1.3498]])
output tensor (evaluation mode):
 tensor([[-2.9115,  0.5659,  0.8973,  0.7014,  2.5634],
        [ 1.4822,  1.1028,  1.0060,  0.8892,  2.2446]])


In [13]:
#dropout1d
# This function applies 1D dropout , which randomly zeroes out entire channels (1D feature maps) in the input tensor. It is commonly used in 1D convolutional neural networks (e.g., for time-series data or audio signals).



import torch
import torch.nn.functional as F

# Define an input tensor (batch_size=2, channels=3, length=5)
input_tensor = torch.ones(2, 3, 5)  # Shape: (batch_size, channels, length)
print("Input Tensor:\n", input_tensor)

# Parameters for 1D dropout
p = 0.5           # Probability of zeroing out a channel (50% chance)
training = True   # Apply dropout only during training
inplace = False   # Do not modify the input tensor in-place

# Apply 1D dropout
output_tensor = F.dropout1d(input=input_tensor, p=p, training=training, inplace=inplace)

# Print the output tensor
print("\nOutput Tensor (After 1D Dropout):\n", output_tensor)

Input Tensor:
 tensor([[[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]]])

Output Tensor (After 1D Dropout):
 tensor([[[2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.]],

        [[0., 0., 0., 0., 0.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.]]])


In [14]:
#dropoutwd
# this function applies 2D dropout , which randomly zeroes out entire channels (2D feature maps) in the input tensor. It is commonly used in 2D convolutional neural networks (e.g., for image data).

import torch
import torch.nn.functional as F

# Define an input tensor (batch_size=2, channels=3, height=4, width=4)
input_tensor = torch.ones(2, 3, 4, 4)  # Shape: (batch_size, channels, height, width)
print("Input Tensor:\n", input_tensor)

# Parameters for 2D dropout
p = 0.5           # Probability of zeroing out a channel (50% chance)
training = True   # Apply dropout only during training
inplace = False   # Do not modify the input tensor in-place

# Apply 2D dropout
output_tensor = F.dropout2d(input=input_tensor, p=p, training=training, inplace=inplace)

# Print the output tensor
print("\nOutput Tensor (After 2D Dropout):\n", output_tensor)

Input Tensor:
 tensor([[[[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]],

         [[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]],

         [[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]]],


        [[[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]],

         [[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]],

         [[1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.],
          [1., 1., 1., 1.]]]])

Output Tensor (After 2D Dropout):
 tensor([[[[2., 2., 2., 2.],
          [2., 2., 2., 2.],
          [2., 2., 2., 2.],
          [2., 2., 2., 2.]],

         [[0., 0., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 0.]],

         [[0., 0., 0., 0.],
      

In [15]:
#dropout3d
# This function applies 3D dropout , which randomly zeroes out entire channels (3D feature maps) in the input tensor. It is commonly used in 3D convolutional neural networks (e.g., for volumetric data such as medical imaging or video processing)

import torch
import torch.nn.functional as F

# Define an input tensor (batch_size=2, channels=3, depth=4, height=4, width=4)
input_tensor = torch.ones(2, 3, 4, 4, 4)  # Shape: (batch_size, channels, depth, height, width)
print("Input Tensor:\n", input_tensor)

# Parameters for 3D dropout
p = 0.5           # Probability of zeroing out a channel (50% chance)
training = True   # Apply dropout only during training
inplace = False   # Do not modify the input tensor in-place

# Apply 3D dropout
output_tensor = F.dropout3d(input=input_tensor, p=p, training=training, inplace=inplace)

# Print the output tensor
print("\nOutput Tensor (After 3D Dropout):\n", output_tensor)

Input Tensor:
 tensor([[[[[1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.]],

          [[1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.]],

          [[1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.]],

          [[1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.]]],


         [[[1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.]],

          [[1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.]],

          [[1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.]],

          [[1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.],
           [1., 1., 1., 1.]]],


         [[[1., 1., 1., 1.],
        

In [19]:
#embedding
# function retrieves embeddings from a fixed dictionary (embedding matrix) based on input indices, it is commonly used in natural languege processing

import torch 
import torch.nn.functional as F 

#define the embedding matrix (v=10, embedding_dims=3)
#V = vocabulart size (number of unique tokens), embedding_dims=3
embedding_matrix = torch.randn(10,3) #Shape: (V=10, embedding_dims=3)

#define the input tensor (batch_size=2, sequence_length=4)
#each element in the input tensor is an index into the embedding matrix
input_tensor = torch.tensor([[1,2,4,5],[4,3,2,9]])#batch_size, sequenec_length

output_tensor = F.embedding(input=input_tensor, weight=embedding_matrix)

print("Embedding Matrix:\n", embedding_matrix)
print("\nInput Tensor (Indices):\n", input_tensor)
print("\nOutput Tensor (Embeddings):\n", output_tensor)

# can specified padding_idx, embedding vector for at that index is not updated during training and remains fixed.
weights = torch.randn(10,3)
weights[0,:].zero_() # set the embedding vector for padding_idx = 0 to zeros
output_tensor = F.embedding(input=input_tensor, weight=weights, padding_idx=0)
print('\nOutput Tensor (Embeddings):\n',output_tensor)

Embedding Matrix:
 tensor([[ 0.0068, -2.6321,  0.2775],
        [ 0.0899,  0.2752, -0.2612],
        [ 0.0830,  2.0061, -0.2114],
        [-1.6072, -2.3703,  0.0961],
        [-1.1281,  0.5882, -1.3024],
        [ 1.0387,  0.8942,  0.1183],
        [-0.4841, -1.0705, -0.9068],
        [-0.8395,  1.2017,  1.4339],
        [-0.6019,  2.0140,  0.8370],
        [-0.2726, -0.5958,  1.2552]])

Input Tensor (Indices):
 tensor([[1, 2, 4, 5],
        [4, 3, 2, 9]])

Output Tensor (Embeddings):
 tensor([[[ 0.0899,  0.2752, -0.2612],
         [ 0.0830,  2.0061, -0.2114],
         [-1.1281,  0.5882, -1.3024],
         [ 1.0387,  0.8942,  0.1183]],

        [[-1.1281,  0.5882, -1.3024],
         [-1.6072, -2.3703,  0.0961],
         [ 0.0830,  2.0061, -0.2114],
         [-0.2726, -0.5958,  1.2552]]])

Output Tensor (Embeddings):
 tensor([[[-2.8683, -1.0660, -1.4782],
         [ 0.8882,  0.1742,  0.2513],
         [-1.0397,  0.0435, -0.8875],
         [-0.1824,  0.6750, -0.2242]],

        [[-1.0397

In [20]:
#one_hot
# function converts a tensor of class indices into a one-hot encoded tensor
# one-hot encoding is commonly used in machine learning tasks suc as classification, where categorical dat is represented as binary vectors

import torch
import torch.nn.functional as F 

class_indices = torch.tensor([0,1,2,1,0])
print('input tensor (class indices):\n',class_indices)

#apply one-hot encoding with inferred number of classes
one_hot_tensor_inferred = F.one_hot(class_indices)
print('\nOne-Hot Encoded Tensor (Inferred Classes):\n', one_hot_tensor_inferred)

#apply one-hot encoding with a specified number of classes
num_classes = 5
one_hot_tensor_specified = F.one_hot(class_indices, num_classes=num_classes)
print('\nOne-Hot Encoded Tensor (Specified Classes):\n', one_hot_tensor_specified)

#example with multi-dimensional input
class_indices_2d = torch.tensor([[0,1],[2,0]])
print('input tensor (2d class indics):\n', class_indices_2d)

one_hot_tensor_2d = F.one_hot(class_indices_2d, num_classes=3)
print('one-hot encoded tensor (2d-input):\n',one_hot_tensor_2d)

input tensor (class indices):
 tensor([0, 1, 2, 1, 0])

One-Hot Encoded Tensor (Inferred Classes):
 tensor([[1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 0]])

One-Hot Encoded Tensor (Specified Classes):
 tensor([[1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 1, 0, 0, 0],
        [1, 0, 0, 0, 0]])
input tensor (2d class indics):
 tensor([[0, 1],
        [2, 0]])
one-hot encoded tensor (2d-input):
 tensor([[[1, 0, 0],
         [0, 1, 0]],

        [[0, 0, 1],
         [1, 0, 0]]])


In [21]:
#pairwise 
#pairwise distance between two tensors along a specified dimension using Lp norm (euclidean distance for p= 2)
#it is commonly used iin tasks like similarity computation clustering and metric learning.

import torch 
import torch.nn.functional as F 

#define two input tensor with same shape
x1 = torch.tensor([[1.0,2.0,3.0],[4.0,5.0,6.0]])
x2 = torch.tensor([[1.5,2.5,3.5],[4.5,5.5,6.5]])

#parameter for pairwise distance
p = 2.0 #use l2 norm (euclidean distance)
eps = 1e-6 #small value to avoid division by zero
keepdim = False # donot keep the reduced dimension

pairwise_distances = F.pairwise_distance(x1=x1, x2=x2, p=p, eps=eps, keepdim=keepdim)

#print the result
print('input tensor x1:\n',x1)
print('input tensor x2:\n',x2)
print('\npairwise distances:\n',pairwise_distances)

input tensor x1:
 tensor([[1., 2., 3.],
        [4., 5., 6.]])
input tensor x2:
 tensor([[1.5000, 2.5000, 3.5000],
        [4.5000, 5.5000, 6.5000]])

pairwise distances:
 tensor([0.8660, 0.8660])


In [22]:
#cosine_similarity
# function computes the cosine similarity between two tensors along a specified dimension
# cosine similarity measures the cosine of the angle between two vectors and is commonly used in task like texts similarity, recommendation systems and metric learning

import torch
import torch.nn.functional as F 

x1 = torch.tensor([[1.0,2.0,3.0],[4.0,5.0,6.0]])
x2 = torch.tensor([[1.0,2.0,3.0],[1.0,1.0,1.0]])

dim = 1
eps = 1e-8

cosine_similarities = F.cosine_similarity(x1=x1,x2=x2,dim=dim,eps=eps)

print("Input Tensor x1:\n", x1)
print("\nInput Tensor x2:\n", x2)
print("\nCosine Similarities:\n", cosine_similarities)


Input Tensor x1:
 tensor([[1., 2., 3.],
        [4., 5., 6.]])

Input Tensor x2:
 tensor([[1., 2., 3.],
        [1., 1., 1.]])

Cosine Similarities:
 tensor([1.0000, 0.9869])


In [23]:
#pdist
#this function computes the pairwise Lp-Norm distances between every pair of row vectors in the input tensor.
# it is commonly used in tasks like clustering , similarity computation and metric learning

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([
    [1.0,2.0,3.0],
    [4.0,5.0,6.0],
    [7.0,8.0,9.0],
    [10.0,11.0,12.0]
])

pairwise_distances = F.pdist(input=input_tensor, p=p)
print('input tensor:\n', input_tensor)
print('\npairwise distances (upper triangular portion):\n',pairwise_distances)

input tensor:
 tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  6.],
        [ 7.,  8.,  9.],
        [10., 11., 12.]])

pairwise distances (upper triangular portion):
 tensor([ 5.1962, 10.3923, 15.5885,  5.1962, 10.3923,  5.1962])


In [25]:
#binary_cross_entropy
# computes binary cross-entropy loss between predicted probabilities and target labels.
# commonly used in binary classification 

import torch
import torch.nn.functional as F 

input_tensor = torch.tensor(
    [0.0,0.6,0.2]
)

target_tensor = torch.tensor(
    [1.0,1.0,0.0]
)

loss = F.binary_cross_entropy(input=input_tensor, target=target_tensor, reduction='mean')

print("Input Tensor (Predicted Probabilities):\n", input_tensor)
print("\nTarget Tensor (Ground Truth Labels):\n", target_tensor)
print("\nBinary Cross-Entropy Loss:\n", loss)

Input Tensor (Predicted Probabilities):
 tensor([0.0000, 0.6000, 0.2000])

Target Tensor (Ground Truth Labels):
 tensor([1., 1., 0.])

Binary Cross-Entropy Loss:
 tensor(33.5780)


In [27]:
#binary_cross_entropy_with_logits
# computes binary cross-entropy between target labels and input logits , applying a sigmoid activation internally.

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([
    2.0,-1.0,0.5 
])

target_tensor = torch.tensor([
    1.0,0.0,1.0 #batch_size = 3, num_classes = 1, values betweet 0 and 1 # ground truth table for each samples
])

pos_weight = torch.tensor([1.5])

#compute binary cross-enttropy loss with default reduction ='mean'
loss = F.binary_cross_entropy_with_logits(
    input=input_tensor,
    target=target_tensor,
    pos_weight=pos_weight,
    reduction='mean'
)

print("Input Tensor (Logits):\n", input_tensor)
print("\nTarget Tensor (Ground Truth Labels):\n", target_tensor)
print("\nBinary Cross-Entropy Loss with Logits:\n", loss)

Input Tensor (Logits):
 tensor([ 2.0000, -1.0000,  0.5000])

Target Tensor (Ground Truth Labels):
 tensor([1., 0., 1.])

Binary Cross-Entropy Loss with Logits:
 tensor(0.4049)


In [30]:
#poison_nll_loss

#function computes the Poisson negative log-likelihood loss
# commonly used in tasks involving count data or Poisson-distributed targets
# it measures the difference between the predicted expectation and the observer counts 

import torch 
import torch.nn.functional as F 

input_tensor = torch.tensor([
    2.00,5.0,1.0#predicted expectations
])

target_tensor = torch.tensor([
    1.0,4.0,2.0 #observerd counts
])

log_input = True 
full = False 
reduction = 'mean'

#compute poisson negative log-likelihood loss
loss = F.poisson_nll_loss(
    input=input_tensor,
    target=target_tensor,
    log_input=log_input,
    full=full,
    reduction=reduction,
)

print("Input Tensor (Predicted Expectations):\n", input_tensor)
print("\nTarget Tensor (Observed Counts):\n", target_tensor)
print("\nPoisson Negative Log-Likelihood Loss:\n", loss)

Input Tensor (Predicted Expectations):
 tensor([2., 5., 1.])

Target Tensor (Observed Counts):
 tensor([1., 4., 2.])

Poisson Negative Log-Likelihood Loss:
 tensor(44.8402)


In [33]:
#cosine_embedding_loss
#function computes the cosine embedding loss which is used to measure the similarity between two tensors 
# based on their cosine similarity
# commonly used in tasks like metric laerning, where you want to push similar pairs closed together and dissimilar pairs farther apart

import torch 
import torch.nn.functional as F 

input1 = torch.tensor([[1.0,2.0,3.0],[4.0,5.0,6.0]])
input2 = torch.tensor([[1.0,2.0,3.0],[1.0,1.0,1.0]])

#define target tensor whether the pairs are similar or dissimiar
target = torch.tensor([1,-1])

#parameter for cosine embeddig loss
margin = 0.5 #margin for dissimlar pairs
reduction = 'mean' #average the loss acorss samples

#compute cosine embedding loss
loss = F.cosine_embedding_loss(
    input1=input1,
    input2=input2,
    target=target,
    margin=margin,
    reduction=reduction
)
# Print the results
print("Input Tensor 1:\n", input1)
print("\nInput Tensor 2:\n", input2)
print("\nTarget Tensor (1=similar, -1=dissimilar):\n", target)
print("\nCosine Embedding Loss:\n", loss)


Input Tensor 1:
 tensor([[1., 2., 3.],
        [4., 5., 6.]])

Input Tensor 2:
 tensor([[1., 2., 3.],
        [1., 1., 1.]])

Target Tensor (1=similar, -1=dissimilar):
 tensor([ 1, -1])

Cosine Embedding Loss:
 tensor(0.2435)


In [37]:
# cross_entropy
# function computes the cross-entropy loss between unnormalized logits input, and target labels. 
# commonly used in mult-class classification tasks

input_tensor = torch.tensor([
    [2.0,1.0,0.1,-0.1,0.5],
    [-1.0,3.0,2.5,0.0,-0.5],
    [0.2,0.8,0.5,1.0,2.0]
])

target_tensor = torch.tensor([0,1,4])

class_weights = torch.tensor([1.0,1.0,1.0,1.0,1.0])

loss = F.cross_entropy(
    input=input_tensor,
    target=target_tensor,
    weight=class_weights,
    reduction='mean',
    label_smoothing=0.0
)

print("Input Tensor (Logits):\n", input_tensor)
print("\nTarget Tensor (Class Indices):\n", target_tensor)
print("\nCross-Entropy Loss:\n", loss)

Input Tensor (Logits):
 tensor([[ 2.0000,  1.0000,  0.1000, -0.1000,  0.5000],
        [-1.0000,  3.0000,  2.5000,  0.0000, -0.5000],
        [ 0.2000,  0.8000,  0.5000,  1.0000,  2.0000]])

Target Tensor (Class Indices):
 tensor([0, 1, 4])

Cross-Entropy Loss:
 tensor(0.6257)


In [40]:
#cts_loss
#function computes Connectionist Temporal Clasification CTC loss
# which is commonly used in sequencet-to-sequence task like speech recognition and hadwriting recognition
# CTF handles variable-lenth input output alignments and oes not require pre-aligned data

import torch 
import torch.nn.functional as F 

#define log probabilites (log_probs)
#shape (t=5, n=2, c=6), where
# t = input seuqnce length
# N _batch size
# c = number of characaters in the alphabet (including the blank token)
log_probs = torch.randn(5,2,6).log_softmax(dim=2) #log probabilities for each time step and batch 

#define target sequences 
#sahpe : (n=2, s=3), where S is the maximum target sequence length
targets = torch.tensor([[1,2,3],[2,3,4]])

#define input lenghts
# shape : n=2, lengths of input sequences for each sample in the batch
input_lengths = torch.tensor([5,4])

#define input lengths
#shape : n=2, lengths of target sequences for each sample in the batch
target_lengths = torch.tensor([3,2])

#compute CTC loss with default reduction='mean'
loss = F.ctc_loss(
    log_probs=log_probs,
    targets=targets,
    input_lengths=input_lengths,
    target_lengths=target_lengths,
    blank=0, #blank label index
    reduction='mean', #average the loss across samples
    zero_infinity=False #do not zero infinite losses
)

print("Log Probabilities (log_probs):\n", log_probs)
print("\nTargets:\n", targets)
print("\nInput Lengths:\n", input_lengths)
print("\nTarget Lengths:\n", target_lengths)
print("\nCTC Loss:\n", loss)

Log Probabilities (log_probs):
 tensor([[[-1.5283, -3.0880, -1.9405, -1.2894, -1.7417, -1.9434],
         [-0.9785, -2.3023, -1.2762, -1.9838, -3.4500, -2.5811]],

        [[-1.9167, -1.0028, -3.1757, -2.0431, -1.6209, -2.1460],
         [-2.0156, -1.6047, -3.5327, -2.3179, -1.0484, -1.6735]],

        [[-2.3723, -2.3309, -0.9706, -1.7361, -1.5490, -3.1697],
         [-0.8324, -3.2492, -4.7505, -2.7841, -2.9472, -0.9081]],

        [[-1.1662, -1.9477, -2.7091, -2.9340, -0.9358, -3.3879],
         [-1.4907, -1.3069, -3.4306, -1.5904, -3.9991, -1.3880]],

        [[-2.3020, -0.5954, -2.0446, -2.5151, -3.4745, -2.2316],
         [-3.3717, -2.6175, -1.2592, -2.7947, -0.6716, -3.3022]]])

Targets:
 tensor([[1, 2, 3],
        [2, 3, 4]])

Input Lengths:
 tensor([5, 4])

Target Lengths:
 tensor([3, 2])

CTC Loss:
 tensor(2.2098)


In [None]:
#gaussian_nll_loss
#computes teh gaussian negative log-likelihoood (nll) loss which is commmonly used in probabilitisc regression tasks where both the mean input abd variance of a Gaussian distribution are predicted 
# evaluates how well the predicted gaussian distribution matcehs the observer target.

import torch
import torch.nn.functional as F 

#define the input tensor (predicted mean of the gaussiain distribution)
#shape representing the predicted means
input_tensor = torch.tensor([2.0,5.0,1.0]) #predicted means

#define the variance tensor (predicted variance of the gaussian distribution)
#shape: (batch_size=3), representing the predicted variances 
variance_tensor = torch.tensor([0.5,0.3,0.2]) #predicted variances

#parameter for gaussian nll loss
full = False #do not include the constant term in the loss calculation
eps = 1e-6 #small vaue added to variance for numerical stability
reduction = 'mean' #average the loss across samples

#commpute gaussian nll loss
loss = F.gaussian_nll_loss(
    input=input_tensor,
    target=target_tensor,
    var=variance_tensor,
    full=full,
    eps=eps,
    reduction=reduction
)
# Print the results
print("Input Tensor (Predicted Means):\n", input_tensor)
print("\nTarget Tensor (Observed Values):\n", target_tensor)
print("\nVariance Tensor (Predicted Variances):\n", variance_tensor)
print("\nGaussian Negative Log-Likelihood Loss:\n", loss)

#default reduction
    #use the deafult reduction 'mean' to compute the average loss
#sum reduction
    # use 'sum' to compute the total loss
#no reduction
    #  use 'none' to get the loss for each sample individually
#include constant term
    # set full=True to include the constant term in the loss calculation
#honosdastic variance 
    #use a scalar value if itis the same for all samples

Input Tensor (Predicted Means):
 tensor([2., 5., 1.])

Target Tensor (Observed Values):
 tensor([0, 1, 4])

Variance Tensor (Predicted Variances):
 tensor([0.5000, 0.3000, 0.2000])

Gaussian Negative Log-Likelihood Loss:
 tensor(17.1378)


In [1]:
#hinge embedding loss
# used in tasks like metric learning and similarity-based models
# encourage similar pairs of embeddings to have a small distance and dissimilar pairs to have a large distance

import torch 
import torch.nn.functional as F 

#define input tensor (pairwise distancs or similarities)
input_tensor = torch.tensor([0.9,-0.4,1.2,-0.7]) #similarity distance values

#define the target tensor(indicating whether the pairs are similar or dissimilar)
# 1 means similar, -1 means dissimilar
target_tensor = torch.tensor([1,-1,1,-1])

#parameters for hinge embedding loss
margin =  1.0 # margin for dissimilar pairs
reduction = 'mean'  #average the loss across samples

#compute for hinge embedding loss
loss = F.hinge_embedding_loss(
    input=input_tensor,
    target=target_tensor,
    margin=margin,
    reduction=reduction
)

print("Input Tensor (Similarity/Distances):\n", input_tensor)
print("\nTarget Tensor (1=similar, -1=dissimilar):\n", target_tensor)
print("\nHinge Embedding Loss:\n", loss)

Input Tensor (Similarity/Distances):
 tensor([ 0.9000, -0.4000,  1.2000, -0.7000])

Target Tensor (1=similar, -1=dissimilar):
 tensor([ 1, -1,  1, -1])

Hinge Embedding Loss:
 tensor(1.3000)


In [5]:
#kl_div
#computes Kullback-Leibler (KL) divergence loss
# which measures difference between two probability distribution
# commonly used in task like generative modelling, reinforcement learning and probabilitic modelling

import torch 
import torch.nn.functional as F 

#define the input tensor (log-probabilities)
#log probabilities for each class
input_tensor = torch.log_softmax(torch.tensor([[2.0,1.0,3.0],[1.0,4.0,2.0]]),dim=1)

#define the target tensor (target probabilities)
target_tensor = torch.softmax(torch.tensor([[0.1,0.7,0.2],[0.4,0.5,0.1]]),dim=1)

#parameters for KL divergence loss
reduction = 'batchmean' #alligns with mathematical definition
log_target = False #target is not in log space

#compute KL divergence loss
loss = F.kl_div(
    input=input_tensor,
    target=target_tensor,
    reduction=reduction,
    log_target=log_target
)

print("Input Tensor (Log-Probabilities):\n", input_tensor)
print("\nTarget Tensor (Probabilities):\n", target_tensor)
print("\nKL Divergence Loss:\n", loss)


Input Tensor (Log-Probabilities):
 tensor([[-1.4076, -2.4076, -0.4076],
        [-3.1698, -0.1698, -2.1698]])

Target Tensor (Probabilities):
 tensor([[0.2546, 0.4640, 0.2814],
        [0.3514, 0.3883, 0.2603]])

KL Divergence Loss:
 tensor(0.5941)


In [6]:
#l1_loss 
#computes the L1 oss, mean absolute difference between the predicted values (input) and the ground turth values.
# it is commonly used in regression tasks where robustness to outliers is desired

import torch 
import torch.nn.functional as F 

#define the input tensor (predicted values)
#represent the prediction
input_tensor = torch.tensor([[1.0,2.0],[3.0,4.0],[5.0,6.0]]) #predicted values

#define the target tensor (ground truth values)
#representing ground truth
target_tensor = torch.tensor([[1.5,2.5],[3.5,4.5],[5.5,6.5]]) #ground truth values

#parameters for L1 loss
reduction = 'mean' #average the loss across all elements 

#compute L1 loss
loss = F.l1_loss(
    input = input_tensor,
    target=target_tensor,
    reduction=reduction
)

print("Input Tensor (Predictions):\n", input_tensor)
print("\nTarget Tensor (Ground Truth):\n", target_tensor)
print("\nL1 Loss:\n", loss)


Input Tensor (Predictions):
 tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])

Target Tensor (Ground Truth):
 tensor([[1.5000, 2.5000],
        [3.5000, 4.5000],
        [5.5000, 6.5000]])

L1 Loss:
 tensor(0.5000)


In [7]:
#mse loss
# MSE loss which measures the average squared difference between predicted values and ground truth values. it is commonly used in used in regression tasks

import torch 
import torch.nn.functional as F 
#define the input (predicted values)
input_tensor = torch.tensor([[1.0,2.0],[3.0,4.0],[5.0,6.0]])

#define the target tensor (groun turth values)
#shape (batch_size=3, num_features=2), representing ground truth
target_tensor = torch.tensor([[1.5,2.5],[3.5,4.5],[5.5,6.5]])

#optional : define weights for each sample
weights = torch.tensor([1.0,2.0,1.0]) #weights for each sample in the batch

#parameters for mse loss
reduction = 'mean'

#compute mse loss
loss = F.mse_loss(
    input = input_tensor,
    target=target_tensor,
    reduction=reduction
)


# Print the results
print("Input Tensor (Predictions):\n", input_tensor)
print("\nTarget Tensor (Ground Truth):\n", target_tensor)
print("\nMean Squared Error Loss:\n", loss)

Input Tensor (Predictions):
 tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])

Target Tensor (Ground Truth):
 tensor([[1.5000, 2.5000],
        [3.5000, 4.5000],
        [5.5000, 6.5000]])

Mean Squared Error Loss:
 tensor(0.2500)


In [8]:
#margin_ranking_loss
# computes margin ranking loss, which is commonly used in tasks like learning-to-rank or paiwise ranking.
# encourages one input to be ranked higher than another input based on a target value

import torch
import torch.nn.functional as F 

#define the input tensors (scores for two items)
input1 = torch.tensor([0.9,0.6,0.8]) #scores for item 1
input2 = torch.tensor([0.7,0.8,0.5]) #scores for item 2

#define the target tensor (indication the drsired ranking)
# 1 means input1 should rank higher than input2 , -1 means input2 should rank higher than 1input1
target = torch.tensor([1,-1,1]) #ranking preferences

#parameters for maring ranking loss 
margin = 0.5 #margin by which the diffence between scores must exceed
reduction = 'mean' #average the loss across samples

# compute margin ranking loss
loss = F.margin_ranking_loss(
    input1=input1,
    input2=input2,
    target=target,
    margin=margin,
    reduction=reduction
)

# Print the results
print("Input Tensor 1 (Scores for Item 1):\n", input1)
print("\nInput Tensor 2 (Scores for Item 2):\n", input2)
print("\nTarget Tensor (Ranking Preferences):\n", target)
print("\nMargin Ranking Loss:\n", loss)


Input Tensor 1 (Scores for Item 1):
 tensor([0.9000, 0.6000, 0.8000])

Input Tensor 2 (Scores for Item 2):
 tensor([0.7000, 0.8000, 0.5000])

Target Tensor (Ranking Preferences):
 tensor([ 1, -1,  1])

Margin Ranking Loss:
 tensor(0.2667)


In [10]:
#margin_loss
#comput the multilable margin loss
# commonly used in multilabel classification tasks
# it encourages the correct labels to have higher scores than inccorrect labels by at least a margin of 1

import torch 
import torch.nn.functional as F 

#define the input tensor (raw scores for each class)
input_tensor = torch.tensor([
    [0.1,0.2,0.4,-0.1,-0.3], # raw scores for sample 1
    [-0.5,0.8,0.6,0.3,-0.2]  # raw scores for sample 2
])

#define the target tensor (ground truth labels)
#shape : (batch_size=2, num_classes=5), where:
# - positive integers represent the indices of correct labels.
# - negative integer sare used to pad the target tensor.
target_tensor = torch.tensor([
    [0,2,-1,-1,-1], # correct labels for sample 1 are classes 0 and 2
    [1,2,-1,-1,-1] # correct labels for sample 1 are classes 1 and 2
])

# parameters for multilabel margin loss
reduction = 'mean'

# compute multilabel margin loss
loss = F.multilabel_margin_loss(
    input=input_tensor,
    target=target_tensor,
    reduction=reduction
)

# Print the results
print("Input Tensor (Raw Scores):\n", input_tensor)
print("\nTarget Tensor (Correct Labels with Padding):\n", target_tensor)
print("\nMultilabel Margin Loss:\n", loss)

Input Tensor (Raw Scores):
 tensor([[ 0.1000,  0.2000,  0.4000, -0.1000, -0.3000],
        [-0.5000,  0.8000,  0.6000,  0.3000, -0.2000]])

Target Tensor (Correct Labels with Padding):
 tensor([[ 0,  2, -1, -1, -1],
        [ 1,  2, -1, -1, -1]])

Multilabel Margin Loss:
 tensor(0.5500)


In [1]:
#multilabel_soft_margin_loss
# computes the multi-label soft margin loss
# which is used for mult-label classifation tasks it applies a sigmoid actiation to the input logits and then computes the binary cross-entropy loss for each class independently

import torch
import torch.nn.functional as F 

#define the input tensor (logits for each class)
input_tensor = torch.tensor([
    [0.1,0.2,0.4,-0.1,0.3], #logits for sample 1
    [-0.5,0.8,0.6,0.3,-0.2]  #logits for sample 2
])

#define the target tensor (groud turh labes)
# shape : (batch_size=2, num_classes=5), where:
# -1 indidicates the presence of a label
# -0 indiicates teh absence of a label

target_tensor = torch.tensor([
    [1,0,1,0,1], # labels for sample 1
    [0,1,1,0,0] # labels for sample 2
])

# parameters for multilabel soft margin loss
weight = None # optional : class weights (none means equal weight for all classes)
reduction = 'mean' #average the loss across samples 

#compute multilabel soft margin loss
loss = F.multilabel_soft_margin_loss(
    input=input_tensor,
    target=target_tensor,
    weight=weight,
    reduction=reduction
)

# Print the results
print("Input Tensor (Logits):\n", input_tensor)
print("\nTarget Tensor (Ground Truth Labels):\n", target_tensor)
print("\nMultiLabel Soft Margin Loss:\n", loss)

Input Tensor (Logits):
 tensor([[ 0.1000,  0.2000,  0.4000, -0.1000,  0.3000],
        [-0.5000,  0.8000,  0.6000,  0.3000, -0.2000]])

Target Tensor (Ground Truth Labels):
 tensor([[1, 0, 1, 0, 1],
        [0, 1, 1, 0, 0]])

MultiLabel Soft Margin Loss:
 tensor(0.5889)


In [2]:
## multi_margin_loss

# function computes the multi-class margin loss which is commonly used in classificationt tasks. it measures teh difference betweent the predicted scores for the correct class adn the cores for other classes , penalizing predicitons where the maring betweent the correct class the scores for other classes ,
# penalizing predictions where the margin between the correct class and incorrect classes is too small

import torch 
import torch.nn.functional as F 

input = torch.tensor([[0.2,0.5,0.1,0.4,0.8],[0.7,0.3,0.9,0.2,0.4],[0.1,0.6,0.3,0.8,0.2]])

target = torch.tensor([4,2,3])

loss = F.multi_margin_loss(input, target, p=1,margin=1.0,reduction='mean')

print('multi-margin loss:', loss.item())

multi-margin loss: 0.4000000059604645


In [4]:
#nll_loss
# computes the nagative log likelihood loss

import torch 
import torch.nn.functional as F 

input = torch.tensor([[ -0.5,  -1.2,  -2.1,  -0.8,  -1.7],  # Log-probs for sample 1
                      [ -1.3,  -0.4,  -1.1,  -1.8,  -2.0],  # Log-probs for sample 2
                      [ -1.9,  -1.6,  -0.7,  -1.4,  -1.0]]) # Log-probs for sample 3

target = torch.tensor([0, 1, 2])  # Correct class indices

loss = F.nll_loss(input, target,reduction='mean')

print('multi-margin loss:', loss.item())

multi-margin loss: 0.5333333611488342


In [5]:
#huber_loss
# computes the huber loss , with optional weighting
# huber loss is a combination of L1 (absilute error) and L2 (squared error) losses.
# it uses the squared term when the absolute error is small (below threshold delta) and switches to a scaled L1 term when the error is alrge
# this makes it robust to outliers while still being smooth near zero

import torch 
import torch.nn.functional as F 

input = torch.tensor([1.0,2.0,3.0,4.0])
target = torch.tensor([1.5,1.8,3.5,6.0])
loss = F.huber_loss(input,target,reduction='mean',delta=1.0)
print('huber loss:', loss.item())

huber loss: 0.4424999952316284


In [10]:
#smooth_l1_loss 
# is a combination of L1 (absolute error) and L2 (squared error) losses. it uses the squared term when the absolute error is small
# and switches to the L1 term when the error is large. this makes it robust to outliers while still ebing smooth near zero

import torch 
import torch.nn.functional as F 


input = torch.tensor([1.0,2.0,3.0,4.0])
target = torch.tensor([1.5,1.8,3.5,6.0])
loss = F.smooth_l1_loss(input,target,reduction='mean',beta=1.0)
print('smooth l1 loss:', loss.item())

smooth l1 loss: 0.4424999952316284


In [11]:
#soft margin loss
#used for binary classification where target label either 1 or -1
# computes the logistic loss between the predicted values and the ground turth labels

import torch 
import torch.nn.functional as F 


input = torch.tensor([1.0,2.0,3.0,4.0])
target = torch.tensor([1.5,1.8,3.5,6.0])
loss = F.soft_margin_loss(input,target,reduction='mean')
print('soft margin loss:', loss.item())

soft margin loss: 0.05709948018193245


In [14]:
# triplet margin loss is commonly used in tasks like metric learning where the goal is to learn embeddings such that similar items are closed together than dissimilar items (negative pairs)


import torch 
import torch.nn.functional as F 

anchor = torch.tensor([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]])
positive = torch.tensor([[1.1, 2.1, 3.1, 4.1], [5.1, 6.1, 7.1, 8.1]])
negative = torch.tensor([[2.0, 3.0, 4.0, 5.0], [6.0, 7.0, 8.0, 9.0]])

# Compute the Triplet Margin Loss with margin=1.0 and p=2 (L2 norm)
loss = F.triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, reduction='mean')

print("Triplet Margin Loss:", loss.item())

Triplet Margin Loss: 0.0


In [None]:
#triplet_margin_loss
# same to triplet_margin_loss but it allows you to specify a custom distance function
# this flexivility makes it useful when you want to compute distances in ways other than the defaule l2 norm

import torch 
import torch.nn.functional as F 

anchor = torch.tensor([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]])
positive = torch.tensor([[1.1, 2.1, 3.1, 4.1], [5.1, 6.1, 7.1, 8.1]])
negative = torch.tensor([[2.0, 3.0, 4.0, 5.0], [6.0, 7.0, 8.0, 9.0]])

def manhattan_distance(x1,x2):
    return torch.sum(torch.abs(x1- x2), dim=1)


# Compute the Triplet Margin Loss with margin=1.0 and p=2 (L2 norm)
loss = F.triplet_margin_with_distance_loss(anchor, positive, negative, distance_function=manhattan_distance,margin=1.0, reduction='mean')


print("Triplet Margin Loss:", loss.item())

Triplet Margin Loss: 0.0


In [2]:
#triplet_margin_with_distance_loss
#use in tasks like metric learning, where the egoal is to laern embeddings such that similar items are closer together than disimalr items
#allow specification of custom distance function
# which provides flexibility beyonf the default L2 norm

"""d(A,) -d(A,N) + margin <= 0
* A is the anchor embedding
* P is the positive embedding (similar to the anchor)
* N is the negative embeedding (dissimilar to the anchor)
* d(.,.) is the distance function
* margin is a hyperparameter that defines the minium distance between the positive and negative pairs
"""

import torch 
import torch.nn.functional as F 

anchor = torch.tensor([[1.0,2.0,3.0,4.0],[5.0,6.0,7.0,8.0]])
positive = torch.tensor([[1.1,2.1,3.1,4.1],[5.1,6.1,7.1,8.1]])
negative = torch.tensor([[2.0,3.0,4.0,5.0],[6.0,7.0,8.0,9.0]])

def manhattan_distancee(x1,x2):
    return torch.sum(torch.abs(x1 - x2), dim=-1)

loss = F.triplet_margin_with_distance_loss(
    anchor, positive, negative, 
    distance_function = manhattan_distance,
    margin=1.0,
    reduction = 'mean'
)

print('triplet margin with distance loss:', loss.item())

triplet margin with distance loss: 0.0


In [6]:
#pixel_shuffle

"""
* rearranges element in a tensor to increat its spatial resolution by a factor called upscale_factor
* commonly used in tasks like super-resolution where low-resolution images are  upscaled to higher resolutions
(*, Cx r2, H, W)
* is any number in batch dimensions
C nuber of channels
r upscale factor
H is height 
W is width

rerrange elements into a tensor of shape. this increase spatial resolution )height and width by a faactor of r, while reducing the channel dimenson by r2
"""

import torch 
import torch.nn.functional as F 
input = torch.randn(1,9,4,4)
output = F.pixel_shuffle(input, upscale_factor=3)
print('input shape:\n',input.shape)
print('output shape:\n',output.shape)

# height and width increase by a factor of 3, 4x2 = 12
# channels reduced by r2 , 3^2, 9/9 =1

#pixel_unshuffle
# reverse operation of pixel shuffle, rearrange elements in a tensor to decrease spatial resolutioon by downsale factor,
# while incrase the channel dimension
# commonly used in tasks like downsampling or reversing super resolution operations
output = F.pixel_unshuffle(output, downscale_factor=3)
print('unshuffle output shape:\n',output.shape)


input shape:
 torch.Size([1, 9, 4, 4])
output shape:
 torch.Size([1, 1, 12, 12])
unshuffle output shape:
 torch.Size([1, 9, 4, 4])


In [1]:
#pad 
# adds padding to a tensor along specified dimensions.
# padding can be applied symmetrically or asymmetrically
# depending on the values provided in the pad tuple
# the padding size of described starting from the last dimension and moving forward 

# pad , specify padding size for each dimension since tuple has the form for 1d padding , 2d padding , 3d padding

# mode
# constant pads with contast value
# reflect, pads with reflection of the input 
# replicate pads by replicating the edge values of the input
# circular pads with the circular repeition of the input 

import torch 
import torch.nn.functional as F 

t4d = torch.arange(12).reshape(1,1,3,4)

print('original tensor:',t4d)

#pad the last dimensioon (width) by (1,2)
p1d = (1,2) #leff = 1 right =2
out1 = F.pad(t4d, p1d, mode='constant', value=0)
print(out1)
print('shape:',out1.shape)

#pad the last two dinemsion (height, width)
p2d = (1,2,3,4) #pad width=(1,2), height=(3,4)
out2 = F.pad(t4d, p2d, mode='constant', value=-1)
print('\npadded tensor (height and width):')
print(out2)
print('shape',out2.shape)

#pad the last three dinemsion (depth, height, width)
p3d = (1,2,3,4,5,6) #pad width=(1,2), height=(3,4) depth=(5,6)
out3 = F.pad(t4d, p3d, mode='constant', value=99)
print('\npadded tensor (height and width):')
print(out3)
print('shape',out3.shape)



original tensor: tensor([[[[ 0,  1,  2,  3],
          [ 4,  5,  6,  7],
          [ 8,  9, 10, 11]]]])
tensor([[[[ 0,  0,  1,  2,  3,  0,  0],
          [ 0,  4,  5,  6,  7,  0,  0],
          [ 0,  8,  9, 10, 11,  0,  0]]]])
shape: torch.Size([1, 1, 3, 7])

padded tensor (height and width):
tensor([[[[-1, -1, -1, -1, -1, -1, -1],
          [-1, -1, -1, -1, -1, -1, -1],
          [-1, -1, -1, -1, -1, -1, -1],
          [-1,  0,  1,  2,  3, -1, -1],
          [-1,  4,  5,  6,  7, -1, -1],
          [-1,  8,  9, 10, 11, -1, -1],
          [-1, -1, -1, -1, -1, -1, -1],
          [-1, -1, -1, -1, -1, -1, -1],
          [-1, -1, -1, -1, -1, -1, -1],
          [-1, -1, -1, -1, -1, -1, -1]]]])
shape torch.Size([1, 1, 10, 7])

padded tensor (height and width):
tensor([[[[99, 99, 99, 99, 99, 99, 99],
          [99, 99, 99, 99, 99, 99, 99],
          [99, 99, 99, 99, 99, 99, 99],
          [99, 99, 99, 99, 99, 99, 99],
          [99, 99, 99, 99, 99, 99, 99],
          [99, 99, 99, 99, 99, 99, 9

In [4]:
#interpolate

""" 
The interpolate function is used to resize tensors by either specifying the target size (size) or a scaling factor (scale_factor). It supports various interpolation modes for 1D, 2D, and 3D data. The most common use case is resizing spatial dimensions (e.g., height and width) for images or volumetric data.

Key Parameters
input : The input tensor to resize.
size : The target output size for the spatial dimensions (height, width, depth). Can be an integer or a tuple.
scale_factor : A multiplier for the spatial dimensions. Can be a float or a tuple.
mode : Specifies the interpolation algorithm:
'nearest': Nearest neighbor interpolation.
'bilinear': Bilinear interpolation (2D only).
'bicubic': Bicubic interpolation (2D only).
'trilinear': Trilinear interpolation (3D only).
'area': Area-based interpolation (downsampling only).
'nearest-exact': Exact nearest neighbor interpolation.
align_corners : Controls alignment of corner pixels for interpolation modes like 'bilinear' and 'bicubic'. Default is False.
antialias : Applies anti-aliasing when downsampling. Supported modes are 'bilinear' and 'bicubic'.
"""

import torch 
import torch.nn.functional as F 

input = torch.arange(1,17,dtype=torch.float32).reshape(1,1,4,4)

print('original tensor:')
print(input)
print('shape:',input.shape)

#using sampe scacle_factor with 'bilinear' mode
output_bilinear = F.interpolate(input, scale_factor=2, mode='bilinear', align_corners=False)
print('\nunsampled tensor (bilinear):')
print(output_bilinear)
print('shape:',output_bilinear)

#downsample using size  with 'areaa' mode
output_area = F.interpolate(input, size=(2,2), mode='area')
print('\ndownsampled tensor (area):')
print(output_area)
print('shape:', output_area.shape)

#unsample using scale-factor with 'nearest' mode
output_nearest = F.interpolate(input, scale_factor=3, mode='nearest' )
print('\nunsamppled tensor (nearest):')
print(output_nearest)
print('shape:',output_nearest.shape)

#apply anti-aliasing with 'bicubic mode for downsampling
output_antialias = F.interpolate(input, size=(2,2), mode ='bicubic', align_corners=False, antialias=True)
print('\ndownsampled tensor (bicubic with anti-aliasing):')
print(output_antialias)
print('shape:',output_antialias.shape)

original tensor:
tensor([[[[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])
shape: torch.Size([1, 1, 4, 4])

unsampled tensor (bilinear):
tensor([[[[ 1.0000,  1.2500,  1.7500,  2.2500,  2.7500,  3.2500,  3.7500,
            4.0000],
          [ 2.0000,  2.2500,  2.7500,  3.2500,  3.7500,  4.2500,  4.7500,
            5.0000],
          [ 4.0000,  4.2500,  4.7500,  5.2500,  5.7500,  6.2500,  6.7500,
            7.0000],
          [ 6.0000,  6.2500,  6.7500,  7.2500,  7.7500,  8.2500,  8.7500,
            9.0000],
          [ 8.0000,  8.2500,  8.7500,  9.2500,  9.7500, 10.2500, 10.7500,
           11.0000],
          [10.0000, 10.2500, 10.7500, 11.2500, 11.7500, 12.2500, 12.7500,
           13.0000],
          [12.0000, 12.2500, 12.7500, 13.2500, 13.7500, 14.2500, 14.7500,
           15.0000],
          [13.0000, 13.2500, 13.7500, 14.2500, 14.7500, 15.2500, 15.7500,
           16.0000]]]])
shape: tensor([[[[ 1.0000, 

In [None]:
#upsample
#upsample_nearest
#upsample_billinear

""" 
this function have been replace with interpolate since its deprecated, 

upsample function resizes the input tensor to a specified size or scale factor using various interpolation algorithms 
supports upsamping for 1D,2D , 3D data. 
the behaviour of upsample is identical to interpolate but its deprecaed in favor of interpolate

can refer to code above , also for the upsample nearest with mode='nearest'
""" 


# output_upsample_nearest = F.upsample_nearest(input, scale_factoro=2)


In [9]:
#grids_sample
#affine_grid 

""" 
used to sample values from an input tensor at locations specified by a normlized grid.
comonly used in tasks like spatioal transformations, image warping and spatial transformer networks (STNs)

"""

import torch 
import torch.nn.functional as F 

#example input tensor of shape (batch_size, channels , height, width)
input = torch.arange(1,17, dtype=torch.float32).reshape(1,1,4,4)


print('original tensor:')
print(input)
print('shape:',input.shape)


#create a grid for sampling
# the grid specifies normalized coordinates [-1,1] for x and y.
# here we create a gird that samples the center of the input

grid = torch.tensor([[
    [[-1.0,-1.0],[1.0,-1.0]], #top-left and top-right corners
    [[-1.0,1.0],[1.0,1.0]] #bottom-left and bottom-right corners
]])

output = F.grid_sample(input,grid,mode='bilinear',padding_mode='zeros',align_corners=False)
print('\nsampled output:')
print(output)
print('shape:',output.shape)

#define a grid using affine_grid
theta = torch.tensor([[[1.0,0.2,0],[0.2,1.0,0]]])

#generate a grid usign affine_grid
grid = F.affine_grid(theta, input.size(), align_corners=False)

#apply grid_sample with bilinear interpolation
output = F.grid_sample(input, grid, mode='bilinear',padding_mode='zeros', align_corners=False)

#print the transformed output
print('\ntransformed output:')
print(output)
print('shape:',output.shape)

original tensor:
tensor([[[[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])
shape: torch.Size([1, 1, 4, 4])

sampled output:
tensor([[[[0.2500, 1.0000],
          [3.2500, 4.0000]]]])
shape: torch.Size([1, 1, 2, 2])

transformed output:
tensor([[[[ 0.4900,  1.5300,  3.1000,  4.9000],
          [ 3.4200,  5.5000,  7.3000,  9.1000],
          [ 7.9000,  9.7000, 11.5000, 11.8800],
          [12.1000, 13.9000, 13.7700,  7.8400]]]])
shape: torch.Size([1, 1, 4, 4])


In [11]:
#parallel.data_parallel

""" 
function allows you to evaluate a model across multiple GPUs in parallel , splitting the input data across the specified devices and collecting the results oont the output device


data_parallel function is functional version of the DataParallel module. it distributes the computation of a model acorss multiple GPUs, making it useful for training or inference with large models or datasets
"""

import torch 
import torch.nn as nn 
import torch.nn.functional as F 

class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc = nn.Linear(10,1)

    def forward(self,x):
        return self.fc(x)

if torch.cuda.is_available():
    device_ids = [0,1]
    output_device = 0 
else:
    raise RuntimeError('CUDA is not available. Please run this code on a machine with GPUs')

model = SimpleModel().cuda(device_ids[0])

#create random input data 
input_data = torch.randn(16,10).cuda(device_ids[0])

#use data_parallel to evaluate the model across multiple GPUs

output = nn.parallel.data_parallel(
    module = model,
    inputs = input_data,
    device_ids = device_ids,
    output_device = output_device
)

print('Output:')
print(output)
print('shape:',output.shape)

RuntimeError: CUDA is not available. Please run this code on a machine with GPUs