# This is a glossary of PyTorch tools, some that we cover and some that we don't.
### We wanted to provide you with a repository of useful things that PyTorch gives you in case it is helpful
### Remember, you can find all of this in the documentation or on Google as well! And as always, make sure you read a little bit before plugging and chugging blindly. This is not an exhaustive list, so make sure you do your homework before breaking something!

In [None]:
import torch

## **Model Components**


In [None]:
# Illustrative sizes so this cell runs on its own; replace them with the
# feature counts of your own data.
input_size = 10
output_size = 5

# Applies a linear transformation (y = x @ W.T + b) to the input data
linear_layer = torch.nn.Linear(input_size, output_size)

# Convolutional layer (3 input channels -> 9 output channels; a 3x3 kernel with
# stride 1 and padding 1 keeps the spatial height and width unchanged)
conv_layer = torch.nn.Conv2d(in_channels=3, out_channels=9, kernel_size=3, stride=1, padding=1)

# Max pooling layer (a 2x2 window with stride 2 halves the height and width)
max_pool = torch.nn.MaxPool2d(kernel_size=2, stride=2)

# Average pooling layer (same window as above, but averages instead of taking the max)
avg_pool = torch.nn.AvgPool2d(kernel_size=2, stride=2)

# Dropout layer (regularization technique to prevent overfitting)
# Each element is zeroed with probability p during training only.
dropout_layer = torch.nn.Dropout(p=0.5)

# Dropout layer for 2D inputs (like images): zeroes entire channels at once
dropout2d_layer = torch.nn.Dropout2d(p=0.5)

# Batch normalization layer (improves speed, performance, and stability of neural networks)
batch_norm = torch.nn.BatchNorm1d(num_features=10)

# Layer normalization layer (normalizes over the trailing dimension of size 10)
layer_norm = torch.nn.LayerNorm(normalized_shape=10)

# LSTM layer (Long Short Term Memory - type of recurrent layer)
lstm_layer = torch.nn.LSTM(input_size=10, hidden_size=20, num_layers=2)

# GRU layer (Gated Recurrent Unit - type of recurrent layer)
gru_layer = torch.nn.GRU(input_size=10, hidden_size=20, num_layers=2)





## **Loss Functions**


In [None]:
# --- Regression losses ---

# MSELoss: mean of the squared differences between prediction and target
mse_loss = torch.nn.MSELoss()

# SmoothL1Loss: quadratic for small errors, linear for large ones, so outliers
# pull on it less than they do on MSELoss
smooth_l1_loss = torch.nn.SmoothL1Loss()

# --- Classification losses ---

# CrossEntropyLoss: the standard multi-class classification loss
ce_loss = torch.nn.CrossEntropyLoss()

# NLLLoss: negative log likelihood; it expects log-probabilities as input,
# so pair it with a LogSoftmax layer
nll_loss = torch.nn.NLLLoss()

# BCELoss: binary cross entropy, for binary classifiers that output a probability
binary_cross_entropy_loss = torch.nn.BCELoss()

# --- Margin-based classification losses ---

# SoftMarginLoss: logistic loss for two-class classification with targets of 1 or -1
soft_margin_loss = torch.nn.SoftMarginLoss()

# MultiMarginLoss: hinge loss for multi-class classification (one target class per sample)
multi_margin_loss = torch.nn.MultiMarginLoss()

# MultiLabelMarginLoss: hinge loss for multi-label classification
# (a sample may have several target classes)
multi_label_margin_loss = torch.nn.MultiLabelMarginLoss()

# --- Similarity / embedding losses ---

# HingeEmbeddingLoss: judges whether two inputs are similar or dissimilar
hinge_embedding_loss = torch.nn.HingeEmbeddingLoss()

# CosineEmbeddingLoss: compares two vectors through their cosine similarity
cosine_embedding_loss = torch.nn.CosineEmbeddingLoss()

# TripletMarginLoss: relative similarity between an anchor, a positive, and a
# negative sample; handy when learning embeddings
triplet_margin_loss = torch.nn.TripletMarginLoss()


## **Optimizers**

In [None]:
# Placeholder model so this cell runs on its own; swap in your own torch.nn.Module.
model = torch.nn.Linear(in_features=10, out_features=1)

# Stochastic Gradient Descent: Simple optimizer that can escape shallow local minima
# model.parameters() yields every learnable tensor of the model.
sgd_optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Adam Optimizer: Efficient and popular choice for deep learning
adam_optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Define a model parameter
params = torch.tensor([1.0], requires_grad=True)

# NOTE: optimizers take an *iterable* of tensors (or parameter-group dicts).
# Passing a bare tensor raises a TypeError, so the single parameter is wrapped
# in a list in every example below.

# SGD with Momentum: Variation of SGD, momentum helps accelerate gradient vectors in the right directions, thus leading to faster convergence.
optimizer = torch.optim.SGD([params], lr=0.01, momentum=0.9)

# Adagrad: Adapts the learning rate to the parameters, performs larger updates for infrequent parameters and smaller updates for frequent parameters. Good choice for sparse data.
optimizer = torch.optim.Adagrad([params], lr=0.01)

# RMSprop: Divide the learning rate for a weight by a running average of the magnitudes of recent gradients for that weight. Good choice for recurrent neural networks.
optimizer = torch.optim.RMSprop([params], lr=0.01)

# Adam: Computes adaptive learning rates for different parameters. Combination of RMSprop and Momentum.
optimizer = torch.optim.Adam([params], lr=0.01)

# Adamax: It is a variant of Adam based on the infinity norm.
optimizer = torch.optim.Adamax([params], lr=0.01)

# Adadelta: It is a more robust extension of Adagrad that adapts learning rates based on a moving window of gradient updates, instead of accumulating all past gradients.
optimizer = torch.optim.Adadelta([params], lr=0.01)

# ASGD: Averaged Stochastic Gradient Descent, it averages the parameter vector over time, leading to better generalization.
optimizer = torch.optim.ASGD([params], lr=0.01)

# LBFGS: Limited-memory BFGS, it approximates the Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm using a limited amount of computer memory. Good for small datasets.
optimizer = torch.optim.LBFGS([params], lr=0.01)



## **Activation Functions**


In [None]:
# --- Squashing activations (bounded output) ---

# Sigmoid: maps every value into the range (0, 1)
sigmoid = torch.nn.Sigmoid()

# Tanh: maps every value into the range (-1, 1)
tanh_activation = torch.nn.Tanh()

# --- ReLU family ---

# ReLU: keeps positive values and sets every negative value to zero
relu = torch.nn.ReLU()

# Leaky ReLU: like ReLU, but negative values are scaled by a small slope instead of zeroed
leaky_relu_activation = torch.nn.LeakyReLU()

# ELU: identity for positive values, a smooth exponential curve for negative ones
elu_activation = torch.nn.ELU()

# Softplus: a smooth approximation of ReLU whose output is always positive
softplus_activation = torch.nn.Softplus()

# --- Normalizing activation ---

# Softmax: rescales values along `dim` so they lie in [0, 1] and sum to 1
# (dim=1 is the class dimension for a (batch, classes) input)
softmax_activation = torch.nn.Softmax(dim=1)
