# Understanding PyTorch

We use this notebook to study the structure of PyTorch.

In [1]:
import sys
import gym
import time
import numpy as np  

import torch  
import torch.nn            as nn
import torch.nn.functional as F
import torch.optim         as optim
from   torch.autograd import Variable

import matplotlib.pyplot   as plt


  from .autonotebook import tqdm as notebook_tqdm


In [15]:

# Two fully connected layers: 3 inputs -> 5 hidden units -> 4 outputs.
layer1 = nn.Linear( 3, 5 )
layer2 = nn.Linear( 5, 4 )

# Sample input vector [0., 1., 2.] stored as float32.
input_arr = np.arange( 3, dtype = np.float32 )

# Wrap the array as a tensor and add a leading batch axis, giving shape (1, 3).
# NOTE: the name `input` shadows the Python builtin of the same name in this notebook.
input = torch.from_numpy( input_arr ).float( ).unsqueeze( 0 )

# Hidden activations, shape (1, 5).
x12 = F.relu( layer1( input ) )

# Normalise over the feature axis. `dim` is passed explicitly because calling
# F.softmax without it is deprecated and emits a UserWarning.
x23 = F.softmax( layer2( x12 ), dim = 1 )





# Example Network
We use a three-layer network: an input layer, one hidden layer, and an output layer.

In [26]:

class ExampleNetwork( nn.Module ):
    """Small policy network: one hidden ReLU layer followed by a softmax output.

    The network maps a state vector of length ``num_inputs`` to a probability
    distribution over ``num_outputs`` actions. An Adam optimizer over the
    network's parameters is created at construction time and stored on the
    instance as ``self.optimizer``.
    """

    def __init__( self, num_inputs, num_outputs, hidden_size, learning_rate = 3e-4 ):
        """Build the two linear layers and the optimizer.

        Args:
            num_inputs:    Length of the state vector fed to the network.
            num_outputs:   Number of actions, i.e. the width of the softmax output.
            hidden_size:   Number of units in the hidden layer.
            learning_rate: Learning rate passed to the Adam optimizer.
        """
        super( ).__init__( )

        # The policy emits one probability per action, so the action count is
        # the output width (not the input width).
        self.num_actions = num_outputs
        self.linear1     = nn.Linear( num_inputs, hidden_size  )
        self.linear2     = nn.Linear( hidden_size, num_outputs )

        # Using the ADAM optimizer.
        self.optimizer   = optim.Adam( self.parameters( ), lr = learning_rate )

    def forward( self, state ):
        """Return action probabilities for a batch of states.

        Args:
            state: Tensor of shape (batch, num_inputs).

        Returns:
            Tensor of shape (batch, num_outputs); each row is a softmax
            distribution over the actions.
        """
        x = F.relu( self.linear1( state ) )
        x = F.softmax( self.linear2( x ), dim = 1 )
        return x

    def get_action( self, state ):
        """Sample an action from the policy for a single state.

        Args:
            state: NumPy array holding one state vector of length num_inputs.

        Returns:
            Tuple ``(action, log_prob)``: the sampled action index and the log
            of its probability as a tensor that stays attached to the autograd
            graph (for use in a policy-gradient loss).
        """
        # Generates a 1 x num_inputs tensor.
        state = torch.from_numpy( state ).float( ).unsqueeze( 0 )

        # Forward the neural network and return the probability distribution.
        probs = self.forward( state ).squeeze( 0 )

        # Sample an action according to the output policy. Only the batch axis
        # was dropped above, so `p` stays 1-D even when there is a single action.
        action = np.random.choice( self.num_actions, p = probs.detach( ).numpy( ) )

        # The log-value of the probability, needed by the policy gradient.
        log_prob = torch.log( probs[ action ] )

        return action, log_prob


In [46]:

# Throw-away network: 3 inputs, 1 output, 4 hidden units.
tmp_nn = ExampleNetwork( 3, 1, 4 )

# First sample state: prepend a batch axis for the forward pass, then strip
# every singleton axis so that a scalar tensor remains.
input = torch.tensor( [ 1., 3., 2. ] )
out1  = tmp_nn( input[ None, : ] ).squeeze( )

# Second sample state, handled the same way.
input = torch.tensor( [ 5., 4., 3. ] )
out2  = tmp_nn( input[ None, : ] ).squeeze( )

# Gather both scalar outputs into a single length-2 vector.
out_list = [ out1, out2 ]
tmp      = torch.stack( out_list )

# Weighted sum of the two outputs; being the last expression, it is the value
# the cell displays.
torch.dot( tmp, torch.tensor( [ 1., 3. ] ) )



tensor(4., grad_fn=<DotBackward0>)