In [None]:
import python.main
import python.Model

# python/model.py

They implemented their own PPO.

In [None]:
python.main.PPO

The two networks used by `PPO` are imported from `python.Model`:

```python
# Network fed with states; sized by the state and action dimensions.
self.model = SimulationNN(self.num_state,self.num_action)
# Muscle network; sized by the environment's total muscle-related DOFs, the action dimension and the muscle count.
self.muscle_model = MuscleNN(self.env.GetNumTotalMuscleRelatedDofs(),self.num_action,self.num_muscles)
```

# Following the training script step by step
```python
# Command-line options: an optional saved-model path (-m) and the meta file (-d).
parser = argparse.ArgumentParser()
parser.add_argument('-m','--model',help='model path')
parser.add_argument('-d','--meta',help='meta file')

args =parser.parse_args()
# The meta file is required: without it the script prints a hint and exits.
if args.meta is None:
    print('Provide meta file')
    exit()

# The PPO trainer is constructed from the meta file.
ppo = PPO(args.meta)
# Create '../nn' if it is missing (presumably where models are stored -- confirm in SaveModel).
nn_dir = '../nn'
if not os.path.exists(nn_dir):
    os.makedirs(nn_dir)
# Resume from the model given with -m, otherwise save the current model.
if args.model is not None:
    ppo.LoadModel(args.model)
else:
    ppo.SaveModel()
print('num states: {}, num actions: {}'.format(ppo.env.GetNumState(),ppo.env.GetNumAction()))
# NOTE(review): the loop runs max_iteration-5 times; the reason for the -5 is not visible here.
for i in range(ppo.max_iteration-5):
    # One iteration = train, evaluate, then plot the returned rewards.
    ppo.Train()
    rewards = ppo.Evaluate()
    Plot(rewards,'reward',0,False)

```


# `ppo.Train()`

```python
def Train(self):
    """Run one training iteration.

    Calls ``GenerateTransitions`` and then ``OptimizeModel``, in that order.
    """
    self.GenerateTransitions()
    self.OptimizeModel()
```

## In `PPO.GenerateTransitions()`...

```python
# The model returns an action distribution and a value estimate for the given states.
a_dist,v = self.model(Tensor(states))
# Actions are sampled from the distribution and converted to a NumPy array.
actions = a_dist.sample().cpu().detach().numpy()
# Alternative kept for reference: take the distribution's `loc` instead of sampling.
# actions = a_dist.loc.cpu().detach().numpy() 
# Log-probabilities of the sampled actions and the value estimates, both flattened to 1-D.
logprobs = a_dist.log_prob(Tensor(actions)).cpu().detach().numpy().reshape(-1)
values = v.cpu().detach().numpy().reshape(-1)
# Hand the sampled actions to the environment.
self.env.SetActions(actions)
```
### self.env.SetActions(actions)

```python
# Two stepping modes, selected by self.use_muscle.
if self.use_muscle:
    # Muscle torques are read once, before the loop.
    mt = Tensor(self.env.GetMuscleTorques())
    # Each iteration ends with Steps(2), which is presumably why the iteration count is halved -- confirm.
    for i in range(self.num_simulation_per_control//2):
        # Desired torques are re-read every iteration; together with mt they
        # are mapped to activation levels by the muscle network.
        dt = Tensor(self.env.GetDesiredTorques())
        activations = self.muscle_model(mt,dt).cpu().detach().numpy()
        self.env.SetActivationLevels(activations)

        self.env.Steps(2)
else:
    # Without muscles the environment is advanced with a single call.
    self.env.StepsAtOnce()
```
### self.env.SetActivationLevels(activations)
### Question:
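- Why `self.num_simulation_per_control//2`? (Each loop iteration calls `self.env.Steps(2)`, so half as many iterations are needed to cover `num_simulation_per_control` steps, assuming `Steps(2)` advances two simulation steps.)
- What does `if self.use_muscle:` select? (In the snippets here, the muscle branch feeds activations from `muscle_model` to the environment, and `OptimizeModel` additionally trains the muscle network; otherwise the environment just runs `StepsAtOnce()`.)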


## self.OptimizeModel()

```python
def OptimizeModel(self):
    """Update the networks from the collected data.

    Runs ``ComputeTDandGAE`` and ``OptimizeSimulationNN``; the muscle
    network is optimised afterwards only when ``self.use_muscle`` is truthy.
    """
    self.ComputeTDandGAE()
    self.OptimizeSimulationNN()
    if not self.use_muscle:
        return
    self.OptimizeMuscleNN()
```
### self.ComputeTDandGAE()

### self.OptimizeSimulationNN()
```python
def OptimizeSimulationNN(self):
    """Run the PPO update of ``self.model`` on the replay buffer.

    For ``self.num_epochs`` epochs the buffered transitions are shuffled and
    consumed in mini-batches of ``self.batch_size``; transitions left over
    after the last full mini-batch are skipped for that epoch.  Every step
    minimises the sum of the clipped-surrogate actor loss, the entropy term
    (weighted by ``self.w_entropy``) and the mean squared difference between
    the value output and the stored TD value.

    Side effects: steps ``self.optimizer`` and stores the most recent
    mini-batch's actor/critic losses in ``self.loss_actor`` and
    ``self.loss_critic``.
    """
    all_transitions = np.array(self.replay_buffer.buffer)
    for j in range(self.num_epochs):
        np.random.shuffle(all_transitions)
        for i in range(len(all_transitions)//self.batch_size):
            transitions = all_transitions[i*self.batch_size:(i+1)*self.batch_size]
            batch = Transition(*zip(*transitions))

            stack_s = np.vstack(batch.s).astype(np.float32)
            stack_a = np.vstack(batch.a).astype(np.float32)
            stack_lp = np.vstack(batch.logprob).astype(np.float32)
            stack_td = np.vstack(batch.TD).astype(np.float32)
            stack_gae = np.vstack(batch.GAE).astype(np.float32)

            a_dist,v = self.model(Tensor(stack_s))

            # Critic loss: squared difference between the value output and TD.
            loss_critic = ((v-Tensor(stack_td)).pow(2)).mean()

            # Actor loss: probability ratio of new vs. stored log-probs,
            # weighted by the per-batch normalised GAE and clipped to
            # [1-clip_ratio, 1+clip_ratio].
            ratio = torch.exp(a_dist.log_prob(Tensor(stack_a))-Tensor(stack_lp))
            # The small constant guards against division by a zero std.
            stack_gae = (stack_gae-stack_gae.mean())/(stack_gae.std()+ 1E-5)
            stack_gae = Tensor(stack_gae)
            surrogate1 = ratio * stack_gae
            surrogate2 = torch.clamp(ratio,min =1.0-self.clip_ratio,max=1.0+self.clip_ratio) * stack_gae
            loss_actor = - torch.min(surrogate1,surrogate2).mean()

            # Entropy loss: negative sign, so higher entropy lowers the loss.
            loss_entropy = - self.w_entropy * a_dist.entropy().mean()

            self.loss_actor = loss_actor.cpu().detach().numpy().tolist()
            self.loss_critic = loss_critic.cpu().detach().numpy().tolist()

            loss = loss_actor + loss_entropy + loss_critic

            self.optimizer.zero_grad()
            # A fresh graph is built for every mini-batch and backward runs
            # once per graph, so the graph does not need to be retained.
            loss.backward()
            # Element-wise gradient clipping to [-0.5, 0.5].
            for param in self.model.parameters():
                if param.grad is not None:
                    param.grad.data.clamp_(-0.5,0.5)
            self.optimizer.step()
        print('Optimizing sim nn : {}/{}'.format(j+1,self.num_epochs),end='\r')
    print('')
```
### self.OptimizeMuscleNN()

```python
def OptimizeMuscleNN(self):
    """Train ``self.muscle_model`` on the data in ``self.muscle_buffer``.

    For ``self.num_epochs_muscle`` epochs, shuffled index mini-batches are
    drawn with ``generate_shuffle_indices``.  For each mini-batch the network
    maps ``JtA`` and ``TauDes`` to activations, the resulting torque is
    computed as ``L @ activation + b`` per sample, and the loss is the mean
    squared torque error (scaled by 1/100) plus a 0.01-weighted mean squared
    activation penalty.

    Side effects: steps ``self.optimizer_muscle`` and stores the last
    mini-batch loss in ``self.loss_muscle``.  If no mini-batch was processed,
    ``self.loss_muscle`` is left untouched.
    """
    loss = None
    for j in range(self.num_epochs_muscle):
        minibatches = self.generate_shuffle_indices(self.muscle_buffer['JtA'].shape[0],self.muscle_batch_size)

        for minibatch in minibatches:
            stack_JtA = self.muscle_buffer['JtA'][minibatch].astype(np.float32)
            stack_tau_des =  self.muscle_buffer['TauDes'][minibatch].astype(np.float32)
            stack_L = self.muscle_buffer['L'][minibatch].astype(np.float32)
            # Infer the batch dimension so a mini-batch whose length differs
            # from muscle_batch_size (e.g. a short final one) still reshapes.
            stack_L = stack_L.reshape(-1,self.num_action,self.num_muscles)
            stack_b = self.muscle_buffer['b'][minibatch].astype(np.float32)

            stack_JtA = Tensor(stack_JtA)
            stack_tau_des = Tensor(stack_tau_des)
            stack_L = Tensor(stack_L)
            stack_b = Tensor(stack_b)

            activation = self.muscle_model(stack_JtA,stack_tau_des)
            # Per-sample matrix-vector product: tau[i] = L[i] @ activation[i] + b[i].
            tau = torch.einsum('ijk,ik->ij',(stack_L,activation)) + stack_b

            loss_reg = (activation).pow(2).mean()
            loss_target = (((tau-stack_tau_des)/100.0).pow(2)).mean()

            loss = 0.01*loss_reg + loss_target

            self.optimizer_muscle.zero_grad()
            # A fresh graph is built for every mini-batch and backward runs
            # once per graph, so the graph does not need to be retained.
            loss.backward()
            # Element-wise gradient clipping to [-0.5, 0.5].
            for param in self.muscle_model.parameters():
                if param.grad is not None:
                    param.grad.data.clamp_(-0.5,0.5)
            self.optimizer_muscle.step()

        print('Optimizing muscle nn : {}/{}'.format(j+1,self.num_epochs_muscle),end='\r')
    # Only record a loss when at least one mini-batch was processed.
    if loss is not None:
        self.loss_muscle = loss.cpu().detach().numpy().tolist()
    print('')
```

# SimulationNN
```python
# Width of the two hidden layers shared by both heads' layouts.
num_h1 = 256
num_h2 = 256

# Policy head: num_states -> num_h1 -> num_h2 -> num_actions.
self.p_fc1 = nn.Linear(num_states,num_h1)
self.p_fc2 = nn.Linear(num_h1,num_h2)
self.p_fc3 = nn.Linear(num_h2,num_actions)
# Learnable per-action parameter, initialised to zero; forward() exponentiates it
# before passing it to MultiVariateNormal.
self.log_std = nn.Parameter(torch.zeros(num_actions))

# Value head: num_states -> num_h1 -> num_h2 -> 1 (separate weights from the policy head).
self.v_fc1 = nn.Linear(num_states,num_h1)
self.v_fc2 = nn.Linear(num_h1,num_h2)
self.v_fc3 = nn.Linear(num_h2,1)
```

```python
def forward(self,x):
    """Evaluate both heads on ``x``.

    Returns a pair ``(distribution, value)``: a ``MultiVariateNormal`` built
    from the policy head's output and ``exp(self.log_std)``, and the raw
    output of the value head.
    """
    # Policy head: two ReLU hidden layers followed by a linear output.
    policy_hidden = F.relu(self.p_fc2(F.relu(self.p_fc1(x))))
    action_dist = MultiVariateNormal(self.p_fc3(policy_hidden),self.log_std.exp())

    # Value head: same layout with its own weights.
    value_hidden = F.relu(self.v_fc2(F.relu(self.v_fc1(x))))
    return action_dist,self.v_fc3(value_hidden)
```

# MuscleNN
```python
# Hidden layer widths of the muscle network.
num_h1 = 1024
num_h2 = 512
num_h3 = 512
# Fully connected stack. The input width is num_total_muscle_related_dofs+num_dofs,
# matching the concatenation done in forward(); the output has one entry per muscle.
self.fc = nn.Sequential(
    nn.Linear(num_total_muscle_related_dofs+num_dofs,num_h1),
    nn.LeakyReLU(0.2, inplace=True),
    nn.Linear(num_h1,num_h2),
    nn.LeakyReLU(0.2, inplace=True),
    nn.Linear(num_h2,num_h3),
    nn.LeakyReLU(0.2, inplace=True),
    nn.Linear(num_h3,num_muscles),
    # Tanh followed by ReLU confines every output to [0, 1).
    nn.Tanh(),
    nn.ReLU()
)
```

```python
def forward(self,muscle_tau,tau):
    """Run the muscle network on scaled inputs.

    ``muscle_tau`` is divided by ``self.std_muscle_tau`` and ``tau`` by
    ``self.std_tau``; the two results are concatenated along dim 1 and
    passed through ``self.fc``.
    """
    scaled_inputs = [muscle_tau/self.std_muscle_tau,tau/self.std_tau]
    return self.fc.forward(torch.cat(scaled_inputs,dim=1))
```

: 

: 

: 