In [26]:
from torch import nn, tensor, max 
import numpy as np 

# Cross-entropy worked out by hand with NumPy.
# Labels are one-hot encoded: 0 -> [1, 0, 0], 1 -> [0, 1, 0], 2 -> [0, 0, 1].
# Y is the true label (class 0); Y_pred1 / Y_pred2 are predicted probabilities.
Y = np.array([1, 0, 0])
Y_pred1 = np.array([0.7, 0.2, 0.1])
Y_pred2 = np.array([0.1, 0.3, 0.6])
# loss = sum(-y * log(p)): only the entry of the true class contributes.
ce_pred1 = np.sum(-Y * np.log(Y_pred1))
ce_pred2 = np.sum(-Y * np.log(Y_pred2))
print(f'Loss1 : {ce_pred1:.4f}')
print(f'Loss2 : {ce_pred2:.4f}')

#### -- (1) single label -- #####

### Build the criterion
# CrossEntropyLoss applies softmax itself (LogSoftmax followed by NLLLoss).
loss = nn.CrossEntropyLoss()

### Build Y, Y_pred1, Y_pred2
# The target has shape (nBatch,); here nBatch = 1.
# Every target entry must satisfy 0 <= value < nClasses (so 0, 1 or 2).
# The target is a class index, not a one-hot vector.
Y = tensor([0], requires_grad=False)

# The input has shape (nBatch, nClasses) = (1, 3).
# Y_pred holds raw logits (softmax has not been applied).
Y_pred1 = tensor([[2.0, 1.0, 0.1]])
Y_pred2 = tensor([[0.5, 2.0, 0.3]])

### Compute the losses
# Same target, two different predictions.
l1, l2 = (loss(logits, Y) for logits in (Y_pred1, Y_pred2))

### Show the results
print(f'PyTorch Loss1 : {l1.item():.4f} \nPyTorch Loss2 : {l2.item():.4f}')
print(f'Y_pred1 : {max(Y_pred1.data, 1)[1].item()}')  # max(..., 1) gives (values, indices); [1] is the index tensor, tensor([0]) here, i.e. the predicted class
print(f'Y_pred2 : {max(Y_pred2.data, 1)[1].item()}')


#### -- (2) multiple labels with batch mode -- #####

### Build Y, Y_pred1, Y_pred2
# The target has shape (nBatch,); here nBatch = 3.
# Every target entry must satisfy 0 <= value < nClasses (so 0, 1 or 2).
# The targets are class indices, not one-hot vectors.
Y = tensor([2, 0, 1], requires_grad=False)

# The input has shape (nBatch, nClasses) = (3, 3): one row of logits per sample.
# Y_pred holds raw logits (softmax has not been applied).
Y_pred1 = tensor([[0.1, 0.2, 0.9],
                  [1.1, 0.1, 0.2],
                  [0.2, 2.1, 0.1]])

Y_pred2 = tensor([[0.8, 0.2, 0.3],
                  [0.2, 0.3, 0.5],
                  [0.2, 0.2, 0.5]])
### Compute the losses (one scalar per prediction batch)
l1, l2 = (loss(logits, Y) for logits in (Y_pred1, Y_pred2))
# `loss` is the criterion instance created earlier in this cell.

### Show the results
print(f'Batch Loss1 : {l1.item():.4f} \nBatch Loss2 : {l2.item():.4f}')


Loss1 : 0.3567
Loss2 : 2.3026
PyTorch Loss1 : 0.4170 
PyTorch Loss2 : 1.8406
Y_pred1 : 0
Y_pred2 : 1
Batch Loss1 : 0.4966 
Batch Loss2 : 1.2389


## ISSUE 

1) target is of size nBatch (line 23) << does this mean Y has the same batch size (nBatch) as Y_pred1 and Y_pred2? <br>
2) Input is class, not one-hot (line 26, 49) << how does this work functionally with a tensor? <br>
3) each element in target has to have 0 <= value < nClasses (0-2) << what does this mean? <br>

- This criterion expects a class index in the range [0, C-1] as the target for each value of a 1D tensor of 
size minibatch <br>
- target (N), where each value satisfies 0 <= targets[i] <= C-1 <br>

4) Y_pred1 = tensor([[blah blah]]) (line 53) << Y_pred is wrapped in two pairs of brackets; is that so that Y can act as class indices? <br><br> 
Input (Y_pred)
- is expected to contain raw, unnormalized scores for each class.<br>
- a tensor of size either (minibatch, C) or (minibatch, C, d1, d2, ..., dk) <br> 
- input (N, C) where C = number of classes, or (N, C, d1, d2, ..., dk) with k >= 1; N = minibatch size <br>

5) torch.max(Y_pred1.data, 1)[1].item() (line 39) << what does this mean? <br>
- torch.max(input, dim) 함수는 주어진 차원에서의 최댓값(values)과 그 최댓값이 들어있는 index(indices)를 함께 리턴하는 함수 
- Y_pred = [ [0.3,0.2,0.9,0.1] ] 의 경우 torch.max(Y_pred.data , 1 )[1] 의 결과는 0.9의 인덱스인 2가 된다. 
-  뒤의 1 은 dimension에 대한 것이다. e,g) 64 * 10개의 Y_pred 값을 한번에 넣어주고 64개의 예측값을 받아야 하는 경우
- ▲ still issue ) 헌데 아직, torch.max(output.data,1) 뒤의 [1].item() 는 완벽히 이해하지 못함
```python

    print("{} : ".format('max(Y_pred1.data,1)'), max(Y_pred1.data,1)) 
    # >>> torch.return_types.max(values=tensor([2.]),indices=tensor([0]))
    print("{} : ".format('max(Y_pred1.data,1)[1]'), max(Y_pred1.data,1)[1]) 
    # >>>  tensor([0])  # index [1] selects the `indices` field of the result printed above, i.e. the tensor of predicted labels.
    print("{} : ".format('max(Y_pred1.data,1)[1].item()'), max(Y_pred1.data,1)[1].item()) 
    # >>>  0  # .item() turns that one-element label tensor into a plain scalar, i.e. the predicted label itself.
```

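6) Input is of size nBatch x nClasses = 2 x 4 << what does this mean? <br>
- ▲ still issue ) 왜 사이즈가 2 x 4 이지
- note ) the tensors used above are 1 x 3 (single sample) and 3 x 3 (batch of three), i.e. nBatch x nClasses with 3 classes, so "2 x 4" does not match the actual shapes.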