# First things first
* Click **File -> Save a copy in Drive** and click **Open in new tab** in the pop-up window to save your progress in Google Drive.
* Click **Runtime -> Change runtime type** and select **GPU** in Hardware accelerator box to enable faster GPU training.


# If you're using Colab notebook.


In [1]:
try:
    import google.colab
    from google.colab.output import clear as clear_output
    IN_COLAB = True
except:
    IN_COLAB = False
    from IPython.display import clear_output as clear_output



In [2]:
# import required package
from sklearn.metrics import ndcg_score, dcg_score
import numpy as np

In [3]:
# Draw random data.
n = 20
draw_score = lambda up: np.random.randint(0, up, size=(1, n))
draw_score = lambda up: np.random.choice(up, size=(1, n))


In [4]:
# Releveance scores in output order 
relevance_score = draw_score(100)

# Releveance scores in Ideal order 
ideal_relevance = -np.sort(-relevance_score)

print(relevance_score, ideal_relevance, sep="\n")

[[74  7 81 45 28 57 73 94 18 53 97 45 33 75  4 77 92  7  6 68]]
[[97 94 92 81 77 75 74 73 68 57 53 45 45 33 28 18  7  7  6  4]]


In [5]:
# Cumulative Gain
cg = np.sum(relevance_score)
cg


1034

In [18]:
def dcg_function(score):
    """
      score: np array of relevance score.
      returns: Discounted Cumulative Gain, float.
    """
    # Generated the np array of log2([2, 3, ..... n+1])
    x = np.arange(start=2, stop=score.shape[-1] + 2, step=1)
    x = x.reshape((1, x.shape[0]))
    # print(x.shape, score.shape)

    # Divided the score by log value, sum then return.
    score = np.sum(score / np.log2(x))
    return score

In [19]:
# Discounted Cumulative gain
dcg = dcg_function(relevance_score)

# Ideal Discounted Cumulative gain
idcg = dcg_function(ideal_relevance)

dcg, idcg


(371.65966556966066, 442.5774873626731)

In [20]:
ndcg = dcg / idcg
ndcg


0.8397617958030063

# Compare with package functions value.




In [21]:
# Because the "dcg_score" function in sklearn takes the ranking position as input, so we cannot use this:
# dcg = dcg_score(ideal_relevance, relevance_score)
# Instead....

order_of_relevance_score = -np.argsort(relevance_score)[:, ::-1] + n

order_of_relevance_score, relevance_score, ideal_relevance

(array([[10, 13,  4, 18,  5,  7, 20, 14,  1, 15, 11,  9, 17,  8, 16, 12,
          3, 19,  2,  6]], dtype=int64),
 array([[74,  7, 81, 45, 28, 57, 73, 94, 18, 53, 97, 45, 33, 75,  4, 77,
         92,  7,  6, 68]]),
 array([[97, 94, 92, 81, 77, 75, 74, 73, 68, 57, 53, 45, 45, 33, 28, 18,
          7,  7,  6,  4]]))

In [24]:
# DCG score 

dcg = dcg_score(ideal_relevance, order_of_relevance_score) 
print("DCG score : ", dcg) 
   
# IDCG score 
idcg = dcg_score(ideal_relevance, ideal_relevance) 
print("IDCG score : ", idcg) 
   
# Normalized DCG score 
ndcg = dcg / idcg
print("nDCG score : ", ndcg) 
   
# or we can use the scikit-learn ndcg_score package 
print("nDCG score (from package function) : ",
      ndcg_score(ideal_relevance, relevance_score)) 

DCG score :  371.65966556966066
IDCG score :  442.577487362673
nDCG score :  0.8397617958030064
nDCG score (from package function) :  0.8317136907974455


### Answers for the dcg function.

In [23]:
def dcg_function(score):
    """
      score: np array of relevance score.
      returns: Discounted Cumulative Gain, float.
    """
    # Generated the np array of log2([2, 3, ..... n+1])
    x = np.asarray(range(score.shape[-1])) + 2

    # Divided the score by log value, sum then return.
    return np.sum(score / np.log2(x))