In [1]:
import math
import pandas as pd

def get_DCG(groundtruth, pred_rank_list, k):
    """Return the normalised DCG (NDCG@k) of a ranked prediction list.

    Despite its name, this function returns DCG divided by the ideal DCG.
    Relevance is binary: a prediction is relevant when it equals a
    ground-truth label, compared case-insensitively.

    Args:
        groundtruth: iterable of label strings considered relevant.
        pred_rank_list: iterable of predicted label strings, best first.
        k: cutoff; only the first ``k`` predictions are scored.

    Returns:
        A float between 0 and 1. Returns 0.0 when there is nothing to
        score (empty ground truth or ``k <= 0``) instead of raising
        ``ZeroDivisionError``.
    """
    # A set removes duplicate labels and gives constant-time membership tests.
    relevant = {label.lower() for label in groundtruth}

    dcg = 0.0
    credited = set()
    for index, pred in enumerate(pred_rank_list):
        if index >= k:
            break
        pred = pred.lower()
        # Credit each ground-truth label once, so a repeated prediction
        # cannot push the normalised score above 1.
        if pred in relevant and pred not in credited:
            credited.add(pred)
            dcg += 1 / math.log(index + 2, 2)

    # Ideal DCG: every relevant label packed into the top positions.
    idcg = 0.0
    num_item = int(min(k, len(relevant)))
    for i in range(num_item):
        idcg += 1 / math.log(i + 2, 2)

    if idcg == 0:
        return 0.0
    return dcg / idcg


def get_recall(groundtruth, pred_rank_list, k):
    """Return recall@k of a ranked prediction list.

    The number of distinct ground-truth labels found in the first ``k``
    predictions is divided by ``min(k, number of distinct labels)``.
    Labels are compared case-insensitively.

    Args:
        groundtruth: iterable of label strings considered relevant.
        pred_rank_list: iterable of predicted label strings, best first.
        k: cutoff; only the first ``k`` predictions are scored.

    Returns:
        A float between 0 and 1. Returns 0.0 when there is nothing to
        score (empty ground truth or ``k <= 0``) instead of raising
        ``ZeroDivisionError``.
    """
    # A set removes duplicate labels and gives constant-time membership tests.
    relevant = {label.lower() for label in groundtruth}

    # Collect hits in a set so a repeated prediction is only counted once.
    hits = set()
    for index, pred in enumerate(pred_rank_list):
        if index >= k:
            break
        pred = pred.lower()
        if pred in relevant:
            hits.add(pred)

    num_item = min(k, len(relevant))
    if num_item <= 0:
        return 0.0
    return float(len(hits)) / num_item


def evaluationWithK(gt, pred, k):
    """Score one ranked prediction list at cutoff ``k`` and print both metrics.

    Calls ``get_recall`` and then ``get_DCG`` with the same arguments, and
    prints each value on the current line followed by a space (no newline).

    Returns:
        The pair ``(recall, dcg)`` as returned by the two metric functions.
    """
    scores = (get_recall(gt, pred, k), get_DCG(gt, pred, k))

    # Same "<metric>@<k>: <value> " layout for both metrics.
    for prefix, value in zip(("recall@", "dcg@"), scores):
        print(prefix + str(k) + ": " + str(value), end=" ")
    return scores


def evaluation(gt, pred):
    """Evaluate one prediction list at cutoffs 3, 5 and 10.

    Each cutoff is scored (and printed) by ``evaluationWithK``; a newline is
    printed once all three cutoffs are done.

    Returns:
        A 6-tuple ``(recall3, dcg3, recall5, dcg5, recall10, dcg10)``.
    """
    collected = []
    for cutoff in (3, 5, 10):
        collected.extend(evaluationWithK(gt, pred, cutoff))
    print()
    return tuple(collected)

In [2]:
import pandas as pd
# Load the evaluation data from 'test.csv' (path is relative to the working directory).
df = pd.read_csv('test.csv')
# Preview the first rows as plain text.
print(df.head())

                                         description  \
0  Recently I could hardly fall asleep. Every tim...   
1  I was always a good tempered person. But since...   
2  I think I am over sensitive because my parents...   
3  I had a huge workload this quarter, and I have...   
4  I hate to go to school, and I hate to do what ...   

                                               label  \
0                             'anxiety','depression'   
1  'impulse control disorders','anger management'...   
2                                     'ADHD','child'   
3  'school issues','anxiety','emotional disturbance'   
4  'school issues','impulse control disorders','a...   

                                                pred  
0  'anxiety','stress','grief','parenting','school...  
1  'anxiety','stress','grief','parenting','school...  
2  'anxiety','stress','grief','parenting','school...  
3  'anxiety','stress','grief','parenting','school...  
4  'anxiety','stress','grief','parenting','school..

In [9]:
def _parse_labels(cell):
    """Split a comma-separated string of quoted labels into a list of labels.

    Whitespace around each item is removed, and one pair of matching
    surrounding quotes (single or double) is stripped when present. Blindly
    slicing ``x[1:-1]`` would corrupt labels such as ``" 'stress'"`` (space
    after the comma) or an unquoted ``stress``.
    """
    labels = []
    for part in cell.split(','):
        part = part.strip()
        if len(part) >= 2 and part[0] == part[-1] and part[0] in "'\"":
            part = part[1:-1]
        labels.append(part)
    return labels


# Running sums of the per-row metrics; divided by the row count at the end.
r3, d3, r5, d5, r10, d10 = 0, 0, 0, 0, 0, 0

length = len(df.index)
print(length)

for index, row in df.iterrows():
    # 'label' and 'pred' hold strings like "'anxiety','depression'".
    gt = _parse_labels(row['label'])
    print(gt)
    pred = _parse_labels(row['pred'])
    print(pred)
    recall3, dcg3, recall5, dcg5, recall10, dcg10 = evaluation(gt, pred)
    r3 += recall3
    d3 += dcg3
    r5 += recall5
    d5 += dcg5
    r10 += recall10
    d10 += dcg10
    print()

# Report the mean of each metric over all rows; an empty frame has no mean,
# so say so instead of dividing by zero.
if length == 0:
    print("no rows to evaluate")
else:
    print("recall@3= " + str(r3/length))
    print("dcg@3= " + str(d3/length))
    print("recall@5= " + str(r5/length))
    print("dcg@5= " + str(d5/length))
    print("recall@10= " + str(r10/length))
    print("dcg@10= " + str(d10/length))
    

19
['anxiety', 'depression']
['anxiety', 'stress', 'grief', 'parenting', 'school issues', 'impulse control disorders', 'emotional disturbance', 'anger management', 'depression', 'ADHD']
recall@3: 0.5 dcg@3: 0.6131471927654585 recall@5: 0.5 dcg@5: 0.6131471927654585 recall@10: 1.0 dcg@10: 0.7977228895450267 

['impulse control disorders', 'anger management', 'stress']
['anxiety', 'stress', 'grief', 'parenting', 'school issues', 'impulse control disorders', 'emotional disturbance', 'anger management', 'depression', 'ADHD']
recall@3: 0.3333333333333333 dcg@3: 0.2960819109658653 recall@5: 0.3333333333333333 dcg@5: 0.2960819109658653 recall@10: 1.0 dcg@10: 0.6112833214150002 

['ADHD', 'child']
['anxiety', 'stress', 'grief', 'parenting', 'school issues', 'impulse control disorders', 'emotional disturbance', 'anger management', 'depression', 'ADHD']
recall@3: 0.0 dcg@3: 0.0 recall@5: 0.0 dcg@5: 0.0 recall@10: 0.5 dcg@10: 0.17723928678404774 

['school issues', 'anxiety', 'emotional disturban