In [None]:
import csv

#
# Import Data
#

# Path of the training CSV handed to load_data() below; left empty here as a
# placeholder and has to be filled in before the notebook is run.
TRAIN_FILE = ""

def load_data(train_file):
    """Read a CSV file into a list of rows.

    Args:
        train_file: Path of the CSV file to read, or None when no file is
            configured.

    Returns:
        A list with one entry per CSV row, each row being a list of the
        row's string fields. An empty list is returned when ``train_file``
        is None, so callers can iterate over the result unconditionally.

    Raises:
        OSError: If the file cannot be opened.
    """
    if train_file is None:
        # An empty list (instead of a numeric sentinel) keeps the return type
        # uniform: downstream code loops over the rows.
        return []

    # newline='' is what the csv module expects so that it can handle
    # embedded line breaks inside quoted fields itself.
    with open(train_file, newline='') as csvfile:
        return list(csv.reader(csvfile))

# Rows of the training CSV; the later cells treat each row as one sequence
# when building the Markov transition maps.
sentences = load_data(TRAIN_FILE)

In [None]:
import numpy as np
import random as rm
import json

#
# Create Markov Transition Graphs
#

# Paths of the JSON files holding the normalized 1st-order (MAP_FILE_1) and
# 2nd-order (MAP_FILE_2) transition maps; placeholders to be filled in.
MAP_FILE_1 = ""
MAP_FILE_2 = ""

# create mapping of each song to its successor with a count
# ex: {song1: {song2: 3}} means the sequence song1, song2 appears 3x in our training set
# a, b, c, d, e, f -> (d, e)-> END a -> b
def create_1_map(sentences):
    """Count first-order transitions between consecutive items.

    Args:
        sentences: Iterable of sequences (e.g. lists of song ids).

    Returns:
        A dict mapping each item to a dict of ``{successor: count}``, where
        ``count`` is how many times the successor directly followed the item
        across all sequences.
    """
    transitions = {}
    for sequence in sentences:
        # Pair every item with the one immediately after it.
        for current, following in zip(sequence, sequence[1:]):
            successors = transitions.setdefault(current, {})
            successors[following] = successors.get(following, 0) + 1

    return transitions

# 2nd-order Markov mapping that has the form {"songA+songB": {"songC": 1}}
def create_2_map(sentences):
    """Count second-order transitions (two items of context -> next item).

    Args:
        sentences: Iterable of sequences (e.g. lists of song ids).

    Returns:
        A dict keyed by ``"<first>+<second>"`` (the two consecutive context
        items joined with ``+``) whose values are dicts of
        ``{successor: count}``.
    """
    transitions = {}
    for sequence in sentences:
        # Slide a window of three over the sequence: two context items
        # followed by the item they lead to.
        for first, second, following in zip(sequence, sequence[1:], sequence[2:]):
            context = "{}+{}".format(first, second)
            successors = transitions.setdefault(context, {})
            successors[following] = successors.get(following, 0) + 1
    return transitions

# normalize the map to create probabilities of occurrence across training set
def normalize_map(song_map):
    """Turn successor counts into relative frequencies, in place.

    Args:
        song_map: Dict of ``{key: {successor: count}}``.

    Returns:
        The same ``song_map`` object, with every inner value divided by the
        sum of the values that share its key.
    """
    for successors in song_map.values():
        row_sum = 0.0
        for count in successors.values():
            row_sum += count
        # Compute all ratios first, then write them back onto the same dict.
        successors.update(
            {song: count / row_sum for song, count in successors.items()}
        )

    return song_map

def write_map(song_map, name):
    """Serialize ``song_map`` as JSON into the file at path ``name``.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(name, 'w') as out_file:
        json.dump(song_map, out_file)


if __name__ == '__main__':
    # 1st-order map: count transitions, then convert counts to probabilities.
    map_1 = create_1_map(sentences)
    # normalize_map must receive the map built on the previous line; no
    # variable named song_map exists at this scope.
    map_1 = normalize_map(map_1)

    # 2nd-order map, same two steps.
    map_2 = create_2_map(sentences)
    map_2 = normalize_map(map_2)

    # Persist both maps as JSON for the evaluation cell.
    write_map(map_1, MAP_FILE_1)
    write_map(map_2, MAP_FILE_2)
    print("Done")

In [None]:
import json

#
# Evaluation
#

# Position (0-based) of the item to predict within each test sequence.
curr_index = 5
# Context positions. Note the naming: prev1 is the item two steps before the
# target and prev2 the item directly before it, so the 2nd-order key is built
# as (prev1, prev2).
prev1, prev2 = curr_index - 2, curr_index - 1
# Minimum sequence length needed for the target position to exist.
mlen = curr_index + 1

def evaluate_2(map_2, map_1, test_data, target_index=None):
    """Print top-1 next-item accuracy of the 2nd-order model with 1st-order fallback.

    For each test sequence the two items before the target form the
    2nd-order key ``"<a>+<b>"``. If that key is in ``map_2`` the most probable
    successor is the prediction; otherwise, if the item directly before the
    target is in ``map_1``, its most probable successor is used. Sequences
    for which neither map has an entry count as misses.

    Args:
        map_2: 2nd-order map ``{"a+b": {successor: probability}}``.
        map_1: 1st-order map ``{item: {successor: probability}}``.
        test_data: Iterable of sequences.
        target_index: 0-based position of the item to predict. When None
            (the default) the notebook-level ``curr_index``, ``prev1``,
            ``prev2`` and ``mlen`` are used.

    Raises:
        ValueError: If ``target_index`` is given and is smaller than 2.

    Prints the number of scored sequences and the accuracy; returns None.
    """
    if target_index is None:
        # Default: evaluation position configured at notebook level.
        target_index, ctx_first, ctx_second, min_len = curr_index, prev1, prev2, mlen
    else:
        if target_index < 2:
            raise ValueError("target_index must be >= 2 to leave two context items")
        ctx_first = target_index - 2
        ctx_second = target_index - 1
        min_len = target_index + 1

    total = 0
    two_correct = 0
    one_correct = 0
    for line in test_data:
        if len(line) < min_len:
            # No item at the target position, so this sequence can be neither
            # right nor wrong; keep it out of the denominator.
            continue
        total += 1
        actual = line[target_index]
        key = "{}+{}".format(line[ctx_first], line[ctx_second])
        if key in map_2:
            # 2nd-order prediction; ties resolve to the first-listed successor.
            candidates = map_2[key]
            if max(candidates, key=candidates.get, default="") == actual:
                two_correct += 1
        elif line[ctx_second] in map_1:
            # 1st-order fallback using only the immediately preceding item.
            candidates = map_1[line[ctx_second]]
            if max(candidates, key=candidates.get, default="") == actual:
                one_correct += 1

    correct = two_correct + one_correct
    # Guard against an empty (or entirely too-short) test set.
    accuracy = correct / total if total else 0.0
    # Apply the format strings with % so the values are actually interpolated.
    print("Total:-- %d" % total)
    print("Accuracy:-- %.8f" % accuracy)

def evaluate_2_recommender_mrr(map_2, map_1, test_data, target_index=45, top_k=20):
    """Print the mean reciprocal rank of the true next item.

    For each sequence the two items before ``target_index`` form the
    2nd-order key; if it is missing from ``map_2`` the item directly before
    the target is looked up in ``map_1`` instead. The candidate successors
    are ranked by descending probability, cut to ``top_k``, and the
    reciprocal of the true item's rank (0 if absent) is accumulated. Only
    sequences for which one of the maps had an entry are averaged over.

    Args:
        map_2: 2nd-order map ``{"a+b": {successor: probability}}``.
        map_1: 1st-order map ``{item: {successor: probability}}``.
        test_data: Iterable of sequences.
        target_index: 0-based position of the item to predict (default 45).
        top_k: Number of ranked candidates considered (default 20).

    Raises:
        ValueError: If ``target_index`` is smaller than 2.

    Prints the MRR; returns None.
    """
    if target_index < 2:
        raise ValueError("target_index must be >= 2 to leave two context items")
    ctx_first = target_index - 2
    ctx_second = target_index - 1
    # The length guard has to match the positions indexed below, otherwise
    # sequences shorter than target_index + 1 raise IndexError.
    min_len = target_index + 1

    total = 0
    mrr = 0.0
    for line in test_data:
        if len(line) < min_len:
            continue
        key = "{}+{}".format(line[ctx_first], line[ctx_second])
        if key in map_2:
            candidates = map_2[key]
        elif line[ctx_second] in map_1:
            candidates = map_1[line[ctx_second]]
        else:
            # Neither model can rank anything for this context.
            continue

        ranked = sorted(candidates.items(), key=lambda item: item[1], reverse=True)[:top_k]
        actual = line[target_index]
        for rank, (song, _) in enumerate(ranked, start=1):
            if song == actual:
                mrr += 1 / rank
                break  # each song appears once in the ranking
        total += 1

    # Avoid dividing by zero when no sequence could be scored.
    mrr = mrr / total if total else 0.0
    print("Markov 2 Recommender MRR:-- %.8f" % mrr)
    
def evaluate_recall(map_2, map_1, test_data):
    """Print the mean overlap between the top-20 predictions and items 45..64.

    For each sequence of at least ``mlen`` items, the items at the
    notebook-level positions ``prev1`` and ``prev2`` form the 2nd-order key;
    if it is missing from ``map_2`` the item at ``prev2`` is looked up in
    ``map_1``. The 20 most probable successors are intersected with the
    sequence slice ``line[45:65]`` and the overlap size divided by 20 is
    accumulated. Only sequences for which one of the maps had an entry are
    averaged over.

    Args:
        map_2: 2nd-order map ``{"a+b": {successor: probability}}``.
        map_1: 1st-order map ``{item: {successor: probability}}``.
        test_data: Iterable of sequences.

    Prints the recall; returns None.
    """
    # NOTE(review): the context positions come from the notebook-level
    # prev1/prev2 while the target window is fixed at 45..64 — confirm that
    # this combination is intended.
    window_start, window_end = 45, 65
    top_k = 20

    recall = 0.0
    total = 0
    for line in test_data:
        if len(line) < mlen:
            continue
        key = "{}+{}".format(line[prev1], line[prev2])
        if key in map_2:
            candidates = map_2[key]
        elif line[prev2] in map_1:
            # Test the same item that is used for the lookup, so the fallback
            # stays correct whatever prev2 is set to.
            candidates = map_1[line[prev2]]
        else:
            continue

        songs = set(line[window_start:window_end])
        ranked = sorted(candidates.items(), key=lambda item: item[1], reverse=True)[:top_k]
        guesses = {song for song, _ in ranked}
        recall += len(guesses & songs) / 20.0
        total += 1

    # Avoid dividing by zero when no sequence could be scored.
    recall = recall / total if total else 0.0
    print("Markov Recall:-- %.8f" % recall)

# Load the normalized transition maps from their JSON files.
with open(MAP_FILE_1) as json_file:
    map_1 = json.load(json_file)

with open(MAP_FILE_2) as json_file_2:
    map_2 = json.load(json_file_2)

# NOTE(review): test_data is not defined in the cells visible here —
# presumably it is loaded elsewhere; confirm before a fresh Run All.
evaluate_2(map_2, map_1, test_data)
evaluate_2_recommender_mrr(map_2, map_1, test_data)
# The second argument is the 1st-order fallback map, so it must be map_1.
evaluate_recall(map_2, map_1, test_data)
print("Done")