<h3>Importing</h3>

Most of the code was adapted using implict documentation https://benfred.github.io/implicit/ and their Github Repository https://github.com/benfred/implicit/tree/main/implicit/datasets

In [4]:
import implicit
from implicit.datasets.lastfm import get_lastfm
from implicit.nearest_neighbours import bm25_weight
from implicit.als import AlternatingLeastSquares

import pandas
import random
import numpy as np
from scipy.sparse import coo_matrix, csr_matrix

import FeatureFaking as ff
from FeatureFaking import FeatureFaker as faker

<h3>Data Processing</h3>
<b>artist_user_plays</b> is a sparse matrix. </br>
A Sparse matrix is a datastructure that contains coordinates of 2D list/array (list inside of lists) to non-zero values.</br>
Example: if, 2D list = [[1 0 0 0 0 0][0 0 2 0 0 1][0 0 0 2 0 0]], then sparse matrix =   (0, 0)	1, (1, 2)	2, (1, 5)	1, (2, 3) 2</br>
Explanation taken from https://www.educative.io/answers/sparse-matrices-in-python.</br>
</br>
<b>artists and users</b></br>
are arrays of string labels for each row and column in the sparse matrix.

In [5]:
#artists, users, artist_user_plays = get_lastfm()

In [6]:
def load_data(path, product_column_name, quantity_column_name):
    file = pandas.read_csv(path + ".csv")
    #print(file[product_column_name], file[bought_column_name])
    fake_users = []
    for i in range(10):
        fake_users.append("User " + str(i))
    
    users = []
    items = []
    sizes = []
    for i in range(200):
        
        user = random.choice(fake_users)
        item = random.choice(file[product_column_name])
        size = random.choice(file[quantity_column_name])
        
        amount = ''
        for c in size:
            if(c != ',' and c != '.'):
                amount += c
        
        users.append(user)
        items.append(item)
        sizes.append(random.randint(1, int(amount)))
    
    df = pandas.DataFrame({'user': users, 'item': items, 'plays': sizes})
    df["user"] = df["user"].astype("category")
    df["item"] = df["item"].astype("category")
    
    data = df
    
    plays = coo_matrix(
        (
            data["plays"].astype(np.float32),
            (data["user"].cat.codes.copy(), data["item"].cat.codes.copy()),
        )
    ).tocsr()
    
    return np.array(data["user"][:]), np.array(data["item"][:]), plays
    

In [7]:
class User:
    
    def __init__(self):
        self.items = {}
    
    def purchase(self, item):
        if item in self.items:
            self.items[item] += 1
        else:
            self.items[item] = 0
    
    def get_lists(self):
        items = []
        sizes = []
        for key in self.items:
            items.append(key)
            sizes.append(self.items[key])
        return items, sizes
            

In [8]:
def read_file(filename):
    try:
        dataframe = pandas.read_excel(filename + '.xlsx')
        return dataframe
    except:
        dataframe = pandas.read_csv(filename + '.csv')
        return dataframe

def load_synth_data(path_training_data):
    
    df = read_file(path_training_data)
    users = {}
    for index, line in df.iterrows():
        c_id = line['user_id']
        product = line['product_id']
        
        if c_id in users:
            users[c_id].purchase(product)
        else:
            user = User()
            user.purchase(product)
            users[c_id] = user
    
    users1 = []
    items1 = []
    sizes1 = []
    
    for c_id in users:
        items, sizes = users[c_id].get_lists()
        items1 += items
        sizes1 += sizes
        for i in range(len(sizes)):
            users1.append(c_id)
    
    df = pandas.DataFrame({'user': users1, 'item': items1, 'plays': sizes1})
    df["user"] = df["user"].astype("category")
    df["item"] = df["item"].astype("category")
    
    data = df
    
    plays = coo_matrix(
        (
            data["plays"].astype(np.float32),
            (data["item"].cat.codes.copy(), data["user"].cat.codes.copy()),
        )
    ).tocsr()
    
    return np.array(data["item"][:]), np.array(data["user"][:]), plays

def test(path_training_data, path_testing_data, num_recommendations):
    
    products, users, products_user_purchased = load_synth_data(path_training_data)
    products_purchased = products_user_purchased.T.tocsr()
    model = AlternatingLeastSquares(factors=64, regularization=0.05, alpha=2.0)
    model.fit(products_purchased)
    
    df = read_file(path_testing_data)
    testing_data = {}
    
    for index, line in df.iterrows():
        user_id = line['user_id']
        product = line['product_id']
        
        if user_id in testing_data:
            testing_data[user_id].append(product)
        else:
            testing_data[user_id] = [product]
    
    correct = 0
    for user_id in testing_data:
        try:
            ids, scores = model.recommend(user_id, products_purchased[user_id], N=num_recommendations, filter_already_liked_items=False)
        except:
            continue
        for recommendation in products[ids]:
            if recommendation in testing_data[user_id]:
                correct += 1
    
    return correct

def test_random(interaction_data, path_testing_data, num_recommendations):
    
    interaction = read_file(interaction_data)
    df = read_file(path_testing_data)
    products = []
    testing_data = {}
    for index, line in interaction.iterrows():
        product = line['product_id']
        products.append(product)
        
    for index, line in df.iterrows():
        
        user_id = line['user_id']
        product = line['product_id']
        
        if user_id in testing_data:
            testing_data[user_id].append(product)
        else:
            testing_data[user_id] = [product]
    correct = 0
    for user in testing_data:
        recommendations = []
        for i in range(num_recommendations):
            rand_product = random.choice(products)
            if rand_product in testing_data[user]:
                correct += 1
    
    return correct

def test_features(interaction_data, path_testing_data, num_recommendations):
    
    interaction = read_file(interaction_data)
    test = read_file(path_testing_data)
    
    m = ff.FeatureFaker(interaction, test, 10)
    m.fit(10)
    print("Fit")
    correct = 0
    
    testing_data = {}
    
    for index, line in test.iterrows():
        user_id = line['user_id']
        product = line['product_id']
        
        if user_id in testing_data:
            testing_data[user_id].append(product)
        else:
            testing_data[user_id] = [product]
    
    for user_id in testing_data:
        
        recommended = m.recommend(user_id, num_recommendations)
        
        for recommendation in recommended:
            if recommendation in testing_data[user_id]:
                correct += 1
                print(correct)
    return correct


In [9]:
#artists, users, artist_user_plays = load_synth_data("interaction")

In [10]:
#artists, users, artist_user_plays = load_data("topyoutube", 'Artist', 'Total Views')

In [11]:
#user_plays = artist_user_plays.T.tocsr()

In [9]:
#model = AlternatingLeastSquares(factors=64, regularization=0.05, alpha=2.0)
#model.fit(user_plays)

In [10]:
# Get recommendations for the a single user
#userid = 2
#ids, scores = model.recommend(userid, user_plays[userid], N=10, filter_already_liked_items=False)

In [11]:
# Use pandas to display the output in a table, pandas isn't a dependency of implicit otherwise
import numpy as np
import pandas as pd
#print(userid)
#pd.DataFrame({"product_id": artists[ids], "score": scores, "already_liked": np.in1d(ids, user_plays[userid].indices)})

In [12]:
# get related items for the beatles (itemid = 25512)
#ids, scores= model.similar_items(5)

# display the results using pandas for nicer formatting
#pd.DataFrame({"artist": artists[ids], "score": scores})

In [13]:
import csv
def implicit_to_file(path_training_data, num_recommendations):
    
    path = "rcommendations_implicit.csv"
    output_file = open(path, 'a',  newline='')
    line = csv.writer(output_file)
    
    products, users, products_user_purchased = load_synth_data(path_training_data)
    products_purchased = products_user_purchased.T.tocsr()
    model = AlternatingLeastSquares(factors=64, regularization=0.05, alpha=2.0)
    model.fit(products_purchased)
    

    user = set(users)
    
    for user_id in user:
        ids, _ = model.recommend(user_id, products_purchased[user_id], N=num_recommendations, filter_already_liked_items=False)
        row = [user_id]
        for id in ids:
            row.append(id)
        line.writerow(row)
    
    output_file.close()
    return output_file

def feature_faker_to_file(path_training_data, num_recommendations):
    interaction = read_file(path_training_data)
    
    m = ff.FeatureFaker(interaction, test, 10)
    m.fit(1)
    m.recommend_all(num_recommendations)

def random_to_file(path_training_data, num_recommendations):
    
    path = "recommendations_random.csv"
    output_file = open(path, 'a',  newline='')
    line_writer = csv.writer(output_file)
    
    interaction = read_file(path_training_data)
    products = []
    users = []
    for index, line in interaction.iterrows():
        product = line['product_id']
        user = line['user_id']
        
        if product not in products:
            products.append(product)
        users.append(user)
    
    users = set(users)
    for user_id in users:
        recommendations = []
        for product in range(num_recommendations):
            recommendations.append(random.choice(products))
        row = [user_id]
        for id in recommendations:
            row.append(id)
        line_writer.writerow(row)
    
    output_file.close()
    return output_file
    
    

In [3]:
t = "interactions_test"
tr = "interactions_train"

In [15]:
print("Correct: ", test(tr, t, 10))

  0%|          | 0/15 [00:00<?, ?it/s]

Correct:  3085


In [16]:
print("Correct: ", test_random(tr, t, 10))

Correct:  3870


In [17]:
#print("Correct: ", test_features(tr, t, 10))

In [14]:
random_to_file(tr, 10)

<_io.TextIOWrapper name='recommendations_random.csv' mode='a' encoding='cp1252'>

implicit_to_file(tr, 10)

In [16]:
feature_faker_to_file(tr, 10)