In [1]:
import numpy as np
import pandas as pd

In [2]:
np.random.seed(42)

In [3]:
df = pd.read_csv('ratings.csv')
targets = pd.read_csv('targets.csv')
df[['User', 'Item']] = df['UserId:ItemId'].str.split(':', expand=True)
targets[['User', 'Item']] = targets['UserId:ItemId'].str.split(':', expand=True)

In [4]:
def funkSVD(df, k=10, learning_rate=0.01, regularization=0.1, epochs=10):
    unique_users = df['User'].unique()
    unique_items = df['Item'].unique()

    user_to_index = {user: idx for idx, user in enumerate(unique_users)}
    item_to_index = {item: idx for idx, item in enumerate(unique_items)}
    
    m, n = len(unique_users), len(unique_items)
    P = np.random.rand(m, k) # Changed initialization to small random values
    Q = np.random.rand(n, k) # Changed initialization to small random values

    ratings = df[['User', 'Item', 'Rating']].values

    for epoch in range(epochs):
        np.random.shuffle(ratings)  # Shuffle ratings before each epoch
        mse_accumulated = 0
        count = 0
        
        for user, item, rating in ratings:
            i = user_to_index[user]
            j = item_to_index[item]
            
            error = rating - P[i, :].dot(Q[j, :].T)
            for d in range(k):
                P[i, d] += learning_rate * (error * Q[j, d] - regularization * P[i, d])
                Q[j, d] += learning_rate * (error * P[i, d] - regularization * Q[j, d])

            # Compute error for current rating and accumulate
            predicted_rating = P[i, :].dot(Q[j, :].T)
            mse_accumulated += (rating - predicted_rating)**2
            count += 1

        mse = mse_accumulated / count
        print(f"Epoch {epoch+1}/{epochs} - MSE: {mse:.4f}")

    return P, Q

In [5]:
a, b = funkSVD(df, k=10, learning_rate=0.01, regularization=0.1, epochs=1)

KeyboardInterrupt: 

In [None]:
unique_users = df['User'].unique()
unique_items = df['Item'].unique()
user_to_index = {user: idx for idx, user in enumerate(unique_users)}
item_to_index = {item: idx for idx, item in enumerate(unique_items)}

In [None]:
def predict_rating(user_id, item_id, P, Q, user_to_index, item_to_index):
    try:
        user_idx = user_to_index[user_id]
        item_idx = item_to_index[item_id]
        return P[user_idx, :].dot(Q[item_idx, :].T)
    except KeyError:  # If the user or item is not in the training data
        return None

In [None]:
def get_predictions(targets, P, Q, user_to_index, item_to_index):
    predictions = []

    for _, row in targets.iterrows():
        user = row['User']
        item = row['Item']

        predicted = predict_rating(user, item, P, Q, user_to_index, item_to_index)
        
        if predicted is None:
            predicted = np.nan  # or use a default value if you prefer

        predictions.append(predicted)

    return predictions

# Get predictions for the targets dataframe
targets['Rating'] = np.clip(get_predictions(targets, a, b, user_to_index, item_to_index), 0, 5)

In [None]:
# i want to generate a file resulst.csv with the following format (UserId:ItemId, Rating)

targets[['UserId:ItemId', 'Rating']].to_csv('results.csv', index=False)

In [None]:
print(targets.to_csv(index=False))

UserId:ItemId,User,Item,Rating
33ce7ee122:34cb28c370,33ce7ee122,34cb28c370,3.168461992992332
eab9e065e5:34cb28c370,eab9e065e5,34cb28c370,3.836277474173408
f785763291:34cb28c370,f785763291,34cb28c370,3.07943806469764
5f8185d75d:34cb28c370,5f8185d75d,34cb28c370,2.993547204030639
0eeef87507:1dfcdde662,0eeef87507,1dfcdde662,3.2406500633888675
cc5199f2f7:7724ef1f69,cc5199f2f7,7724ef1f69,2.9127284818007917
82ae2469c6:bc3b9136bc,82ae2469c6,bc3b9136bc,2.503027107584129
afced32639:89138b3fef,afced32639,89138b3fef,3.9334926301178026
cea1eee489:89138b3fef,cea1eee489,89138b3fef,2.8098866235717863
2bb8f34435:ee46c55ce9,2bb8f34435,ee46c55ce9,2.2440221710174972
355c1965c6:47fbb47c3e,355c1965c6,47fbb47c3e,1.6747153578474883
ee96e33972:47fbb47c3e,ee96e33972,47fbb47c3e,1.1638287955231563
ad9b954e7d:47fbb47c3e,ad9b954e7d,47fbb47c3e,1.8597320319824282
08094b7c2e:d9c70d1ac6,08094b7c2e,d9c70d1ac6,2.6954685068558546
6d7faa7b56:d9c70d1ac6,6d7faa7b56,d9c70d1ac6,3.1180149397076944
e89adb845b:972e7ee0b6,e89adb84