# Import

In [1]:
import pandas as pd

import MatrixV3
from MatrixV3 import RecommendationMeasureStevenZ as rm

from surprise import Reader, Dataset, SVD

from surprise.model_selection import cross_validate

from concurrent.futures import ThreadPoolExecutor, as_completed

# Load and transform data

In [2]:
# Wide input frame: a 'user_id' column followed by one column per app
# (the melt below relies on exactly that layout).
df = pd.read_csv('df_training_1k.csv')

# NOTE(review): randomize_non_zero_values is defined in MatrixV3; what the
# second argument (12) controls is not visible from this notebook -- confirm.
df1 = rm.randomize_non_zero_values(df, 12)

# Reshape wide -> long so each row is one (user_id, app_id, rating) triple.
data = df1.melt(id_vars=['user_id'], var_name='app_id', value_name='rating')

# Keep only non-zero entries. The explicit .copy() makes the filtered frame
# independent of the melted one, so the column assignments below write to a
# real frame instead of raising SettingWithCopyWarning.
data = data[data['rating'] != 0].copy()

# Order matters: 1 -> 2 must happen before -1 -> 1, otherwise the freshly
# created 1s would be promoted to 2 as well.
data['rating'] = data['rating'].replace(1, 2) # Change 1s to 2s
data['rating'] = data['rating'].replace(-1, 1) # Then change -1s to 1s

# melt() takes app ids from the column labels; cast them to int.
data['app_id'] = data['app_id'].astype(int)
data.head()

Unnamed: 0,user_id,app_id,rating
56,118422,10,2
86,204980,10,2
95,232709,10,2
133,345832,10,2
140,359152,10,2


# Surprise Method

In [3]:
# The ratings column only contains 1 and 2 after the recoding step, hence the scale.
reader = Reader(rating_scale=(1, 2))
sup_data = Dataset.load_from_df(data[['user_id', 'app_id', 'rating']], reader)

# Train on every available rating (no hold-out split is made here).
sup_train = sup_data.build_full_trainset()

# random_state pins the random initialisation of the latent factors so that a
# Restart & Run All reproduces the same model and the same downstream results.
algo = SVD(
    n_factors=200,
    lr_all=0.005,
    reg_all=0.02,
    n_epochs=30,
    init_std_dev=0.05,
    random_state=42,
)
algo.fit(sup_train)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1e5d704cf50>

In [4]:
def prediction_algo(uid = None , iid = None):
    """Return a list of predictions from the notebook-level ``algo``.

    Relies on two notebook globals: ``algo`` (the fitted model) and
    ``sup_train`` (the trainset it was fitted on).

    Parameters
    ----------
    uid : raw user id, optional
        When omitted, a prediction is produced for every user in ``sup_train``.
    iid : raw item id, optional
        When omitted (and ``uid`` is given), a prediction is produced for every
        item in ``sup_train``.

    Returns
    -------
    list
        Prediction objects as returned by ``algo.predict``. A single
        (uid, iid) request yields a one-element list, so the return type is
        always a list.

    Notes
    -----
    If both arguments are omitted the user branch wins and every user is
    scored against ``iid=None``.
    """
    if uid is None:
        # all_users() yields inner ids; predict() is given raw ids, so convert
        # (same treatment the item branch below applies).
        return [
            algo.predict(sup_train.to_raw_uid(inner_uid), iid, verbose = False)
            for inner_uid in sup_train.all_users()
        ]

    if iid is None:
        return [
            algo.predict(uid, sup_train.to_raw_iid(inner_iid), verbose = False)
            for inner_iid in sup_train.all_items()
        ]

    # list.append() returns None, so build the one-element list directly.
    return [algo.predict(uid, iid, verbose = False)]

def worker(user_id, prediction_algo, n):
    """Pick the ``n`` highest-scoring items for one user.

    ``prediction_algo`` is called as ``prediction_algo(uid=user_id)`` and must
    return objects exposing ``.est`` (the score) and ``.iid`` (the item id).

    Returns ``(user_id, item_ids)`` where ``item_ids`` are the stringified ids
    of the top ``n`` predictions, best first; ties keep their original order.
    """
    ranked = list(prediction_algo(uid=user_id))
    ranked.sort(key=lambda pred: pred.est, reverse=True)

    # Item ids are stringified before being handed back to the caller.
    best_ids = []
    for pred in ranked[:n]:
        best_ids.append(str(pred.iid))
    return user_id, best_ids

def create_recommendation_matrix(df, prediction_algo, n, num_workers=10):
    """Build a 0/1 user x app frame flagging each user's top-``n`` apps.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a 'user_id' column; every column after the first one is
        treated as an app id.
    prediction_algo : callable
        Forwarded unchanged to ``worker``.
    n : int
        Number of items to flag per user.
    num_workers : int, default 10
        Size of the thread pool used to fan out the per-user work.

    Returns
    -------
    pandas.DataFrame
        A 'user_id' column followed by one int column per app; 1 marks a
        recommended app, 0 everything else.
    """
    users = df['user_id'].tolist()
    apps = df.columns[1:].tolist()  # skip the leading 'user_id' column

    # All-zero grid, indexed by user, one column per app.
    recommendations_df = pd.DataFrame(0, index=users, columns=apps, dtype=int)

    with ThreadPoolExecutor(max_workers=num_workers) as pool:
        # Queue one task per user, then fill rows as the tasks finish.
        pending = {}
        for uid in users:
            pending[pool.submit(worker, uid, prediction_algo, n)] = uid

        for finished in as_completed(pending):
            uid, picked = finished.result()
            recommendations_df.loc[uid, picked] = 1

    # Move the user ids out of the index into a regular 'user_id' column.
    return recommendations_df.reset_index().rename(columns={'index': 'user_id'})

In [5]:
# Top-N cut-offs to evaluate; each value is passed as `n` to create_recommendation_matrix
N_values = [1, 3, 5, 9, 12]

# Collects one {'N', 'Accuracy'} record per cut-off
results = []

for N in N_values:
    # Build the 0/1 recommendation matrix for this cut-off (10 worker threads)
    recommendation_matrix = create_recommendation_matrix(df, prediction_algo, N, num_workers=10)
    
    # Score the recommendations against `df`, the frame as loaded from the CSV
    # NOTE(review): the metric itself is defined in MatrixV3 -- check there for its exact meaning
    recom_acc = rm.calculate_recommendation_accuracy(df, recommendation_matrix)
    
    # Store N and the accuracy (scaled by 100, presumably fraction -> percent; confirm) in the results list
    results.append({'N': N, 'Accuracy': recom_acc * 100})
    
    # Save the recommendation matrix to a per-N CSV file
    # (to_csv is called with defaults, so the row index is written as well)
    filename = f"recommendation_matrix_{N}.csv"
    recommendation_matrix.to_csv(filename)

# Convert the list of records to a DataFrame, one row per N
results_df = pd.DataFrame(results)

# Display the DataFrame as the cell output
results_df

Unnamed: 0,N,Accuracy
0,1,8.5
1,3,20.2
2,5,29.7
3,9,43.8
4,12,50.1


In [8]:
# Persist the per-N accuracy table; with to_csv defaults the row index is
# written too, as an unnamed first column.
results_df.to_csv("model2_accuracy.csv")