In [103]:
#!pip install mlxtend

In [104]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import cosine_similarity

In [105]:
df = pd.read_csv('df_training_1h.csv')

In [106]:
df

Unnamed: 0,user_id,10,20,30,40,50,60,70,80,130,...,2240910,2241190,2241570,2242980,2244840,2244920,2245840,2245890,2246290,2253290
0,1239,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2821,0,0,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
2,2881,0,0,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
3,3629,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4040,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,232709,1,0,0,0,1,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
96,240526,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
97,241163,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
98,241405,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


## Def function for recommendation system

In [114]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

def calculate_popularity(data):
    # 计算每个游戏的受欢迎程度
    popularity = data.apply(lambda x: x > 0).sum().sort_values(ascending=False)
    return popularity

def create_recommendation_dataframe(data, n):
    # 确保 user_id 是一个列
    if 'user_id' in data.index.names:
        data.reset_index(inplace=True)

    user_ids = data['user_id']
    game_data = data.drop('user_id', axis=1)
    all_games = game_data.columns.tolist()  # 获取所有游戏的列名
    popularity = calculate_popularity(game_data).index[:n]  # 获取最受欢迎的n个游戏

    cosine_sim = cosine_similarity(game_data)
    recommendations = pd.DataFrame(0, index=user_ids, columns=all_games)

    for idx, user_id in enumerate(user_ids):
        sim_scores = list(enumerate(cosine_sim[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        most_similar_users = [sim_scores[i][0] for i in range(1, 6)]
        
        similar_users_likes = game_data.iloc[most_similar_users].sum(axis=0)
        top_games = similar_users_likes.sort_values(ascending=False).head(n).index
        
        recommendations.loc[user_id, top_games] = 1

        # 检查并填充不足的推荐
        if recommendations.loc[user_id].sum() < n:
            needed = n - recommendations.loc[user_id].sum()
            recommendations.loc[user_id, popularity[:needed]] = 1

    recommendations['user_id'] = user_ids  # 重新添加 user_id 列
    return recommendations


In [115]:
final_recommendations_1 = create_recommendation_dataframe(df, n=1)
final_recommendations_5 = create_recommendation_dataframe(df, n=5)
final_recommendations_12 = create_recommendation_dataframe(df, n=12)

## Function to check 

In [124]:

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix

class RecommendationMeasureStevenZ:
    @staticmethod
    def random_replace(row):
        # 直接操作 numpy array，假设 row 已经不包含 user_id
        non_zero_indices = np.where(row > 0)[0]  # 获取非零值的索引
        if len(non_zero_indices) > 5:
            replace_indices = np.random.choice(non_zero_indices, size=5, replace=False)
            row[replace_indices] = 0
        return row

    @staticmethod
    def randomize_non_zero_values(dataset, n):
        # 创建一个不包含 user_id 的数据集副本
        game_data = dataset.drop(columns=['user_id'])
        for i, row in game_data.iterrows():
            non_zero_indices = row.index[row != 0]
            if len(non_zero_indices) > n:
                random_indices = np.random.choice(non_zero_indices, size=n, replace=False)
                game_data.loc[i, random_indices] = 0
        # 将修改后的游戏数据再与 user_id 列合并
        dataset.update(game_data)
        return dataset


    @staticmethod
    def generate_random_prediction(df, n):
        """
        Generate a random prediction matrix with specified number of 1 values per row.

        Parameters:
            df (DataFrame): Input DataFrame to match the shape of the generated matrix.
            n (int): Number of 1 values to generate per row.

        Returns:
            np.ndarray: Randomly generated prediction matrix with specified number of 1 per row.
        """
        rows, cols = df.shape
        rand_matrix = np.zeros((rows, cols), dtype=int)

        # Loop through each row
        for i in range(rows):
            indices = np.random.choice(cols, n, replace=False)
            rand_matrix[i, indices] = 1

        rand_pred = pd.DataFrame(rand_matrix, columns=df.columns, index=df.index)

        return rand_pred

    @staticmethod
    def calculate_recommendation_accuracy(df0, df_pred):
        """
        Calculate recommendation accuracy based on two input DataFrames.

        Parameters:
            df0 (DataFrame): Original DataFrame.
            df_pred (DataFrame): DataFrame containing predicted recommendations.

        Returns:
            float: Recommendation accuracy.
        """
        matrix_df0 = df0.values
        matrix_df_pred = df_pred.values
        matrix_sum = matrix_df0 + matrix_df_pred

        result_df = pd.DataFrame(matrix_sum, columns=df0.columns, index=df0.index)
        num_rows_true = result_df.eq(2).any(axis=1).sum()

        recommend_acc = num_rows_true / df0.shape[0]
        return recommend_acc

### for n = 1

In [165]:
n = 1
df0 = RecommendationMeasureStevenZ.randomize_non_zero_values(df, n)

In [166]:
recom_acc1 = RecommendationMeasureStevenZ.calculate_recommendation_accuracy(df0, final_recommendations_1)
print("Prediction accuracy:", "{:.2f}%".format(recom_acc * 100))

Prediction accuracy: 3.00%


## for n = 5

In [163]:
n = 5
df0 = RecommendationMeasureStevenZ.randomize_non_zero_values(df, n)

In [164]:
recom_acc1 = RecommendationMeasureStevenZ.calculate_recommendation_accuracy(df0, final_recommendations_5)
print("Prediction accuracy:", "{:.2f}%".format(recom_acc * 100))

Prediction accuracy: 3.00%


## for n = 12

In [161]:
n = 12
df0 = RecommendationMeasureStevenZ.randomize_non_zero_values(df, n)

In [162]:
recom_acc1 = RecommendationMeasureStevenZ.calculate_recommendation_accuracy(df0, final_recommendations_12)
print("Prediction accuracy:", "{:.2f}%".format(recom_acc * 100))

Prediction accuracy: 3.00%
