# DynCoCluster: A Parallel Co-Clustering Framework for Recommender Systems

This notebook implements the DynCoCluster framework proposed in the SCI research paper. The model clusters users and items independently (parallel co-clustering) to produce recommendations efficiently, and is demonstrated here on a 100k subset of the Amazon Product Reviews dataset.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score
import time
import os


In [None]:
# Read the Amazon Reviews 100k subset. The path is relative, so the CSV is
# looked up in the notebook's current working directory.
file_path = 'amazon_100k.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first rows of the raw frame.
df.head(n=5)


In [None]:
# Preprocess the dataset.
# A fresh copy is built instead of mutating `df` in place, so re-running this
# cell on an already-processed frame never drops rows on a slice (which would
# raise SettingWithCopyWarning) and always yields the same result.
rating_columns = ['reviewerID', 'asin', 'overall']
df = df.dropna(subset=rating_columns)[rating_columns].copy()

# Replace the raw user / item identifiers with integer codes; a separate
# encoder is fitted per column, so the two code spaces are independent.
df['reviewerID'] = LabelEncoder().fit_transform(df['reviewerID'])
df['asin'] = LabelEncoder().fit_transform(df['asin'])
df.head()


In [None]:
# Dense user x item rating table: one row per reviewerID, one column per asin.
# Repeated (reviewerID, asin) pairs are averaged, pairs without a rating become
# 0, and the result is stored as float32.
# NOTE(review): the table is fully dense, so memory grows with
# n_users * n_items -- confirm this fits for the dataset being loaded.
user_item_matrix = (
    df.pivot_table(
        index='reviewerID',
        columns='asin',
        values='overall',
        aggfunc='mean',
    )
    .fillna(0)
    .astype(np.float32)
)
user_item_matrix.shape


In [None]:
from sklearn.cluster import KMeans

def parallel_co_clustering(matrix, n_user_clusters=10, n_item_clusters=10, random_state=None):
    """Approximate a rating matrix by the mean of each (user cluster, item cluster) block.

    Rows and columns are clustered independently with KMeans (rows of
    ``matrix`` for users, rows of ``matrix.T`` for items). Despite the name,
    the two clusterings are executed one after the other here. Every cell of
    the output holds the mean of the block its row/column clusters define.

    Parameters
    ----------
    matrix : array-like of shape (n_users, n_items)
        Numeric rating matrix; converted with ``np.asarray``.
    n_user_clusters : int, default 10
        Number of KMeans clusters for the rows.
    n_item_clusters : int, default 10
        Number of KMeans clusters for the columns.
    random_state : int, RandomState instance or None, default None
        Forwarded to both KMeans estimators. Pass an int for reproducible
        cluster assignments; ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``matrix`` and a floating dtype, filled
        with block means.

    Notes
    -----
    Every entry of ``matrix`` contributes to its block mean, zeros included.
    NOTE(review): if zeros stand for "no rating" (e.g. after ``fillna(0)``),
    the block means are pulled towards 0 -- confirm this is intended.
    """
    matrix = np.asarray(matrix)

    user_clusters = KMeans(
        n_clusters=n_user_clusters, random_state=random_state
    ).fit_predict(matrix)
    item_clusters = KMeans(
        n_clusters=n_item_clusters, random_state=random_state
    ).fit_predict(matrix.T)

    # Always use a floating output so block means are not truncated when the
    # input happens to have an integer dtype (float inputs keep their dtype).
    clustered_matrix = np.zeros(
        matrix.shape, dtype=np.result_type(matrix.dtype, np.float32)
    )

    # Column masks do not depend on the user cluster: build them once.
    item_masks = [item_clusters == i for i in range(n_item_clusters)]

    for u in range(n_user_clusters):
        mask_u = user_clusters == u
        if not mask_u.any():
            # Empty cluster: nothing to fill, and mean() of an empty slice
            # would only produce a RuntimeWarning and NaN.
            continue
        for mask_i in item_masks:
            if not mask_i.any():
                continue
            block = np.ix_(mask_u, mask_i)
            clustered_matrix[block] = matrix[block].mean()
    return clustered_matrix

# The call below supplies no explicit seed, and scikit-learn estimators with
# random_state=None draw from NumPy's global RNG; seeding it here makes the
# cluster assignments reproducible under Restart & Run All.
np.random.seed(42)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
clustered_matrix = parallel_co_clustering(user_item_matrix.values)
execution_time = time.perf_counter() - start_time
execution_time


In [None]:
# Simulate ground truth and predicted matrix
# NOTE(review): the "ground truth" is the same matrix the clustering was
# fitted on, and zero-filled cells count as not relevant, so these are
# in-sample scores -- confirm this is the intended evaluation protocol.
RELEVANCE_THRESHOLD = 3  # ratings strictly above this value count as relevant

# ravel() yields the same C-order 1-D view of the values as flatten() but
# skips the copy when the array layout allows it.
y_true = user_item_matrix.values.ravel()
y_pred = clustered_matrix.ravel()

# Binarise once instead of recomputing the comparison for every metric.
true_relevant = y_true > RELEVANCE_THRESHOLD
pred_relevant = y_pred > RELEVANCE_THRESHOLD

# Compute scores
# zero_division=0 keeps the 0.0 result scikit-learn already returns when no
# cell is predicted (or labelled) relevant, without the UndefinedMetricWarning.
precision = precision_score(true_relevant, pred_relevant, zero_division=0)
recall = recall_score(true_relevant, pred_relevant, zero_division=0)
f1 = f1_score(true_relevant, pred_relevant, zero_division=0)
# Average precision ranks by the raw predicted scores, not the binarised ones.
map_score = average_precision_score(true_relevant, y_pred)

precision, recall, f1, map_score
