In [None]:
# Import Libraries
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Load the New Dataset
# Read the CSV, relabel its columns positionally with the four names below
# (a file with a different number of columns raises ValueError), then discard
# 'price', which is not used by the rest of the notebook.
df = (
    pd.read_csv('ecommerce_sample_1700_rows.csv')
    .set_axis(['user_id', 'prod_id', 'rating', 'price'], axis=1)
    .drop(columns=['price'])
)

In [None]:
# Ensure Data Types
# Drop rows with a missing user or product id *before* casting to str:
# astype(str) would otherwise turn a missing value into the literal string
# 'nan', which would then be treated as a real user/product downstream.
df = df.dropna(subset=['user_id', 'prod_id'])
df = df.assign(
    user_id=df['user_id'].astype(str),
    prod_id=df['prod_id'].astype(str),
    # Ratings that cannot be parsed as numbers become NaN and are removed below.
    rating=pd.to_numeric(df['rating'], errors='coerce'),
)
df = df.dropna(subset=['rating'])

In [None]:
# Handle Duplicates
# When any (user, product) pair occurs more than once, collapse each pair to a
# single row holding the mean of its ratings. The frame is left untouched when
# there are no duplicates.
if df.duplicated(subset=['user_id', 'prod_id']).any():
    print("Duplicate user-product combinations found. Resolving...")
    df = (
        df.groupby(['user_id', 'prod_id'], as_index=False)
        .agg(rating=('rating', 'mean'))
    )


In [None]:
# Preprocessing
# Minimum number of ratings a user must have to be kept. At 1 the filter keeps
# every user that appears in df; raise it to drop users with little history.
MIN_INTERACTIONS_PER_USER = 1

# Ratings per user, then keep only the rows of users that meet the threshold.
counts = df['user_id'].value_counts()
eligible_users = counts.index[counts >= MIN_INTERACTIONS_PER_USER]
df_final = df[df['user_id'].isin(eligible_users)]

In [None]:
# Create Interaction Matrix
# One row per user, one column per product; user/product pairs without a
# rating are filled with 0. pivot() raises if a (user, product) pair repeats.
final_ratings_matrix = (
    df_final
    .pivot(index='user_id', columns='prod_id', values='rating')
    .fillna(0)
)

# Check Sparsity
# Sparsity is the share of zero cells. Because unrated pairs were filled with
# 0 above, a literal rating of 0 is counted as "unrated" here as well.
if final_ratings_matrix.size > 0:
    sparsity = 1.0 - (
        np.count_nonzero(final_ratings_matrix.to_numpy()) / final_ratings_matrix.size
    )
    print(f"Sparsity of the matrix: {sparsity:.2%}")
else:
    print("The interaction matrix is empty. Please check the dataset or preprocessing steps.")

Sparsity of the matrix: 87.38%


In [None]:
# Quick sanity check: (n_users, n_products) followed by the first five rows.
print(final_ratings_matrix.shape, final_ratings_matrix.head(), sep="\n")

(1681, 8)
prod_id          Apple iPad Air  DeepCool Air Cooler  Google Pixel 6  \
user_id                                                                
Aaron Cross                 0.0                  0.0             0.0   
Aaron Doyle                 3.9                  0.0             0.0   
Aaron Hodges                3.9                  0.0             0.0   
Aaron Jones                 0.0                  0.0             0.0   
Aaron Lopez III             0.0                  0.0             0.0   

prod_id          Oneplus 12r  Samsung Galaxy S22  Wireless Headphone  \
user_id                                                                
Aaron Cross              0.0                 3.5                 0.0   
Aaron Doyle              0.0                 0.0                 0.0   
Aaron Hodges             0.0                 0.0                 0.0   
Aaron Jones              0.0                 0.0                 4.2   
Aaron Lopez III          0.0                 0.0     

In [None]:
# Number of rows (ratings) per user in df, most frequent users first.
print(df['user_id'].value_counts())

user_id
Matthew Davis          3
Charles Gutierrez      2
Brandon Jones          2
Christopher Johnson    2
Andrea Anderson        2
                      ..
William Collins        1
William Cooper         1
William Davis          1
William Dawson         1
Aaron Cross            1
Name: count, Length: 1681, dtype: int64


In [None]:
# Convert to Sparse Matrix
# Only the numeric values are stored in CSR form; the user/product labels stay
# on final_ratings_matrix and are re-attached after the factorisation.
final_ratings_matrix_sparse = csr_matrix(final_ratings_matrix.to_numpy())

In [None]:
# Perform SVD with dynamic k
# With its default solver, svds requires 1 <= k < min(matrix.shape), so the
# largest usable rank is min(shape) - 1. A matrix with fewer than two users or
# two products has no valid k at all, so fail early with a clear message
# instead of handing svds an out-of-range k.
smallest_dim = min(final_ratings_matrix_sparse.shape)
if smallest_dim < 2:
    raise ValueError(
        "SVD needs an interaction matrix with at least 2 users and 2 products; "
        f"got shape {final_ratings_matrix_sparse.shape}."
    )
k_value = smallest_dim - 1
print(f"Using k={k_value} for SVD")
U, sigma, Vt = svds(final_ratings_matrix_sparse, k=k_value)
# svds returns the singular values as a 1-D array; expand them into a diagonal
# matrix so they can be used directly in a matrix product with U and Vt.
sigma = np.diag(sigma)

NameError: name 'final_ratings_matrix_sparse' is not defined

In [None]:
# Reconstruct Predicted Ratings
# Multiply the three SVD factors back together, then re-attach the user and
# product labels of the original interaction matrix.
predicted_ratings = U @ sigma @ Vt
predicted_ratings_df = pd.DataFrame(
    predicted_ratings,
    index=final_ratings_matrix.index,
    columns=final_ratings_matrix.columns,
)

NameError: name 'np' is not defined

In [None]:
def recommend_items(user_id, top_n=5, exclude_rated=False):
    """Return the top-N products for a user, ranked by predicted rating.

    Reads three notebook-level objects, all of which must be defined before
    the function is called: ``predicted_ratings_df`` (users x products
    predicted ratings), ``final_ratings_matrix`` (observed ratings, where 0
    marks an unrated product) and ``product_images`` (product name -> image
    URL).

    Args:
        user_id: Row label of the user in ``predicted_ratings_df``.
        top_n: Maximum number of recommendations to return.
        exclude_rated: When True, products for which the user has a non-zero
            entry in ``final_ratings_matrix`` are left out, so only products
            the user has not rated yet are recommended. Defaults to False,
            which ranks every product exactly as before.

    Returns:
        A list of dicts with keys ``"prod_id"``, ``"name"`` and
        ``"imageUrl"``, ordered from highest to lowest predicted rating.
        Products without an entry in ``product_images`` get a placeholder URL.

    Raises:
        ValueError: If ``user_id`` has no row in ``predicted_ratings_df``.
    """
    # Label-based lookup replaces the linear scan over a list copy of the
    # index and gives an explicit error for unknown users.
    if user_id not in predicted_ratings_df.index:
        raise ValueError(f"Unknown user_id {user_id!r}: no row in the ratings matrix.")

    user_scores = predicted_ratings_df.loc[user_id]

    if exclude_rated:
        # Non-zero observed rating == the user already rated this product.
        already_rated = (final_ratings_matrix.loc[user_id] != 0).to_numpy()
        user_scores = user_scores[~already_rated]

    recommended_products = user_scores.sort_values(ascending=False).head(top_n).index

    # Add image URLs to the recommendations
    return [
        {
            "prod_id": product,
            "name": product,
            "imageUrl": product_images.get(product, "https://example.com/images/placeholder.jpg"),
        }
        for product in recommended_products
    ]


In [None]:
# Demo: use the user on the first row of df_final (replace with any valid user ID).
# NOTE: recommend_items reads product_images, so that dictionary must already
# be defined when this cell runs.
example_user = df_final['user_id'].iat[0]
recommendations = recommend_items(example_user, top_n=5)

# Print recommendations
for rec in recommendations:
    print(f"Product ID: {rec['prod_id']}, Name: {rec['name']}, Image: {rec['imageUrl']}")


NameError: name 'df_final' is not defined

In [None]:
# Dictionary to map product IDs/names to image URLs
# Keys are product names; recommend_items looks products up here and falls
# back to a placeholder URL for any name that is missing.
product_images = dict(
    [
        ("Samsung Galaxy S22", "https://example.com/images/samsung-galaxy-s22.jpg"),
        ("Apple iPad Air", "https://example.com/images/apple-ipad-air.jpg"),
        ("Google Pixel 6", "https://example.com/images/google-pixel-6.jpg"),
        ("Oneplus 12r", "https://example.com/images/oneplus-12r.jpg"),
        ("iPhone 15 Pro", "https://example.com/images/iphone-15-pro.jpg"),
    ]
)
