## Importing all libraries

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np

# ------------------------
# 1. Mock User-Item Matrix
# ------------------------
# Toy ratings table: one row per user, one column per item, NaN = not rated.
R_df = pd.DataFrame(
    data=[
        [5, 3, np.nan, 1],
        [4, np.nan, np.nan, 1],
        [1, 1, np.nan, 5],
        [1, np.nan, np.nan, 4],
        [np.nan, 1, 5, 4],
    ],
    index=["User1", "User2", "User3", "User4", "User5"],
    columns=["Item1", "Item2", "Item3", "Item4"],
)

print("Original Ratings Matrix (R):\n", R_df)

# Build the torch inputs: a boolean mask that is True where a rating exists,
# and a float32 ratings tensor in which missing cells are filled with 0.
mask = torch.tensor(R_df.notna().to_numpy(), dtype=torch.bool)
R = torch.tensor(R_df.fillna(0).to_numpy(), dtype=torch.float32)

k = 2  # number of latent features
num_users, num_items = R.shape

# ------------------------
# 2. Define Model
# ------------------------
class MatrixFactorization(nn.Module):
    """Rank-k matrix factorization with learnable user and item factor tables.

    The full predicted ratings matrix is the product of the user factor table
    (num_users x k) and the transposed item factor table (k x num_items).
    """

    def __init__(self, num_users, num_items, k):
        """Create both factor tables and initialise their weights.

        Args:
            num_users: Number of rows in the user factor table.
            num_items: Number of rows in the item factor table.
            k: Number of latent features per user/item.
        """
        super().__init__()
        self.user_factors = nn.Embedding(num_users, k)
        self.item_factors = nn.Embedding(num_items, k)
        # Overwrite the default embedding initialisation with small normal
        # draws (std 0.1), user table first, then item table.
        for table in (self.user_factors, self.item_factors):
            nn.init.normal_(table.weight, std=0.1)

    def forward(self):
        """Return the (num_users x num_items) matrix of predicted ratings."""
        user_matrix = self.user_factors.weight
        item_matrix = self.item_factors.weight
        return user_matrix @ item_matrix.T

# ------------------------
# 3. Train the Model
# ------------------------
# Seed torch before the model is built so the random factor initialisation,
# and therefore the fitted predictions, are the same on every run.
SEED = 42
torch.manual_seed(SEED)

model = MatrixFactorization(num_users, num_items, k)
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

num_epochs = 5000
model.train()  # nothing in the loop changes the mode, so set it once
for epoch in range(num_epochs):
    optimizer.zero_grad()

    prediction = model()
    # Only the observed ratings contribute to the loss; the zero placeholders
    # in R for missing ratings are excluded by the mask.
    loss = criterion(prediction[mask], R[mask])
    loss.backward()
    optimizer.step()

    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# ------------------------
# 4. Get Predicted Ratings
# ------------------------
model.eval()
with torch.no_grad():  # inference only: no autograd graph is needed
    R_hat = model().numpy()
R_hat_df = pd.DataFrame(R_hat, columns=R_df.columns, index=R_df.index)

print("\n🔮 Predicted Ratings Matrix (R_hat):\n", R_hat_df.round(2))


Original Ratings Matrix (R):
        Item1  Item2  Item3  Item4
User1    5.0    3.0    NaN      1
User2    4.0    NaN    NaN      1
User3    1.0    1.0    NaN      5
User4    1.0    NaN    NaN      4
User5    NaN    1.0    5.0      4
Epoch 0, Loss: 10.6036
Epoch 1000, Loss: 0.0000
Epoch 2000, Loss: 0.0000
Epoch 3000, Loss: 0.0000
Epoch 4000, Loss: 0.0000

🔮 Predicted Ratings Matrix (R_hat):
        Item1  Item2  Item3  Item4
User1   5.00   3.00   0.28    1.0
User2   4.00   2.42   0.48    1.0
User3   1.00   1.00   6.34    5.0
User4   1.00   0.92   5.03    4.0
User5   1.14   1.00   5.00    4.0


In [22]:
import numpy as np
import pandas as pd

# Mock user-item rating matrix (NaN = missing)
R_df = pd.DataFrame([
    [5, 3, np.nan, 1],
    [4, np.nan, np.nan, 1],
    [1, 1, np.nan, 5],
    [1, np.nan, np.nan, 4],
    [np.nan, 1, 5, 4]
], columns=["Item1", "Item2", "Item3", "Item4"], index=["User1", "User2", "User3", "User4", "User5"])

print("Original Ratings Matrix (R):\n", R_df)

# Convert to a NumPy array and record which entries are observed
R = R_df.values
mask = ~np.isnan(R)

# Replace NaNs with zeros just for SGD (we'll use the mask to ignore them during training)
R = np.nan_to_num(R)

# Parameters
num_users, num_items = R.shape
k = 2  # number of latent features
alpha = 0.01  # learning rate
lambda_reg = 0.02  # regularization
epochs = 5000
SEED = 42  # fixed seed so the random initialisation below is reproducible

# Initialize user (U) and item (V) latent matrices from a seeded generator
# instead of the unseeded global NumPy RNG, so re-running gives the same fit.
rng = np.random.default_rng(SEED)
U = rng.normal(scale=0.1, size=(num_users, k))
V = rng.normal(scale=0.1, size=(num_items, k))

# SGD over the observed ratings: one pass per epoch, one update per rating
for epoch in range(epochs):
    for i in range(num_users):
        for j in range(num_items):
            if not mask[i, j]:  # only update on known ratings
                continue
            eij = R[i, j] - np.dot(U[i, :], V[j, :])
            # Both steps must start from the factors that produced eij.
            # U[i, :] is a view into U, so snapshot it before it is
            # overwritten; otherwise V[j] would be updated with the new U[i].
            u_old = U[i, :].copy()
            U[i, :] += alpha * (eij * V[j, :] - lambda_reg * u_old)
            V[j, :] += alpha * (eij * u_old - lambda_reg * V[j, :])

    if epoch % 1000 == 0:
        # Squared error on observed entries plus the L2 penalty on both factors
        loss = np.sum((mask * (R - U @ V.T) ** 2)) + lambda_reg * (np.sum(U**2) + np.sum(V**2))
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Final prediction: dense matrix with a score for every user-item pair
R_pred = U @ V.T
R_pred_df = pd.DataFrame(R_pred, columns=R_df.columns, index=R_df.index)

print("\n🔮 Predicted Ratings Matrix (R_hat):\n", R_pred_df.round(2))


Original Ratings Matrix (R):
        Item1  Item2  Item3  Item4
User1    5.0    3.0    NaN      1
User2    4.0    NaN    NaN      1
User3    1.0    1.0    NaN      5
User4    1.0    NaN    NaN      4
User5    NaN    1.0    5.0      4
Epoch 0, Loss: 137.8368
Epoch 1000, Loss: 0.8258
Epoch 2000, Loss: 0.8131
Epoch 3000, Loss: 0.8046
Epoch 4000, Loss: 0.7991

🔮 Predicted Ratings Matrix (R_hat):
        Item1  Item2  Item3  Item4
User1   4.95   2.97   3.12   1.00
User2   3.96   2.39   2.72   1.00
User3   1.00   0.99   5.99   4.94
User4   1.00   0.90   4.88   3.97
User5   1.17   1.01   4.97   3.98


In [None]:
import pandas as pd
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load user interaction data and item metadata

# Sample user interaction data (in reality, would come from CSV)
# One row per user; each interaction column holds a JSON-encoded list of
# item ids, or an empty string when there is nothing recorded.
user_df = pd.DataFrame(
    {
        "user_id": ["U001"],
        "wishlist_items": ['["I001"]'],
        "cart_items": [''],
        "browsed_items_history": ['["I004", "I005"]'],
        "ordered_history": [''],
    }
)

# Sample item metadata, one tuple per item in the column order given below
item_df = pd.DataFrame(
    [
        ("I001", "Red Running Shoes", "Shoes", "Nike", "Comfortable red running shoes for men"),
        ("I002", "Blue Sneakers", "Shoes", "Adidas", "Casual blue sneakers for daily use"),
        ("I003", "Black Leather Jacket", "Clothing", "Zara", "Stylish black leather jacket"),
        ("I004", "Running Shorts", "Clothing", "Puma", "Lightweight shorts for runners"),
        ("I005", "Sports Socks", "Accessories", "Nike", "High-quality cotton socks for sports"),
        ("I006", "Fitness Tracker", "Electronics", "Fitbit", "Track your workouts and heart rate"),
        ("I007", "White Tennis Shoes", "Shoes", "Nike", "Classic white tennis shoes"),
        ("I008", "Green Hoodie", "Clothing", "H&M", "Comfortable green hoodie for cold weather"),
    ],
    columns=["item_id", "name", "category", "brand", "description"],
)

# Item text feature: "<category> <brand> <description>" for every item that
# has a description.
item_df = item_df.dropna(subset=["description"])
item_df["text"] = item_df["category"].astype(str).str.cat(
    [item_df["brand"].astype(str), item_df["description"].astype(str)],
    sep=" ",
)

# TF-IDF vectorization of item text
vectorizer = TfidfVectorizer(stop_words='english')
item_tfidf = vectorizer.fit_transform(item_df["text"])
# Map each item id to its ROW POSITION in item_tfidf. The TF-IDF matrix has
# one row per input text in input order, whereas item_df.index still carries
# the pre-dropna labels, so using the labels would point at the wrong rows
# (or past the end) as soon as any item had been dropped.
item_id_to_index = {item_id: pos for pos, item_id in enumerate(item_df["item_id"])}

# Relative weight of each interaction signal, strongest first.
# The keys match the interaction column names of user_df.
interaction_weights = dict(
    ordered_history=3.0,
    wishlist_items=2.0,
    cart_items=1.5,
    browsed_items_history=1.0,
)

# Recommendation function using weighted content-based filtering
def recommend_items_for_user(user_id, top_n=10):
    """Recommend items for one user with weighted content-based filtering.

    For every interaction column listed in ``interaction_weights`` the TF-IDF
    rows of the referenced items are averaged and scaled by that column's
    weight. The scaled averages are summed and divided by the total weight
    used, and every item is scored by cosine similarity to that profile.

    Reads the module-level ``user_df``, ``item_df``, ``item_tfidf``,
    ``item_id_to_index`` and ``interaction_weights``; none of them is
    modified by the call.

    Args:
        user_id: Value looked up in ``user_df['user_id']``.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with columns ``item_id``, ``name`` and ``similarity``,
        sorted by descending similarity and excluding items the user has
        already interacted with. An empty list is returned instead when the
        user is not found or no interaction maps to a known item.
    """
    matches = user_df[user_df['user_id'] == user_id]
    if matches.empty:
        print(f"User {user_id} not found.")
        return []
    user_row = matches.iloc[0]

    user_vector = None
    total_weight = 0.0
    interacted_items = set()

    for interaction, weight in interaction_weights.items():
        # Best-effort parsing: a missing column, a non-string cell, an empty
        # or malformed JSON string, or unusable entries skip this interaction
        # type. Only these data errors are tolerated; anything else (e.g. an
        # index that does not match item_tfidf) must surface, not be hidden.
        try:
            items = json.loads(user_row.get(interaction))
            indices = [item_id_to_index[i] for i in items if i in item_id_to_index]
        except (TypeError, ValueError):
            continue

        interacted_items.update(items)
        if not indices:
            continue

        # Sparse .mean() may return np.matrix; normalise to a (1, n) ndarray
        column_profile = np.asarray(item_tfidf[indices].mean(axis=0)) * weight
        user_vector = column_profile if user_vector is None else user_vector + column_profile
        total_weight += weight

    if user_vector is None or total_weight == 0:
        return []

    # Normalize user vector by the weights that actually contributed
    user_vector = user_vector / total_weight

    # Compute cosine similarity between the profile and every item
    similarities = cosine_similarity(user_vector, item_tfidf).flatten()

    # Score a copy so the shared item_df is not given a per-call column
    scored = item_df.assign(similarity=similarities)

    # Exclude already interacted items
    recommendations = scored[~scored['item_id'].isin(interacted_items)]
    recommendations = recommendations.sort_values(by='similarity', ascending=False)

    return recommendations[['item_id', 'name', 'similarity']].head(top_n)

# Example usage
# Top five recommendations for the sample user defined above
recommended = recommend_items_for_user("U001", top_n=5)
print(recommended)


  item_id                  name  similarity
6    I007    White Tennis Shoes    0.426670
1    I002         Blue Sneakers    0.150919
7    I008          Green Hoodie    0.138134
2    I003  Black Leather Jacket    0.024230
5    I006       Fitness Tracker    0.000000


## Dynamic pricing