In [None]:
# Install dependencies
!pip install torch pandas flwr matplotlib scikit-learn

# Check PyTorch version and device
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"Using device: {'GPU' if torch.cuda.is_available() else 'CPU'}")

Collecting flwr
  Downloading flwr-1.15.2-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-

In [None]:
import pandas as pd

# Load the dataset
file_path = "/content/Partitioned_Client_1_Pet_Supplies.csv"  # Update if needed
df = pd.read_csv(file_path)

# Display dataset info
print(f"Dataset Loaded! Total records: {len(df)}")
print(df.head())  # Show first few rows
print(df.info())  # Show dataset structure


Dataset Loaded! Total records: 324
   Unnamed: 0                                               name  \
0         635  Pups&Pets Hard Squeeze Green Squeak Ball Dog T...   
1         220  Pawsome Reversable Dual Color Beige & Black Ul...   
2         593  Paw Naturale Flake Off Healing Skin Massage Oi...   
3         429  Ksk Natural Wooden Bridge, Chewing,Hanging Toy...   
4          72  Jainsons Pet Products® Bird Nest with Chewing ...   

  main_category      sub_category  \
0  pet supplies  All Pet Supplies   
1  pet supplies  All Pet Supplies   
2  pet supplies  All Pet Supplies   
3  pet supplies  All Pet Supplies   
4  pet supplies  All Pet Supplies   

                                               image  \
0  https://m.media-amazon.com/images/I/41OllOR88R...   
1  https://m.media-amazon.com/images/I/61ZOr2Cx69...   
2  https://m.media-amazon.com/images/W/IMAGERENDE...   
3  https://m.media-amazon.com/images/I/61-Q6GriC3...   
4  https://m.media-amazon.com/images/I/71qSYGIZ-P... 

In [None]:
# Print available columns
print("Columns in dataset:", df.columns.tolist())

# Drop only the columns that exist
drop_columns = ['Unnamed: 0', 'image', 'link']
df = df.drop(columns=[col for col in drop_columns if col in df.columns], errors='ignore')

# Display updated columns
print("Columns after dropping unnecessary ones:", df.columns.tolist())


Columns in dataset: ['name', 'main_category', 'sub_category', 'ratings', 'no_of_ratings', 'discount_price', 'actual_price', 'price_category']
Columns after dropping unnecessary ones: ['name', 'main_category', 'sub_category', 'ratings', 'no_of_ratings', 'discount_price', 'actual_price', 'price_category']


In [None]:
# Fill missing ratings with median value
df['ratings'] = pd.to_numeric(df['ratings'], errors='coerce')  # Convert to numeric
df['ratings'].fillna(df['ratings'].median(), inplace=True)

# Fill missing no_of_ratings with 0
df['no_of_ratings'] = pd.to_numeric(df['no_of_ratings'], errors='coerce').fillna(0)

# Convert actual_price & discount_price to numeric, handle missing discount_price
df['actual_price'] = df['actual_price'].replace('[₹,]', '', regex=True)  # Remove ₹ symbol
df['actual_price'] = pd.to_numeric(df['actual_price'], errors='coerce')

df['discount_price'] = pd.to_numeric(df['discount_price'], errors='coerce')
df['discount_price'].fillna(df['actual_price'], inplace=True)  # Fill missing discount prices with actual price

# Display updated dataset info
print("Dataset after cleaning:")
print(df.info())
print(df.head())


Dataset after cleaning:
<class 'pandas.core.frame.DataFrame'>
Index: 272 entries, 0 to 323
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   name            272 non-null    object 
 1   main_category   272 non-null    object 
 2   sub_category    272 non-null    object 
 3   ratings         272 non-null    float64
 4   no_of_ratings   272 non-null    float64
 5   discount_price  272 non-null    float64
 6   actual_price    272 non-null    float64
 7   price_category  272 non-null    object 
dtypes: float64(4), object(4)
memory usage: 19.1+ KB
None
                                                name main_category  \
0  Pups&Pets Hard Squeeze Green Squeak Ball Dog T...  pet supplies   
1  Pawsome Reversable Dual Color Beige & Black Ul...  pet supplies   
3  Ksk Natural Wooden Bridge, Chewing,Hanging Toy...  pet supplies   
4  Jainsons Pet Products® Bird Nest with Chewing ...  pet supplies   
5  Aakriti Glass Ge

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['ratings'].fillna(df['ratings'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['discount_price'].fillna(df['actual_price'], inplace=True)  # Fill missing discount prices with actual price


In [None]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# Label Encoding categorical features
label_encoders = {}
for col in ['main_category', 'sub_category', 'price_category']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])  # Convert to numeric
    label_encoders[col] = le  # Store encoder for inverse transform if needed

# Normalize numerical values (ratings, no_of_ratings, discount_price, actual_price)
scaler = MinMaxScaler()
df[['ratings', 'no_of_ratings', 'discount_price', 'actual_price']] = scaler.fit_transform(
    df[['ratings', 'no_of_ratings', 'discount_price', 'actual_price']]
)

# Final dataset structure
print("Dataset after feature engineering:")
print(df.head())
print(df.info())


Dataset after feature engineering:
                                                name  main_category  \
0  Pups&Pets Hard Squeeze Green Squeak Ball Dog T...              0   
1  Pawsome Reversable Dual Color Beige & Black Ul...              0   
3  Ksk Natural Wooden Bridge, Chewing,Hanging Toy...              0   
4  Jainsons Pet Products® Bird Nest with Chewing ...              0   
5  Aakriti Glass Gem Stone, Flat Round Marbles Aq...              0   

   sub_category  ratings  no_of_ratings  discount_price  actual_price  \
0             0    0.600       0.002092        0.044870      0.040980   
1             0    0.825       0.781381        0.089740      0.128173   
3             0    0.650       0.019874        0.037392      0.060453   
4             0    0.800       0.248954        0.044870      0.040980   
5             0    0.775       0.058577        0.029913      0.050668   

   price_category  
0               1  
1               1  
3               1  
4               1  

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Drop 'name' column since it's not useful for training
df = df.drop(columns=['name'])

# Convert DataFrame to PyTorch tensors
X = torch.tensor(df.drop(columns=['price_category']).values, dtype=torch.float32)  # Features
y = torch.tensor(df['price_category'].values, dtype=torch.float32)  # Target

# Define PyTorch Dataset
class ProductDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create DataLoader
batch_size = 32
dataset = ProductDataset(X, y)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Define the Recommendation Model (Neural Collaborative Filtering)
class NCFModel(nn.Module):
    def __init__(self, input_dim):
        super(NCFModel, self).__init__()
        self.fc_layers = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()  # Binary classification
        )

    def forward(self, x):
        return self.fc_layers(x).squeeze()

# Initialize model, optimizer, and loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = NCFModel(input_dim=X.shape[1]).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

# Training function
def train(model, dataloader, optimizer, criterion, device, epochs=10):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for X_batch, y_batch in dataloader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(dataloader):.4f}")

# Train the model
train(model, dataloader, optimizer, criterion, device, epochs=10)


Epoch [1/10], Loss: 0.6428
Epoch [2/10], Loss: 0.5904
Epoch [3/10], Loss: 0.5426
Epoch [4/10], Loss: 0.5040
Epoch [5/10], Loss: 0.4723
Epoch [6/10], Loss: 0.4256
Epoch [7/10], Loss: 0.4231
Epoch [8/10], Loss: 0.4230
Epoch [9/10], Loss: 0.4028
Epoch [10/10], Loss: 0.4128


In [None]:
# Evaluation function
def evaluate(model, dataloader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            predictions = model(X_batch)
            predicted_labels = (predictions > 0.5).float()
            correct += (predicted_labels == y_batch).sum().item()
            total += y_batch.size(0)

    accuracy = correct / total
    print(f"Model Accuracy: {accuracy * 100:.2f}%")
    return accuracy

# Evaluate the model
evaluate(model, dataloader, device)


Model Accuracy: 84.56%


0.8455882352941176

In [None]:
import torch.nn as nn
import torch.optim as optim

# Define Neural Collaborative Filtering (NCF) Model
class NCFModel(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=16):
        super(NCFModel, self).__init__()

        # Embedding layers for users & items
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        # Fully Connected layers for interaction
        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_dim * 2, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()  # Output probability of purchase (0-1)
        )

    def forward(self, user, item):
        # Get user & item embeddings
        user_emb = self.user_embedding(user)
        item_emb = self.item_embedding(item)

        # Concatenate embeddings
        x = torch.cat([user_emb, item_emb], dim=-1)

        return self.fc_layers(x).squeeze()

# Get user and product count
num_users = df['user_id'].nunique()
num_items = df['product_id'].nunique()

# Initialize model
model = NCFModel(num_users=num_users, num_items=num_items)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define optimizer & loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()  # Binary Cross Entropy Loss for classification

# Print model summary
print(model)


KeyError: 'user_id'

In [None]:
DEVICE = torch.device("cpu")  # Force CPU for debugging

# Training Loop Debugging
for user, item, label in train_loader:
    print(f"Users: {user}")
    print(f"Items: {item}")
    user_emb = model.user_embedding(user)  # This will show the exact crashing point


NameError: name 'torch' is not defined

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Hyperparameters
EPOCHS = 5
BATCH_SIZE = 32
LEARNING_RATE = 0.001
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# DataLoader
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Model, Loss, Optimizer
model = NCFModel(num_users=df['user_id'].nunique(), num_items=df['product_id'].nunique()).to(DEVICE)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training Loop
def train_model(model, train_loader, criterion, optimizer, device):
    model.train()
    for epoch in range(EPOCHS):
        total_loss = 0
        for user, item, label in train_loader:
            user, item, label = user.to(device), item.to(device), label.to(device, dtype=torch.float)

            optimizer.zero_grad()
            output = model(user, item)

            loss = criterion(output, label)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {total_loss/len(train_loader):.4f}")

# Evaluation Function
def evaluate_model(model, train_loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for user, item, label in train_loader:
            user, item, label = user.to(device), item.to(device), label.to(device, dtype=torch.float)
            output = model(user, item)
            predicted = (output > 0.5).float()
            correct += (predicted == label).sum().item()
            total += label.size(0)

    accuracy = correct / total
    print(f"Model Accuracy: {accuracy:.4f}")

# Train & Evaluate
train_model(model, train_loader, criterion, optimizer, DEVICE)
evaluate_model(model, train_loader, DEVICE)


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
