In [2]:
import pandas as pd
import numpy as np
import ast
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.decomposition import NMF
from sklearn.model_selection import train_test_split

In [3]:
# Load the dataset
df = pd.read_csv("/kaggle/input/raw-for-user-item/cleaned_data.csv")

# Display the first few rows to understand the structure
df.head()

Unnamed: 0,profile_id,purchased_product_ids,purchased_product_catgeories,purchased_product_title,purchased_product_values,add_to_cart_product_ids,number_of_times_products_put_in_add_to_cart
0,00004c7a-9ed3-40ff-8156-c44530c27213,['412684'],['Sneakers'],['White Logo Print Sneakers|137228601-Bright-W...,[2533],"['412684', '414761']","[1, 1]"
1,00006fa3-10e4-430f-91ae-fedf5475765a,['415026'],['Full Sleeves'],['White Knit Full Sleeves Shirt|141441706-Clou...,[1348],"['413229', '415026']","[1, 1]"
2,0000b16e-db6a-4e93-b4b0-6a4f2117b3da,['397603'],['Crew Neck'],['White Crew Neck T-shirt|265314902'],[548],['397603'],[2]
3,0000c676-957e-4b5e-a9e0-cc70f1fffc66,"['413130', '416252']","['Fashion Vests', 'Trunks']",['Black Tropical Print Pocket Vest|144648002-B...,"[460, 672]","['413130', '416252']","[1, 1]"
4,000137ae-2d3e-49e9-b1ef-af48411c9191,"['410881', '411145']","['Skinny Fit', 'Slim Fit']",['Black Low Rise Ben Skinny Fit Jeans|21923120...,"[1479, 1643]","['410881', '411145', '411440']","[1, 1, 1]"


In [4]:
# Parse the string lists into actual lists
df['purchased_product_ids'] = df['purchased_product_ids'].apply(ast.literal_eval)
df['add_to_cart_product_ids'] = df['add_to_cart_product_ids'].apply(ast.literal_eval)

# Verify the transformation
df[['profile_id', 'purchased_product_ids', 'add_to_cart_product_ids']].head()

Unnamed: 0,profile_id,purchased_product_ids,add_to_cart_product_ids
0,00004c7a-9ed3-40ff-8156-c44530c27213,[412684],"[412684, 414761]"
1,00006fa3-10e4-430f-91ae-fedf5475765a,[415026],"[413229, 415026]"
2,0000b16e-db6a-4e93-b4b0-6a4f2117b3da,[397603],[397603]
3,0000c676-957e-4b5e-a9e0-cc70f1fffc66,"[413130, 416252]","[413130, 416252]"
4,000137ae-2d3e-49e9-b1ef-af48411c9191,"[410881, 411145]","[410881, 411145, 411440]"


In [5]:
def get_interaction_scores(row, purchase_score=1, add_to_cart_score=0.5):
    purchased = set(row['purchased_product_ids'])
    added = set(row['add_to_cart_product_ids'])
    all_products = purchased | added
    scores = {}
    for product in all_products:
        score = 0
        if product in added:
            score += add_to_cart_score
        if product in purchased:
            score += purchase_score
        scores[product] = score
    return scores

# Test on a single row
sample_row = df.iloc[0]
scores = get_interaction_scores(sample_row)
print(f"Profile ID: {sample_row['profile_id']}, Scores: {scores}")

Profile ID: 00004c7a-9ed3-40ff-8156-c44530c27213, Scores: {'414761': 0.5, '412684': 1.5}


In [6]:
interaction_list = []
for index, row in df.iterrows():
    user_id = row['profile_id']
    scores = get_interaction_scores(row)
    for product, score in scores.items():
        interaction_list.append((user_id, product, score))

# Convert to DataFrame for inspection
interaction_df = pd.DataFrame(interaction_list, columns=['user_id', 'product_id', 'score'])
print(interaction_df.head())
print(f"Total interactions: {len(interaction_df)}")

                                user_id product_id  score
0  00004c7a-9ed3-40ff-8156-c44530c27213     414761    0.5
1  00004c7a-9ed3-40ff-8156-c44530c27213     412684    1.5
2  00006fa3-10e4-430f-91ae-fedf5475765a     413229    0.5
3  00006fa3-10e4-430f-91ae-fedf5475765a     415026    1.5
4  0000b16e-db6a-4e93-b4b0-6a4f2117b3da     397603    1.5
Total interactions: 313908


In [7]:
# Create pivot table, filling missing values with 0
pivot_table = interaction_df.pivot(index='user_id', columns='product_id', values='score').fillna(0)

# Display the pivot table
print(pivot_table.shape)
pivot_table.head()

(99711, 10539)


product_id,378750,378751,378753,378754,378755,378779,378781,379076,379305,379312,...,60449,60456,60460,60465,60466,60587,60588,"60588,403609,403919,403968,403997,405329,406229,407031",60589,60590
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00004c7a-9ed3-40ff-8156-c44530c27213,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00006fa3-10e4-430f-91ae-fedf5475765a,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0000b16e-db6a-4e93-b4b0-6a4f2117b3da,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0000c676-957e-4b5e-a9e0-cc70f1fffc66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
000137ae-2d3e-49e9-b1ef-af48411c9191,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Initialize and fit NMF model
nmf = NMF(n_components=30, init='nndsvd', max_iter=100, random_state=42)
W = nmf.fit_transform(pivot_table)
H = nmf.components_

# Reconstruct the matrix
V_approx = np.dot(W, H)

# Calculate reconstruction error
error = np.linalg.norm(pivot_table.values - V_approx, 'fro')
print(f"Reconstruction Error (Frobenius Norm): {error}")

# Convert to DataFrame for recommendations
predicted_scores_df = pd.DataFrame(V_approx, index=pivot_table.index, columns=pivot_table.columns)
predicted_scores_df.head()



Reconstruction Error (Frobenius Norm): 652.2855264455987


product_id,378750,378751,378753,378754,378755,378779,378781,379076,379305,379312,...,60449,60456,60460,60465,60466,60587,60588,"60588,403609,403919,403968,403997,405329,406229,407031",60589,60590
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00004c7a-9ed3-40ff-8156-c44530c27213,1.87202e-07,4.160344e-09,3.354772e-08,3.359039e-10,4e-06,1e-06,3.2731e-07,2.114412e-08,3.098542e-06,3e-06,...,1.682869e-09,6.698719e-07,2.248505e-07,3.869603e-07,7.75365e-08,7.24052e-06,9.325856e-08,1.865171e-07,9.743209e-07,6.779503e-08
00006fa3-10e4-430f-91ae-fedf5475765a,1.504528e-07,6.91545e-09,1.012911e-07,7.740834e-10,1.4e-05,4e-06,4.917075e-07,4.772175e-07,8.80524e-06,3.1e-05,...,8.618192e-08,2.422627e-06,2.923517e-07,2.011028e-06,2.804708e-07,2.675791e-06,2.415405e-07,4.830809e-07,1.687804e-06,6.716864e-09
0000b16e-db6a-4e93-b4b0-6a4f2117b3da,7.548107e-07,1.526435e-08,2.033776e-08,1.244491e-09,7e-06,6e-06,1.581636e-06,1.017723e-06,7.672623e-07,6.2e-05,...,1.618876e-07,2.799429e-07,7.332862e-08,2.592349e-07,1.138731e-08,6.468881e-07,1.041968e-07,2.083936e-07,3.443609e-06,8.754459e-07
0000c676-957e-4b5e-a9e0-cc70f1fffc66,2.155905e-06,6.054388e-08,1.511259e-07,8.417673e-10,1e-06,3.1e-05,1.028994e-05,1.844744e-08,4.481565e-05,2.8e-05,...,1.372549e-07,2.173148e-06,2.202302e-07,1.943761e-06,0.0,1.77667e-05,6.978715e-07,1.395743e-06,1.664075e-06,1.916851e-06
000137ae-2d3e-49e9-b1ef-af48411c9191,1.268472e-05,1.452997e-08,4.227731e-06,8.898611e-09,6.5e-05,0.0,1.575399e-05,5.469983e-07,0.0,0.000148,...,6.114751e-08,8.0409e-05,9.618327e-06,6.447574e-05,1.357281e-05,4.904965e-05,5.030155e-06,1.006031e-05,1.117606e-08,4.238615e-09


In [9]:
# Create mappings
user_id_to_idx = {uid: idx for idx, uid in enumerate(pivot_table.index)}
product_id_to_idx = {pid: idx for idx, pid in enumerate(pivot_table.columns)}

# Prepare interaction data
interaction_data = [(user_id_to_idx[user_id], product_id_to_idx[product_id], score) 
                    for user_id, product_id, score in interaction_list]

# Split into train and test (80-20 split)
train_data, test_data = train_test_split(interaction_data, test_size=0.2, random_state=42)
print(f"Training samples: {len(train_data)}, Test samples: {len(test_data)}")

Training samples: 251126, Test samples: 62782


In [10]:
class NCF(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=50, hidden_dim=100):
        super(NCF, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        self.fc1 = nn.Linear(embedding_dim * 2, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, 1)
        self.relu = nn.ReLU()
    
    def forward(self, user, item):
        user_emb = self.user_embedding(user)
        item_emb = self.item_embedding(item)
        x = torch.cat([user_emb, item_emb], dim=1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.output(x)
        return x

# Initialize model
num_users = len(user_id_to_idx)
num_items = len(product_id_to_idx)
ncf_model = NCF(num_users, num_items)
print(ncf_model)

NCF(
  (user_embedding): Embedding(99711, 50)
  (item_embedding): Embedding(10539, 50)
  (fc1): Linear(in_features=100, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=100, bias=True)
  (output): Linear(in_features=100, out_features=1, bias=True)
  (relu): ReLU()
)


In [11]:
class InteractionDataset(Dataset):
    def __init__(self, data):
        self.users, self.items, self.scores = zip(*data)
        self.users = torch.tensor(self.users, dtype=torch.long)
        self.items = torch.tensor(self.items, dtype=torch.long)
        self.scores = torch.tensor(self.scores, dtype=torch.float32)
    
    def __len__(self):
        return len(self.scores)
    
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.scores[idx]

# Create data loaders
train_dataset = InteractionDataset(train_data)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(ncf_model.parameters(), lr=0.001)

# Training loop (5 epochs)
for epoch in range(5):
    total_loss = 0
    for user, item, score in train_loader:
        optimizer.zero_grad()
        pred = ncf_model(user, item).squeeze()
        loss = criterion(pred, score)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

Epoch 1, Loss: 0.23094827564438913
Epoch 2, Loss: 0.2068734227600873
Epoch 3, Loss: 0.1813887144485441
Epoch 4, Loss: 0.16001003602791147
Epoch 5, Loss: 0.14167198044543175


In [12]:
interacted_products = {}
for index, row in df.iterrows():
    user_id = row['profile_id']
    purchased = set(row['purchased_product_ids'])
    added = set(row['add_to_cart_product_ids'])
    interacted_products[user_id] = purchased | added

# Test for a user
sample_user = df['profile_id'].iloc[0]
print(f"User {sample_user} interacted with: {interacted_products[sample_user]}")

User 00004c7a-9ed3-40ff-8156-c44530c27213 interacted with: {'414761', '412684'}


In [13]:
def recommend_combined(user_id, top_n=10):
    if user_id not in user_id_to_idx:
        return []
    
    # NMF scores
    nmf_scores = predicted_scores_df.loc[user_id]
    
    # NCF scores
    user_idx = user_id_to_idx[user_id]
    product_idxs = torch.tensor(list(product_id_to_idx.values()), dtype=torch.long)
    user_tensor = torch.tensor([user_idx] * len(product_idxs), dtype=torch.long)
    with torch.no_grad():
        ncf_scores = ncf_model(user_tensor, product_idxs).squeeze().numpy()
    ncf_scores_df = pd.Series(ncf_scores, index=pivot_table.columns)
    
    # Combine scores (average)
    combined_scores = (nmf_scores + ncf_scores_df) / 2
    
    # Exclude interacted products (convert set to list)
    interacted = list(interacted_products.get(user_id, set()))
    combined_scores[interacted] = -np.inf
    
    # Get top N recommendations
    top_products = combined_scores.nlargest(top_n).index.tolist()
    return top_products

# Test recommendation
test_user = df['profile_id'].iloc[0]
recommendations = recommend_combined(test_user)
print(f"Top 10 recommendations for {test_user}: {recommendations}")

Top 10 recommendations for 00004c7a-9ed3-40ff-8156-c44530c27213: ['388726', '391133', '401838', '415950', '400745', '397140', '383541', '404592', '386932', '409114']


In [14]:
all_users = df['profile_id'].unique()
recommendations_dict = {'profile_id': [], 'recommended_products': []}

for user_id in all_users:
    recs = recommend_combined(user_id, top_n=10)
    recommendations_dict['profile_id'].append(user_id)
    recommendations_dict['recommended_products'].append(recs)

# Convert to DataFrame
recommendations_df = pd.DataFrame(recommendations_dict)

In [15]:
recommendations_df.dtypes

profile_id              object
recommended_products    object
dtype: object

In [16]:
recommendations_df

Unnamed: 0,profile_id,recommended_products
0,00004c7a-9ed3-40ff-8156-c44530c27213,"[388726, 391133, 401838, 415950, 400745, 39714..."
1,00006fa3-10e4-430f-91ae-fedf5475765a,"[411415, 416401, 392506, 399050, 392224, 40832..."
2,0000b16e-db6a-4e93-b4b0-6a4f2117b3da,"[404530, 406821, 412092, 404636, 400832, 41141..."
3,0000c676-957e-4b5e-a9e0-cc70f1fffc66,"[397197, 403984, 394448, 383541, 394191, 40704..."
4,000137ae-2d3e-49e9-b1ef-af48411c9191,"[405537, 402085, 400833, 402023, 402169, 39419..."
...,...,...
99706,fffd403a-4d6a-4390-aefd-bc07e32bfae8,"[388992, 388114, 388761, 404880, 403961, 40075..."
99707,fffd90be-3291-4d4b-9cca-df7970559a6e,"[403906, 411416, 388979, 408618, 404530, 40096..."
99708,ffff2360-3acf-42ca-8fe7-0a293917488d,"[411364, 411360, 385699, 393084, 400754, 38872..."
99709,ffff984a-67f2-40f8-b892-3d81df3feea3,"[415952, 412978, 398524, 397140, 414189, 41154..."


In [18]:
import pandas as pd


# explode the list column so each product gets its own row
df_exploded = recommendations_df.explode('recommended_products') \
                .rename(columns={'recommended_products': 'recommended_product'}) \
                .reset_index(drop=True)

df_exploded


Unnamed: 0,profile_id,recommended_product
0,00004c7a-9ed3-40ff-8156-c44530c27213,388726
1,00004c7a-9ed3-40ff-8156-c44530c27213,391133
2,00004c7a-9ed3-40ff-8156-c44530c27213,401838
3,00004c7a-9ed3-40ff-8156-c44530c27213,415950
4,00004c7a-9ed3-40ff-8156-c44530c27213,400745
...,...,...
997105,ffffb158-222b-43e7-b65e-05023a31999c,391620
997106,ffffb158-222b-43e7-b65e-05023a31999c,398524
997107,ffffb158-222b-43e7-b65e-05023a31999c,400754
997108,ffffb158-222b-43e7-b65e-05023a31999c,417480


In [19]:
df_exploded.to_csv("final_recommendations_user_item.csv",index=False)

In [2]:
# merging below

In [76]:
import pandas as pd
rec_df=pd.read_csv("/kaggle/input/rec-data/final_recommendations_user_item.csv")
rec_df

Unnamed: 0,profile_id,recommended_product
0,00004c7a-9ed3-40ff-8156-c44530c27213,388726
1,00004c7a-9ed3-40ff-8156-c44530c27213,391133
2,00004c7a-9ed3-40ff-8156-c44530c27213,401838
3,00004c7a-9ed3-40ff-8156-c44530c27213,415950
4,00004c7a-9ed3-40ff-8156-c44530c27213,400745
...,...,...
997105,ffffb158-222b-43e7-b65e-05023a31999c,391620
997106,ffffb158-222b-43e7-b65e-05023a31999c,398524
997107,ffffb158-222b-43e7-b65e-05023a31999c,400754
997108,ffffb158-222b-43e7-b65e-05023a31999c,417480


In [77]:
product_details=pd.read_csv("/kaggle/input/product-details/info_id_categ_prod.csv")

In [78]:
product_details

Unnamed: 0.1,Unnamed: 0,profile_id,purchased_product_id,purchased_product_category,purchased_product_title
0,0,00004c7a-9ed3-40ff-8156-c44530c27213,412684,Sneakers,White Logo Print Sneakers|137228601-Bright-White
1,1,00006fa3-10e4-430f-91ae-fedf5475765a,415026,Full Sleeves,White Knit Full Sleeves Shirt|141441706-Cloud-...
2,2,0000b16e-db6a-4e93-b4b0-6a4f2117b3da,397603,Crew Neck,White Crew Neck T-shirt|265314902
3,3,0000c676-957e-4b5e-a9e0-cc70f1fffc66,413130,Fashion Vests,Black Tropical Print Pocket Vest|144648002-Black
4,4,0000c676-957e-4b5e-a9e0-cc70f1fffc66,416252,Trunks,Pack Of 2 Black Trunks|254225701-Black
...,...,...,...,...,...
201176,201176,ffff2360-3acf-42ca-8fe7-0a293917488d,410881,Full Sleeves,Black Low Rise Ben Skinny Fit Jeans|219231201-...
201177,201177,ffff2360-3acf-42ca-8fe7-0a293917488d,411541,Slim Fit,PRODUKT by JACK&ampJONES Navy Blue Full Sleeve...
201178,201178,ffff2360-3acf-42ca-8fe7-0a293917488d,412385,,
201179,201179,ffff984a-67f2-40f8-b892-3d81df3feea3,406980,Sneakers,White Flex Sole Knitted Sneakers|128856102


In [79]:
product_details.rename(columns={"purchased_product_id":"recommended_product"},inplace=True)

In [80]:
product_details.drop(["Unnamed: 0","profile_id"],axis=1,inplace=True)

In [81]:
product_details

Unnamed: 0,recommended_product,purchased_product_category,purchased_product_title
0,412684,Sneakers,White Logo Print Sneakers|137228601-Bright-White
1,415026,Full Sleeves,White Knit Full Sleeves Shirt|141441706-Cloud-...
2,397603,Crew Neck,White Crew Neck T-shirt|265314902
3,413130,Fashion Vests,Black Tropical Print Pocket Vest|144648002-Black
4,416252,Trunks,Pack Of 2 Black Trunks|254225701-Black
...,...,...,...
201176,410881,Full Sleeves,Black Low Rise Ben Skinny Fit Jeans|219231201-...
201177,411541,Slim Fit,PRODUKT by JACK&ampJONES Navy Blue Full Sleeve...
201178,412385,,
201179,406980,Sneakers,White Flex Sole Knitted Sneakers|128856102


In [82]:
list(product_details[product_details["recommended_product"]=="388726"].mode()["purchased_product_title"])[0]

'Grey Logo Print Sweatshirt|246435902'

In [93]:
product_details[product_details["purchased_product_title"]=="388726"]

Unnamed: 0,recommended_product,purchased_product_category,purchased_product_title


In [97]:
unique_product_ids = list(rec_df["recommended_product"].unique())
data_list = []

for prod_id in unique_product_ids:
    try:
        # get the mode of purchased_product_title for this product
        title = product_details.loc[
            product_details["recommended_product"] == prod_id,
            "purchased_product_title"
        ].mode().iloc[0]
    except (IndexError, KeyError):
        # if there's no mode (or column), default to 0
        title = 0

    data_list.append([prod_id, title])


In [98]:
data_list

[['388726', 'Grey Logo Print Sweatshirt|246435902'],
 ['391133', 'Black Cotton Polo T-shirt|235285803-Jet-Black'],
 ['401838', 'Tan PU Sneakers|142919504'],
 ['415950', 'Black Logo Print Sliders|237878302-Jet-Black'],
 ['400745', 'Brown Leather Boots|161846301'],
 ['397140', 'Beige Linen Co-ord Jacket|291421701'],
 ['383541', 'Navy Blue Graphic Print Crew Neck T-shirt|256492402'],
 ['404592', 'Grey Sandals|284553002'],
 ['386932', 'Grey Mandarin Collar Full Sleeves Shirt|277729401'],
 ['409114', 'Black Ombre Full Sleeves Shirt|263487601'],
 ['411415',
  'PRODUKT by JACK&ampJONES Black Pocket Detail Slim Fit Jeans|148226501-Caviar'],
 ['416401', 'Black Mid Rise Slim Fit Pants|193072404-Jet-Black'],
 ['392506', 'Brown Leather Belt|258687901'],
 ['399050', 'Blue Low Rise Glenn Slim Jeans|221208401'],
 ['392224', 'Purple Cassette Printed Briefs|116792801'],
 ['408325', 'Blue Moulded Sandals|145348304'],
 ['407757', 'White Floral Full Sleeves Shirt|263494601'],
 ['406156', 'Beige Full Sleev

In [99]:
details_df=pd.DataFrame(data_list,columns=["recommended_product","title"])

In [103]:
final_user_item=pd.merge(rec_df,details_df,on="recommended_product",how="inner")

In [106]:
final_user_item.to_csv("final_user_item.csv",index=False)

In [108]:
final_user_item[final_user_item["profile_id"]=="00004c7a-9ed3-40ff-8156-c44530c27213"]

Unnamed: 0,profile_id,recommended_product,title
0,00004c7a-9ed3-40ff-8156-c44530c27213,388726,Grey Logo Print Sweatshirt|246435902
1,00004c7a-9ed3-40ff-8156-c44530c27213,391133,Black Cotton Polo T-shirt|235285803-Jet-Black
2,00004c7a-9ed3-40ff-8156-c44530c27213,401838,Tan PU Sneakers|142919504
3,00004c7a-9ed3-40ff-8156-c44530c27213,415950,Black Logo Print Sliders|237878302-Jet-Black
4,00004c7a-9ed3-40ff-8156-c44530c27213,400745,Brown Leather Boots|161846301
5,00004c7a-9ed3-40ff-8156-c44530c27213,397140,Beige Linen Co-ord Jacket|291421701
6,00004c7a-9ed3-40ff-8156-c44530c27213,383541,Navy Blue Graphic Print Crew Neck T-shirt|2564...
7,00004c7a-9ed3-40ff-8156-c44530c27213,404592,Grey Sandals|284553002
8,00004c7a-9ed3-40ff-8156-c44530c27213,386932,Grey Mandarin Collar Full Sleeves Shirt|277729401
9,00004c7a-9ed3-40ff-8156-c44530c27213,409114,Black Ombre Full Sleeves Shirt|263487601


In [110]:
df=pd.read_csv("/kaggle/input/raw-for-user-item/cleaned_data.csv")

In [111]:
df[df["profile_id"]=="00004c7a-9ed3-40ff-8156-c44530c27213"]

Unnamed: 0,profile_id,purchased_product_ids,purchased_product_catgeories,purchased_product_title,purchased_product_values,add_to_cart_product_ids,number_of_times_products_put_in_add_to_cart
0,00004c7a-9ed3-40ff-8156-c44530c27213,['412684'],['Sneakers'],['White Logo Print Sneakers|137228601-Bright-W...,[2533],"['412684', '414761']","[1, 1]"
