# Transformadores para sistema de recomendaciones

In [1]:
import numpy as np 
import pandas as pd 
import kagglehub
import os
import transformers
import torch
import torch.nn as nn 
import torch.nn.functional as F




In [2]:
class BERT4RecWithRatings(nn.Module):
    def __init__(self, vocab_size, d_model=256, n_layers=4, n_heads=8, max_len=128,
                 rating_bins=6, dropout=0.1):
        super().__init__()
        self.vocab_size = vocab_size
        self.item_emb = nn.Embedding(vocab_size, d_model, padding_idx=0)  # reserve 0 for PAD
        # rating bins: e.g., 0 = no rating / PAD, 1..5 actual scores. Adjust as needed.
        self.rating_emb = nn.Embedding(rating_bins, d_model)  
        self.pos_emb = nn.Embedding(max_len, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model, n_heads, dim_feedforward=4*d_model, dropout=dropout)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)
        # MLM head (tie embeddings optionally)
        self.mlm_head = nn.Linear(d_model, vocab_size)
        # Rating regression/classification head (per position)
        self.rating_head = nn.Sequential(
            nn.Linear(d_model, d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, 1)  # scalar rating; use nn.Sigmoid if normalized
        )

        self.max_len = max_len
        self.d_model = d_model

    def forward(self, item_seq, rating_seq, attention_mask):
        # item_seq: (B, L) item ids (with [MASK] token id where applicable)
        # rating_seq: (B, L) discrete rating bins aligned (0 for unknown)
        # attention_mask: (B, L) 1 for real tokens, 0 for PAD (for transformer)
        B, L = item_seq.shape
        pos = torch.arange(L, device=item_seq.device).unsqueeze(0).expand(B, -1)  # (B, L)
        x = self.item_emb(item_seq) + self.rating_emb(rating_seq) + self.pos_emb(pos)
        # transformer expects (L, B, d)
        x = x.transpose(0, 1)
        # key_padding_mask expects True for PAD positions
        key_padding_mask = (attention_mask == 0)
        x = self.transformer(x, src_key_padding_mask=key_padding_mask)  # (L, B, d)
        x = x.transpose(0, 1)  # (B, L, d)
        logits = self.mlm_head(x)  # (B, L, vocab)
        rating_pred = self.rating_head(x).squeeze(-1)  # (B, L)
        return logits, rating_pred

In [3]:
def create_masked_input(item_seq, mask_token_id, pad_id=0, mask_prob=0.15):
    # item_seq: (B, L)
    B, L = item_seq.shape
    labels = torch.full_like(item_seq, -100)  # -100 ignored by CE loss
    # decide mask positions only on non-pad tokens
    mask = (item_seq != pad_id) & (torch.rand(B, L, device=item_seq.device) < mask_prob)
    # store labels for masked positions
    labels[mask] = item_seq[mask]
    input_seq = item_seq.clone()
    input_seq[mask] = mask_token_id  # or random replace / keep some percent unchanged
    return input_seq, labels, mask



## Descargar conjunto de datos y lectura con pandas

In [4]:
'''
estas líneas están comentadas porque otra de las notebooks ya tiene código para descargar y guardar de forma local en otra carpeta los conjuntos de datos.
Si no se encuentran las carpetas, se pueden descomentar estas tres celdas y comentar las últimas dos líneas de la tercera
'''
path = kagglehub.dataset_download("mohamedbakhet/amazon-books-reviews")

print("Path to dataset files:", path)


Path to dataset files: /home/m/.cache/kagglehub/datasets/mohamedbakhet/amazon-books-reviews/versions/1


In [5]:
extracted_directory = "/home/m/.cache/kagglehub/datasets/mohamedbakhet/amazon-books-reviews/versions/1"
print(os.listdir(extracted_directory))

['books_data.csv', 'Books_rating.csv']


In [6]:
file_path_ratings = os.path.join(extracted_directory,'Books_rating.csv')
file_path_data = os.path.join(extracted_directory, 'books_data.csv')

ratings = pd.read_csv(file_path_ratings)
metadata = pd.read_csv(file_path_data)

# metadata = pd.read_csv("/home/matias/Documentos/Mineria_de_textos/Proyecto_final/data/amazon-books-reviews/books_data.csv")
# ratings = pd.read_csv("/home/matias/Documentos/Mineria_de_textos/Proyecto_final/data/amazon-books-reviews/Books_rating.csv")


## Exploración y limpieza de datos

In [7]:
metadata.head()

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount
0,Its Only Art If Its Well Hung!,,['Julie Strain'],http://books.google.com/books/content?id=DykPA...,http://books.google.nl/books?id=DykPAAAACAAJ&d...,,1996,http://books.google.nl/books?id=DykPAAAACAAJ&d...,['Comics & Graphic Novels'],
1,Dr. Seuss: American Icon,Philip Nel takes a fascinating look into the k...,['Philip Nel'],http://books.google.com/books/content?id=IjvHQ...,http://books.google.nl/books?id=IjvHQsCn_pgC&p...,A&C Black,2005-01-01,http://books.google.nl/books?id=IjvHQsCn_pgC&d...,['Biography & Autobiography'],
2,Wonderful Worship in Smaller Churches,This resource includes twelve principles in un...,['David R. Ray'],http://books.google.com/books/content?id=2tsDA...,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,,2000,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,['Religion'],
3,Whispers of the Wicked Saints,Julia Thomas finds her life spinning out of co...,['Veronica Haddon'],http://books.google.com/books/content?id=aRSIg...,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,iUniverse,2005-02,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,['Fiction'],
4,"Nation Dance: Religion, Identity and Cultural ...",,['Edward Long'],,http://books.google.nl/books?id=399SPgAACAAJ&d...,,2003-03-01,http://books.google.nl/books?id=399SPgAACAAJ&d...,,


In [8]:
metadata.shape

(212404, 10)

In [9]:
ratings.head()

Unnamed: 0,Id,Title,Price,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text
0,1882931173,Its Only Art If Its Well Hung!,,AVCGYZL8FQQTD,"Jim of Oz ""jim-of-oz""",7/7,4.0,940636800,Nice collection of Julie Strain images,This is only for Julie Strain fans. It's a col...
1,826414346,Dr. Seuss: American Icon,,A30TK6U7DNS82R,Kevin Killian,10/10,5.0,1095724800,Really Enjoyed It,I don't care much for Dr. Seuss but after read...
2,826414346,Dr. Seuss: American Icon,,A3UH4UZ4RSVO82,John Granger,10/11,5.0,1078790400,Essential for every personal and Public Library,"If people become the books they read and if ""t..."
3,826414346,Dr. Seuss: American Icon,,A2MVUWT453QH61,"Roy E. Perry ""amateur philosopher""",7/7,4.0,1090713600,Phlip Nel gives silly Seuss a serious treatment,"Theodore Seuss Geisel (1904-1991), aka &quot;D..."
4,826414346,Dr. Seuss: American Icon,,A22X4XUPKF66MR,"D. H. Richards ""ninthwavestore""",3/3,4.0,1107993600,Good academic overview,Philip Nel - Dr. Seuss: American IconThis is b...


In [10]:
ratings.isnull().sum()

Id                          0
Title                     208
Price                 2518829
User_id                561787
profileName            561905
review/helpfulness          0
review/score                0
review/time                 0
review/summary            407
review/text                 8
dtype: int64

In [11]:
metadata.isnull().sum()

Title                 1
description       68442
authors           31413
image             52075
previewLink       23836
publisher         75886
publishedDate     25305
infoLink          23836
categories        41199
ratingsCount     162652
dtype: int64

In [12]:
ratings.shape

(3000000, 10)

In [13]:
#Quitamos valores faltantes para id de usuarios

ratings = ratings.dropna(subset=['User_id'])

ratings.head()

Unnamed: 0,Id,Title,Price,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text
0,1882931173,Its Only Art If Its Well Hung!,,AVCGYZL8FQQTD,"Jim of Oz ""jim-of-oz""",7/7,4.0,940636800,Nice collection of Julie Strain images,This is only for Julie Strain fans. It's a col...
1,826414346,Dr. Seuss: American Icon,,A30TK6U7DNS82R,Kevin Killian,10/10,5.0,1095724800,Really Enjoyed It,I don't care much for Dr. Seuss but after read...
2,826414346,Dr. Seuss: American Icon,,A3UH4UZ4RSVO82,John Granger,10/11,5.0,1078790400,Essential for every personal and Public Library,"If people become the books they read and if ""t..."
3,826414346,Dr. Seuss: American Icon,,A2MVUWT453QH61,"Roy E. Perry ""amateur philosopher""",7/7,4.0,1090713600,Phlip Nel gives silly Seuss a serious treatment,"Theodore Seuss Geisel (1904-1991), aka &quot;D..."
4,826414346,Dr. Seuss: American Icon,,A22X4XUPKF66MR,"D. H. Richards ""ninthwavestore""",3/3,4.0,1107993600,Good academic overview,Philip Nel - Dr. Seuss: American IconThis is b...


In [14]:
ratings.isnull().sum()

Id                          0
Title                     195
Price                 2023633
User_id                     0
profileName               118
review/helpfulness          0
review/score                0
review/time                 0
review/summary            397
review/text                 1
dtype: int64

In [15]:
#Revisemos entradas repetidas y eliminemos las repeticiones

duplicados = ratings.duplicated()

print("Número de duplicados: ", duplicados.sum())

Número de duplicados:  5546


In [16]:
ratings = ratings[~duplicados]

ratings.shape

(2432667, 10)

In [17]:
usuarios = ratings['User_id'].nunique()


In [18]:
usuarios

1008972

In [19]:
# Cambiar el formato de 'review/time' a datetime
ratings['review/time'] = pd.to_datetime(ratings['review/time'], unit='s')

# Ordenar por 'review/time' de manera ascendente
ratings = ratings.sort_values(by='review/time', ascending=True)

ratings

Unnamed: 0,Id,Title,Price,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text
2971448,B000G167FA,Silver Pennies,,AAFZZHA2I598B,Byron C. Benson,4/4,5.0,1969-12-31 23:59:59,An incomparable children's classic,This book of children's poems has been enjoyed...
2971449,B000G167FA,Silver Pennies,,A1MCQGDJDSPJFF,SHOCKgBLUE@aol.com,4/4,5.0,1969-12-31 23:59:59,Unequalled Collection of Children's Poetry for...,I was introduced to this marvelous collection ...
75754,0786280670,"Julie and Julia: 365 Days, 524 Recipes, 1 Tiny...",,A2FBKQ7J7D0Z36,Angie789,7/10,2.0,1969-12-31 23:59:59,disappointing,"I love both cooking and reading blogs, so I th..."
75753,0786280670,"Julie and Julia: 365 Days, 524 Recipes, 1 Tiny...",,A7PNOKN67P7T3,"Mary Harris ""beachreader""",7/10,1.0,1969-12-31 23:59:59,waste of time and money,A book as messy and nasty as some of the autho...
2152018,B000KPX7RI,"McKeachie's Teaching Tips, Strategies Research...",,A1AY4QM3FDINBQ,Sujini2,0/0,5.0,1969-12-31 23:59:59,Pretty good.,Pretty good at providing me with the fundament...
...,...,...,...,...,...,...,...,...,...,...
1017673,155927140X,The Enchanted Cat,,AOS6V51CJM8NQ,"Ted M. Sweitzer ""Judy Sweitzer""",0/0,4.0,2013-03-04 00:00:00,For the Cat Lover,"A nice gift book, my husband bought it for me ..."
1564116,1423315537,Firehouse,,AO17E7H0S0PBU,TiffanyW,0/0,5.0,2013-03-04 00:00:00,Love this book,"This book is a poignant, emotional, humorous, ..."
960831,1850891648,Frankenstein (Isis Large Print Fiction),,A3SFQL1UXFOQY,Kylee Bennett,0/0,5.0,2013-03-04 00:00:00,Good,"This is my favorite, it is very eventful and i..."
1888684,0435130994,Skeleton Key (New Windmills),,A1F8YGI6MX27KC,Sharon Rogers,0/0,5.0,2013-03-04 00:00:00,Amazing,This was action packed from beginning to end.T...


In [20]:
# Asignar tokens a los títulos de los libros 

metadata['Token_titulo'] = metadata['Title'].index
metadata.head()

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount,Token_titulo
0,Its Only Art If Its Well Hung!,,['Julie Strain'],http://books.google.com/books/content?id=DykPA...,http://books.google.nl/books?id=DykPAAAACAAJ&d...,,1996,http://books.google.nl/books?id=DykPAAAACAAJ&d...,['Comics & Graphic Novels'],,0
1,Dr. Seuss: American Icon,Philip Nel takes a fascinating look into the k...,['Philip Nel'],http://books.google.com/books/content?id=IjvHQ...,http://books.google.nl/books?id=IjvHQsCn_pgC&p...,A&C Black,2005-01-01,http://books.google.nl/books?id=IjvHQsCn_pgC&d...,['Biography & Autobiography'],,1
2,Wonderful Worship in Smaller Churches,This resource includes twelve principles in un...,['David R. Ray'],http://books.google.com/books/content?id=2tsDA...,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,,2000,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,['Religion'],,2
3,Whispers of the Wicked Saints,Julia Thomas finds her life spinning out of co...,['Veronica Haddon'],http://books.google.com/books/content?id=aRSIg...,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,iUniverse,2005-02,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,['Fiction'],,3
4,"Nation Dance: Religion, Identity and Cultural ...",,['Edward Long'],,http://books.google.nl/books?id=399SPgAACAAJ&d...,,2003-03-01,http://books.google.nl/books?id=399SPgAACAAJ&d...,,,4


In [21]:
#Imputación de valores faltantes por unk
metadata['categories'] = metadata['categories'].replace(np.nan, 'unk')

In [22]:
metadata.head()

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount,Token_titulo
0,Its Only Art If Its Well Hung!,,['Julie Strain'],http://books.google.com/books/content?id=DykPA...,http://books.google.nl/books?id=DykPAAAACAAJ&d...,,1996,http://books.google.nl/books?id=DykPAAAACAAJ&d...,['Comics & Graphic Novels'],,0
1,Dr. Seuss: American Icon,Philip Nel takes a fascinating look into the k...,['Philip Nel'],http://books.google.com/books/content?id=IjvHQ...,http://books.google.nl/books?id=IjvHQsCn_pgC&p...,A&C Black,2005-01-01,http://books.google.nl/books?id=IjvHQsCn_pgC&d...,['Biography & Autobiography'],,1
2,Wonderful Worship in Smaller Churches,This resource includes twelve principles in un...,['David R. Ray'],http://books.google.com/books/content?id=2tsDA...,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,,2000,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,['Religion'],,2
3,Whispers of the Wicked Saints,Julia Thomas finds her life spinning out of co...,['Veronica Haddon'],http://books.google.com/books/content?id=aRSIg...,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,iUniverse,2005-02,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,['Fiction'],,3
4,"Nation Dance: Religion, Identity and Cultural ...",,['Edward Long'],,http://books.google.nl/books?id=399SPgAACAAJ&d...,,2003-03-01,http://books.google.nl/books?id=399SPgAACAAJ&d...,unk,,4


In [23]:
#Asignación de tokens a las categorías
metadata['categories'].nunique()

10884

In [25]:
for index, row in metadata.iterrows():
    metadata.at[index, 'tokens_cat'] = row['categories'].split()

metadata.head()

Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount,Token_titulo,tokens_cat
0,Its Only Art If Its Well Hung!,,['Julie Strain'],http://books.google.com/books/content?id=DykPA...,http://books.google.nl/books?id=DykPAAAACAAJ&d...,,1996,http://books.google.nl/books?id=DykPAAAACAAJ&d...,['Comics & Graphic Novels'],,0,"[['Comics, &, Graphic, Novels']]"
1,Dr. Seuss: American Icon,Philip Nel takes a fascinating look into the k...,['Philip Nel'],http://books.google.com/books/content?id=IjvHQ...,http://books.google.nl/books?id=IjvHQsCn_pgC&p...,A&C Black,2005-01-01,http://books.google.nl/books?id=IjvHQsCn_pgC&d...,['Biography & Autobiography'],,1,"[['Biography, &, Autobiography']]"
2,Wonderful Worship in Smaller Churches,This resource includes twelve principles in un...,['David R. Ray'],http://books.google.com/books/content?id=2tsDA...,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,,2000,http://books.google.nl/books?id=2tsDAAAACAAJ&d...,['Religion'],,2,[['Religion']]
3,Whispers of the Wicked Saints,Julia Thomas finds her life spinning out of co...,['Veronica Haddon'],http://books.google.com/books/content?id=aRSIg...,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,iUniverse,2005-02,http://books.google.nl/books?id=aRSIgJlq6JwC&d...,['Fiction'],,3,[['Fiction']]
4,"Nation Dance: Religion, Identity and Cultural ...",,['Edward Long'],,http://books.google.nl/books?id=399SPgAACAAJ&d...,,2003-03-01,http://books.google.nl/books?id=399SPgAACAAJ&d...,unk,,4,[unk]


In [26]:
mapeoTokens = dict(zip(metadata['Title'], metadata['tokens_cat']))
ratings['tokens_cat'] =ratings['Title'].map(mapeoTokens)

In [27]:
ratings.head()

Unnamed: 0,Id,Title,Price,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text,tokens_cat
2971448,B000G167FA,Silver Pennies,,AAFZZHA2I598B,Byron C. Benson,4/4,5.0,1969-12-31 23:59:59,An incomparable children's classic,This book of children's poems has been enjoyed...,[unk]
2971449,B000G167FA,Silver Pennies,,A1MCQGDJDSPJFF,SHOCKgBLUE@aol.com,4/4,5.0,1969-12-31 23:59:59,Unequalled Collection of Children's Poetry for...,I was introduced to this marvelous collection ...,[unk]
75754,0786280670,"Julie and Julia: 365 Days, 524 Recipes, 1 Tiny...",,A2FBKQ7J7D0Z36,Angie789,7/10,2.0,1969-12-31 23:59:59,disappointing,"I love both cooking and reading blogs, so I th...","[['Cookery,, French']]"
75753,0786280670,"Julie and Julia: 365 Days, 524 Recipes, 1 Tiny...",,A7PNOKN67P7T3,"Mary Harris ""beachreader""",7/10,1.0,1969-12-31 23:59:59,waste of time and money,A book as messy and nasty as some of the autho...,"[['Cookery,, French']]"
2152018,B000KPX7RI,"McKeachie's Teaching Tips, Strategies Research...",,A1AY4QM3FDINBQ,Sujini2,0/0,5.0,1969-12-31 23:59:59,Pretty good.,Pretty good at providing me with the fundament...,[['Education']]


In [28]:
#Asignación de tokens de título a df ratings a través de un diccionario
mapeoTitulos = dict(zip(metadata['Title'], metadata['Token_titulo']))
ratings['Token_titulos'] = ratings['Title'].map(mapeoTitulos)

In [29]:
#Agrupamos el data frame por el id del usuario

ratingsAgrupados = ratings.groupby(['User_id'], sort=False)
ratingsAgrupados.head()

Unnamed: 0,Id,Title,Price,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text,tokens_cat,Token_titulos
2971448,B000G167FA,Silver Pennies,,AAFZZHA2I598B,Byron C. Benson,4/4,5.0,1969-12-31 23:59:59,An incomparable children's classic,This book of children's poems has been enjoyed...,[unk],130606
2971449,B000G167FA,Silver Pennies,,A1MCQGDJDSPJFF,SHOCKgBLUE@aol.com,4/4,5.0,1969-12-31 23:59:59,Unequalled Collection of Children's Poetry for...,I was introduced to this marvelous collection ...,[unk],130606
75754,0786280670,"Julie and Julia: 365 Days, 524 Recipes, 1 Tiny...",,A2FBKQ7J7D0Z36,Angie789,7/10,2.0,1969-12-31 23:59:59,disappointing,"I love both cooking and reading blogs, so I th...","[['Cookery,, French']]",5197
75753,0786280670,"Julie and Julia: 365 Days, 524 Recipes, 1 Tiny...",,A7PNOKN67P7T3,"Mary Harris ""beachreader""",7/10,1.0,1969-12-31 23:59:59,waste of time and money,A book as messy and nasty as some of the autho...,"[['Cookery,, French']]",5197
2152018,B000KPX7RI,"McKeachie's Teaching Tips, Strategies Research...",,A1AY4QM3FDINBQ,Sujini2,0/0,5.0,1969-12-31 23:59:59,Pretty good.,Pretty good at providing me with the fundament...,[['Education']],165632
...,...,...,...,...,...,...,...,...,...,...,...,...
1017673,155927140X,The Enchanted Cat,,AOS6V51CJM8NQ,"Ted M. Sweitzer ""Judy Sweitzer""",0/0,4.0,2013-03-04 00:00:00,For the Cat Lover,"A nice gift book, my husband bought it for me ...","[['Body,, Mind, &, Spirit']]",28597
1564116,1423315537,Firehouse,,AO17E7H0S0PBU,TiffanyW,0/0,5.0,2013-03-04 00:00:00,Love this book,"This book is a poignant, emotional, humorous, ...",[['Fiction']],111025
960831,1850891648,Frankenstein (Isis Large Print Fiction),,A3SFQL1UXFOQY,Kylee Bennett,0/0,5.0,2013-03-04 00:00:00,Good,"This is my favorite, it is very eventful and i...",[unk],68441
1888684,0435130994,Skeleton Key (New Windmills),,A1F8YGI6MX27KC,Sharon Rogers,0/0,5.0,2013-03-04 00:00:00,Amazing,This was action packed from beginning to end.T...,[unk],146628


In [30]:
ratingsAgrupados.get_group('AVCGYZL8FQQTD',)

  ratingsAgrupados.get_group('AVCGYZL8FQQTD',)


Unnamed: 0,Id,Title,Price,User_id,profileName,review/helpfulness,review/score,review/time,review/summary,review/text,tokens_cat,Token_titulos
1282430,1882931335,Six Foot One and Worth the Climb,,AVCGYZL8FQQTD,"Jim of Oz ""jim-of-oz""",16/18,4.0,1999-10-17,"If you like Julie, you'll like this pictorial ...",A pictorial exploration of Julie. There is a l...,[unk],91711
0,1882931173,Its Only Art If Its Well Hung!,,AVCGYZL8FQQTD,"Jim of Oz ""jim-of-oz""",7/7,4.0,1999-10-23,Nice collection of Julie Strain images,This is only for Julie Strain fans. It's a col...,"[['Comics, &, Graphic, Novels']]",0
1293256,B000TDGZO8,Under Fire - A Corps Novel,,AVCGYZL8FQQTD,"Jim of Oz ""jim-of-oz""",2/3,4.0,2005-02-03,tired characters - good stories,Basically I want to reiterate what most everyo...,[['Fiction']],92553


In [31]:
# Armar los historiales de reseñas por usuario
user_histories = {}
for user_id, group in ratingsAgrupados:
    user_histories[user_id[0]] = list(zip(group['Token_titulos'], group['review/score'], group['review/time'], group['tokens_cat']))

In [32]:
user_histories["AVCGYZL8FQQTD"]

[(91711, 4.0, Timestamp('1999-10-17 00:00:00'), ['unk']),
 (0,
  4.0,
  Timestamp('1999-10-23 00:00:00'),
  ["['Comics", '&', 'Graphic', "Novels']"]),
 (92553, 4.0, Timestamp('2005-02-03 00:00:00'), ["['Fiction']"])]

In [33]:
#Filtrado para hacer conservar usuarios con más de 4 reseñas

historial_filtrado = {usuario:historial for usuario, historial in user_histories.items() if len(historial)>3}

In [34]:
print(len(historial_filtrado))
print(len(user_histories))

114069
1008972


In [35]:
# training loop skeleton
# model = BERT4RecWithRatings(vocab_size=..., d_model=..., ...)
# opt = torch.optim.Adam(model.parameters(), lr=3e-4)
# mlm_loss_fn = nn.CrossEntropyLoss(ignore_index=-100)
# rating_loss_fn = nn.MSELoss()  # if predicting scalar ratings normalized to [0,1]

# for epoch in range(epochs):
#     for batch in dataloader:
#         item_seq, rating_seq, attn_mask = batch['items'], batch['ratings'], batch['attn']
#         input_seq, mlm_labels, mask_positions = create_masked_input(item_seq, mask_token_id=MASK_ID)
#         logits, rating_pred = model(input_seq, rating_seq, attn_mask)
#         # MLM loss: reshape to (B*L, vocab)
#         mlm_loss = mlm_loss_fn(logits.view(-1, logits.size(-1)), mlm_labels.view(-1))
#         # Rating loss: compute only at masked positions where ground truth rating exists
#         if rating_seq is not None:
#             # rating_targets: maybe normalized to 0..1
#             rating_targets = rating_seq.float()  # shape (B, L)
#             # Only compute where mask_positions is True (and not pad)
#             if mask_positions.any():
#                 pred_masked = rating_pred[mask_positions]
#                 true_masked = rating_targets[mask_positions]
#                 rating_loss = rating_loss_fn(pred_masked, true_masked)
#             else:
#                 rating_loss = torch.tensor(0.0, device=mlm_loss.device)
#         else:
#             rating_loss = torch.tensor(0.0, device=mlm_loss.device)

#         loss = mlm_loss + 0.5 * rating_loss  # alpha=0.5 as example
#         opt.zero_grad()
#         loss.backward()
#         opt.step()