In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import DataLoader, Dataset

In [2]:
if torch.cuda.is_available():
    device='cuda'
else:
    device='cpu'

In [46]:
df = pd.read_parquet("trainf.parquet", engine='pyarrow')
df

Unnamed: 0,id1,id2,id3,id4,id5,y,f1,f2,f3,f4,...,f28_ctr,f30_ctr,f28_has_impression,f29_has_click,f30_has_impression,f31_has_click,f39_41_total_spend,f39_ratio,f40_ratio,f41_ratio
0,1366776_189706075_16-23_2023-11-02 22:22:00.042,1366776,189706075,2023-11-02 22:22:00.042,2023-11-02,0,1,,,,...,0.0,0.011613,True,False,True,True,0.000000,0.0000,0.0,0.0000
1,1366776_89227_16-23_2023-11-01 23:51:24.999,1366776,89227,2023-11-01 23:51:24.999,2023-11-01,0,1,,,,...,0.0,0.012233,True,False,True,True,0.000000,0.0000,0.0,0.0000
2,1366776_35046_16-23_2023-11-01 00:30:59.797,1366776,35046,2023-11-01 00:30:59.797,2023-11-01,0,1,,,,...,0.0,0.005819,True,False,True,True,0.000000,0.0000,0.0,0.0000
3,1366776_6275451_16-23_2023-11-02 22:21:32.261,1366776,6275451,2023-11-02 22:21:32.261,2023-11-02,0,1,,,,...,0.0,0.011613,True,False,True,True,0.000000,0.0000,0.0,0.0000
4,1366776_78053_16-23_2023-11-02 22:21:34.799,1366776,78053,2023-11-02 22:21:34.799,2023-11-02,0,1,,,,...,0.0,0.011613,True,False,True,True,0.000000,0.0000,0.0,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
770159,1896641_87731_16-23_2023-11-02 08:14:21.524,1896641,87731,2023-11-02 08:14:21.524,2023-11-02,0,,,,,...,0.0,0.006691,True,False,True,True,318.000000,0.7861,0.0,0.2139
770160,1896641_505604_16-23_2023-11-02 08:14:24.458,1896641,505604,2023-11-02 08:14:24.458,2023-11-02,0,,,,,...,0.0,0.006691,True,False,True,True,318.000000,0.7861,0.0,0.2139
770161,1896641_25212_16-23_2023-11-02 08:14:25.748,1896641,25212,2023-11-02 08:14:25.748,2023-11-02,0,,,,,...,0.0,0.006691,True,False,True,True,318.000000,0.7861,0.0,0.2139
770162,1900765_95157_16-23_2023-11-02 06:08:25.900,1900765,95157,2023-11-02 06:08:25.900,2023-11-02,0,,,,,...,0.0,0.000000,True,False,True,False,102.500000,0.0000,0.0,1.0000


In [48]:
USER_NUMERICAL = [
    'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10',
    'f11', 'f12', 'f13', 'f18', 'f22', 'f26', 'f27', 'f32', 'f33', 'f35',
    'f36', 'f38', 'f39', 'f40', 'f41', 'f43', 'f44', 'f45', 'f46', 'f47',
    'f49', 'f51', 'f58', 'f59', 'f68', 'f77', 'f78', 'f79', 'f81', 'f82',
    'f83', 'f85', 'f86', 'f87', 'f88', 'f89', 'f90', 'f91', 'f92', 'f93',
    'f94', 'f95', 'f96', 'f97', 'f98', 'f99', 'f100', 'f101', 'f102', 'f103',
    'f104', 'f105', 'f106', 'f107', 'f108', 'f109', 'f110', 'f111', 'f113', 'f114',
    'f115', 'f116', 'f117', 'f118', 'f119', 'f120', 'f121', 'f123', 'f124', 'f125',
    'f126', 'f127', 'f128', 'f129', 'f130', 'f131', 'f132', 'f133', 'f134', 'f137',
    'f138', 'f139', 'f140', 'f141', 'f142', 'f143', 'f144', 'f145', 'f146', 'f147',
    'f148', 'f149', 'f150', 'f151', 'f152', 'f153', 'f154', 'f155', 'f156', 'f157',
    'f158', 'f159', 'f160', 'f161', 'f162', 'f163', 'f164', 'f165', 'f166', 'f167',
    'f169', 'f170', 'f171', 'f172', 'f173', 'f174', 'f175', 'f176', 'f177', 'f178',
    'f179', 'f180', 'f181', 'f182', 'f183', 'f184', 'f185', 'f186', 'f187', 'f188',
    'f189', 'f190', 'f191', 'f192', 'f193', 'f194', 'f195', 'f196', 'f197', 'f198',
    'f199', 'f200', 'f201', 'f202', 'f203', 'f204', 'f205', 'f206', 'f207', 'f208',
    'f209', 'f210', 'f211', 'f212', 'f213', 'f214', 'f215', 'f216', 'f217', 'f219',
    'f220', 'f221', 'f222', 'f223', 'f224', 'f225', 'f310', 'f311', 'f312', 'f313',
    'f314', 'f315', 'f316', 'f317', 'f318', 'f319', 'f320', 'f321', 'f322', 'f323',
    'f324', 'f325', 'f326', 'f327', 'f328', 'f329', 'f330', 'f331', 'f332', 'f333',
    'f336', 'f337', 'f338', 'f339', 'f340', 'f341', 'f342', 'f343', 'f344', 'f345',
    'f346', 'f347', 'f348', 'f352', 'f353', 'f355', 'f356', 'f357', 'f358', 'f359',
    'f360', 'f361', 'f362', 'f363', 'f364', 'f365', 'f366', 'f28_ctr', 'f30_ctr', 
    'f39_41_total_spend', 'f39_ratio', 'f40_ratio', 'f41_ratio',
]

USER_CATEGORICAL = ["f42", "f48", "f50", "f52", "f53", "f54", "f55", "f56", "f57"]

OFFER_ID = 'id3'

OFFER_CONTEXTUAL = ["f168", "f349", "f350", "f351", "f354"]

OFFER_CATEGORIES = ["f226", "f227", "f228", "f229", "f230", "f231", "f232"]

OFFER_SUBCATEGORIES = [
    'f233', 'f234', 'f235', 'f236', 'f237', 'f238', 'f239', 'f240', 'f241', 'f242',
    'f243', 'f244', 'f245', 'f246', 'f247', 'f248', 'f249', 'f250', 'f251', 'f252',
    'f253', 'f254', 'f255', 'f256', 'f257', 'f258', 'f259', 'f260', 'f261', 'f262',
    'f263', 'f264', 'f265', 'f266', 'f267', 'f268', 'f269', 'f270', 'f271', 'f272',
    'f273', 'f274', 'f275', 'f276', 'f277', 'f278', 'f279', 'f280', 'f281', 'f282',
    'f283', 'f284', 'f285', 'f286', 'f287', 'f288', 'f289', 'f290', 'f291', 'f292',
    'f293', 'f294', 'f295', 'f296', 'f297', 'f298', 'f299', 'f300', 'f301', 'f302',
    'f303', 'f304', 'f305', 'f306', 'f307', 'f308', 'f309',
]

OFFER_FEATURES = OFFER_CONTEXTUAL + OFFER_CATEGORIES + OFFER_SUBCATEGORIES


INTERACTION_FEATURES = [
    "f28",  # Cumulative OET offer impressions (with decay)
    "f29",  # Cumulative OET offer clicks (with decay)
    "f30",  # Cumulative merchant offer impressions (with decay)
    "f31",  # Cumulative merchant offer clicks (with decay)
    "f28_has_impression",  # Boolean: has OET impressions
    "f29_has_click",       # Boolean: has OET clicks
    "f30_has_impression",  # Boolean: has merchant impressions
    "f31_has_click"        # Boolean: has merchant clicks
]

LABEL = 'y'
USER_ID = 'id2'
TIMESTAMP = 'id4'

In [47]:
# Keep id2 (user_id) for MAP@7 evaluation, drop others
user_ids = df['id2'].copy()
df.drop(columns=['id1','id2','id4','id5'], inplace=True)

In [None]:
le_user = {}
for col in USER_CATEGORICAL:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    le_user[col] = le

le_offer = LabelEncoder()
df[OFFER_ID] = le_offer.fit_transform(df[OFFER_ID].astype(str))
num_offers = len(le_offer.classes_)


for col in USER_NUMERICAL:
    if df[col].dtype in ['Int8', 'Int16', 'Int32', 'int64']:
        df[col] = df[col].fillna(0).astype('float32')
    elif df[col].dtype == 'bool':
        df[col] = df[col].fillna(False).astype('float32')
    elif df[col].dtype == 'float32':
        df[col] = df[col].fillna(0.0)

# Scale user numerical features
scaler_user = StandardScaler()
df[USER_NUMERICAL] = scaler_user.fit_transform(df[USER_NUMERICAL])

# Replace any remaining inf/nan after scaling
df[USER_NUMERICAL] = df[USER_NUMERICAL].replace([np.inf, -np.inf], 0)
df[USER_NUMERICAL] = df[USER_NUMERICAL].fillna(0)

# Convert interaction features to float (handle bool and nullable ints)
for col in INTERACTION_FEATURES:
    if df[col].dtype in ['Int8', 'Int16', 'Int32', 'int64']:
        df[col] = df[col].fillna(0).astype('float32')
    elif df[col].dtype == 'bool':
        df[col] = df[col].fillna(False).astype('float32')
    elif df[col].dtype == 'float32':
        df[col] = df[col].fillna(0.0)

# Define feature columns
user_features_cols = USER_NUMERICAL + USER_CATEGORICAL
offer_id_col = OFFER_ID
cat_features_cols = INTERACTION_FEATURES

In [39]:
# Check for NaN and inf values
print(f"NaN in df: {df.isna().sum().sum()}")
print(f"Inf in user numerical: {np.isinf(df[USER_NUMERICAL].values).sum()}")
print(f"Label distribution: {df[LABEL].value_counts()}")
print(f"Label dtype: {df[LABEL].dtype}")
print(f"Sample labels: {df[LABEL].head(20).tolist()}")

NaN in df: 1194516
Inf in user numerical: 0
Label distribution: y
0    733113
1     37051
Name: count, dtype: int64
Label dtype: int64
Sample labels: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [None]:
# Check which columns still have NaN
nan_cols = df.columns[df.isna().any()].tolist()
print(f"Columns with NaN: {nan_cols[:20]}")
print(f"Total columns with NaN: {len(nan_cols)}")

print(f"\nNaN in USER_NUMERICAL: {df[USER_NUMERICAL].isna().sum().sum()}")
print(f"NaN in USER_CATEGORICAL: {df[USER_CATEGORICAL].isna().sum().sum()}")
print(f"NaN in INTERACTION_FEATURES: {df[INTERACTION_FEATURES].isna().sum().sum()}")

Columns with NaN: ['f22_others', 'f168', 'f218', 'f351', 'f354']
Total columns with NaN: 5

NaN in USER_NUMERICAL: 0
NaN in USER_CATEGORICAL: 0
NaN in INTERACTION_FEATURES: 0


In [50]:
X = df[user_features_cols + [offer_id_col] + cat_features_cols]
y = df[LABEL]
X_train, X_test, y_train, y_test, user_train, user_test = train_test_split(
    X, y, user_ids, test_size=0.2, random_state=42
)

In [None]:

class CustomDataset(Dataset):
    def __init__(self, user_features, offer_ids, cat_features, labels):
        self.user_features = torch.tensor(user_features.values, dtype=torch.float32)
        self.offer_ids = torch.tensor(offer_ids.values, dtype=torch.long)
        self.cat_features = torch.tensor(cat_features.values, dtype=torch.float32)
        self.labels = torch.tensor(labels.values, dtype=torch.float32)
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return self.user_features[idx], self.offer_ids[idx], self.cat_features[idx], self.labels[idx]

train_dataset = CustomDataset(X_train[user_features_cols], X_train[offer_id_col], X_train[cat_features_cols], y_train)
test_dataset = CustomDataset(X_test[user_features_cols], X_test[offer_id_col], X_test[cat_features_cols], y_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [23]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 770164 entries, 0 to 770163
Columns: 343 entries, id3 to f41_ratio
dtypes: Int16(64), Int32(9), Int8(14), bool(93), category(11), float32(150), int64(1), object(1)
memory usage: 723.5+ MB


In [None]:
class SimpleTripleTower(nn.Module):
    def __init__(self, num_user_features, num_offers, num_cat_features, 
                 embedding_dim=16, hidden_dim=64):
        super().__init__()
        
        # User Tower
        self.user_tower = nn.Sequential(
            nn.Linear(num_user_features, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, 32),
            nn.BatchNorm1d(32)
        )
        
        # Offer Tower
        self.offer_embedding = nn.Embedding(num_offers, embedding_dim)
        self.offer_tower = nn.Sequential(
            nn.Linear(embedding_dim, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 32),
            nn.BatchNorm1d(32)
        )
        
        self.interaction_tower = nn.Sequential(
            nn.Linear(num_cat_features, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, 32),
            nn.BatchNorm1d(32)
        )
        
        # Gating
        self.gate = nn.Sequential(
            nn.Linear(32 * 3, 32),
            nn.ReLU(),
            nn.Linear(32, 3),
            nn.Softmax(dim=1)
        )
        
        # Final prediction
        self.final = nn.Sequential(
            nn.Linear(32 * 3, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    
    def forward(self, user_features, offer_ids, cat_features):
        # Get embeddings from each tower
        user_emb = self.user_tower(user_features)
        
        offer_emb = self.offer_embedding(offer_ids)
        offer_emb = self.offer_tower(offer_emb)
        
        interaction_emb = self.interaction_tower(cat_features)
        
        # Gating
        combined = torch.cat([user_emb, offer_emb, interaction_emb], dim=1)
        gates = self.gate(combined)
        
        # Apply gates
        weighted_user = user_emb * gates[:, 0:1]
        weighted_offer = offer_emb * gates[:, 1:2]
        weighted_interaction = interaction_emb * gates[:, 2:3]
        
        # Combine and predict
        final_input = torch.cat([weighted_user, weighted_offer, weighted_interaction], dim=1)
        output = self.final(final_input)
        
        return output


In [53]:
# Instantiate model
num_user_features = len(user_features_cols)
num_cat_features = len(cat_features_cols)
model = SimpleTripleTower(num_user_features, num_offers, num_cat_features)
model.to(device)

# Loss and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)  # Reduced learning rate

In [54]:
# Training loop
epochs = 10
for epoch in range(epochs):
    model.train()
    train_loss = 0
    for user, offer, cat, label in train_loader:
        user, offer, cat, label = user.to(device), offer.to(device), cat.to(device), label.to(device)
        optimizer.zero_grad()
        output = model(user, offer, cat)
        loss = criterion(output.squeeze(), label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()
        train_loss += loss.item()
    
    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for user, offer, cat, label in test_loader:
            user, offer, cat, label = user.to(device), offer.to(device), cat.to(device), label.to(device)
            output = model(user, offer, cat)
            val_loss += criterion(output.squeeze(), label).item()
    
    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(test_loader):.4f}')

Epoch 1/10, Train Loss: 0.1434, Val Loss: 0.1092
Epoch 2/10, Train Loss: 0.1095, Val Loss: 0.1029
Epoch 3/10, Train Loss: 0.1045, Val Loss: 0.0985
Epoch 4/10, Train Loss: 0.1011, Val Loss: 0.0960
Epoch 5/10, Train Loss: 0.0987, Val Loss: 0.0950
Epoch 6/10, Train Loss: 0.0968, Val Loss: 0.0942
Epoch 7/10, Train Loss: 0.0953, Val Loss: 0.0908
Epoch 8/10, Train Loss: 0.0943, Val Loss: 0.0935
Epoch 9/10, Train Loss: 0.0933, Val Loss: 0.0896
Epoch 10/10, Train Loss: 0.0924, Val Loss: 0.0881


In [55]:
# MAP@7 Evaluation
def calculate_map_at_k(model, X_test, y_test, user_test, user_features_cols, offer_id_col, cat_features_cols, device, k=7):
    """Calculate Mean Average Precision at K for each user"""
    model.eval()
    
    # Create a dataframe with predictions
    test_df = pd.DataFrame({
        'user_id': user_test.values,
        'true_label': y_test.values
    })
    
    # Get model predictions
    with torch.no_grad():
        user_feats = torch.tensor(X_test[user_features_cols].values, dtype=torch.float32).to(device)
        offer_ids = torch.tensor(X_test[offer_id_col].values, dtype=torch.long).to(device)
        cat_feats = torch.tensor(X_test[cat_features_cols].values, dtype=torch.float32).to(device)
        
        outputs = model(user_feats, offer_ids, cat_feats)
        predictions = torch.sigmoid(outputs.squeeze()).cpu().numpy()
    
    test_df['prediction'] = predictions
    test_df['offer_id'] = X_test[offer_id_col].values
    
    # Calculate MAP@7 for each user
    user_aps = []
    
    for user_id, user_data in test_df.groupby('user_id'):
        # Sort by prediction score (descending)
        user_data = user_data.sort_values('prediction', ascending=False)
        
        # Get top k predictions
        top_k = user_data.head(k)
        
        # Calculate average precision
        relevant_items = top_k['true_label'].values
        
        if relevant_items.sum() == 0:
            # No relevant items for this user
            continue
        
        precision_at_i = []
        num_relevant = 0
        
        for i, is_relevant in enumerate(relevant_items):
            if is_relevant == 1:
                num_relevant += 1
                precision_at_i.append(num_relevant / (i + 1))
        
        if len(precision_at_i) > 0:
            ap = np.mean(precision_at_i)
            user_aps.append(ap)
    
    map_score = np.mean(user_aps) if user_aps else 0.0
    return map_score, user_aps

# Calculate MAP@7
map7_score, all_aps = calculate_map_at_k(
    model, X_test, y_test, user_test, 
    user_features_cols, offer_id_col, cat_features_cols, 
    device, k=7
)
print(f"MAP@7 on validation set: {map7_score:.4f}")
print(f"Number of users evaluated: {len(all_aps)}")
print(f"AP distribution - Min: {np.min(all_aps):.4f}, Max: {np.max(all_aps):.4f}, Median: {np.median(all_aps):.4f}")

MAP@7 on validation set: 0.7407
Number of users evaluated: 2054
AP distribution - Min: 0.1429, Max: 1.0000, Median: 1.0000
