In [None]:
import os
import shutil

import networkx as nx
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.utils import to_dense_batch
from torch_geometric.utils import from_scipy_sparse_matrix
from torch_geometric.data import Data, DataLoader

In [None]:

# --- reproducibility ---
seed_value = 2021

# --- optimisation schedule ---
lr = 1e-4
epochs = 500

# --- weights of the two loss terms combined in the training loop ---
alpha, beta = 0.5, 0.5

# --- input geometry: steps per user and tokens per behavior sequence ---
timestep, maxlen = 10, 64

# --- run on the GPU when one is visible, otherwise fall back to the CPU ---
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
class MSDMT(nn.Module):
    """Two-headed network fusing per-user portrait, behavior and graph information.

    Three sub-networks are chained:

    * portrait network  - LSTM over the portrait time series, LayerNorm, dense.
    * behavior network  - embedding -> Conv1d -> mean pooling for every behavior
      sequence, then an LSTM over the resulting time series, LayerNorm, dense.
    * graph network     - a single ``GCNConv`` over the concatenated portrait and
      behavior representations, dropout, dense.

    Args:
        timestep: Number of LSTM steps used when a 2-D behavior tensor is given
            (the pooled vector is repeated this many times).
        portrait_dim: Feature size of each portrait time step; also the hidden
            size of the portrait branch.
        behavior_num: Size of the behavior embedding table. Index 0 is the
            padding index.
        behavior_emb_dim: Width of the behavior embedding.
        behavior_maxlen: Stored on the module; no layer depends on it.
        behavior_dim: Channel count of the behavior Conv1d and hidden size of
            the behavior LSTM.
        network_dim: Output width of the graph convolution and of the dense
            layer that follows it.
        dropout: Dropout probability applied after the graph convolution.
    """

    def __init__(self,
                 timestep=10,
                 portrait_dim=32,
                 behavior_num=100 + 1,
                 behavior_emb_dim=16,
                 behavior_maxlen=64,
                 behavior_dim=32,
                 network_dim=32,
                 dropout=0.5):
        super(MSDMT, self).__init__()

        self.timestep = timestep
        self.dropout = dropout
        self.portrait_dim = portrait_dim
        self.behavior_num = behavior_num
        self.behavior_emb_dim = behavior_emb_dim
        self.behavior_maxlen = behavior_maxlen
        self.behavior_dim = behavior_dim
        self.network_dim = network_dim

        # portrait network
        self.portrait_lstm = nn.LSTM(input_size=self.portrait_dim, hidden_size=self.portrait_dim, batch_first=True)
        self.portrait_norm = nn.LayerNorm(self.portrait_dim)
        self.portrait_dense = nn.Linear(self.portrait_dim, self.portrait_dim, bias=False)

        # behavior network
        self.behavior_embedding = nn.Embedding(num_embeddings=self.behavior_num, embedding_dim=self.behavior_emb_dim, padding_idx=0)
        self.behavior_conv1d = nn.Conv1d(in_channels=self.behavior_emb_dim, out_channels=self.behavior_dim, kernel_size=3, padding=1)
        self.behavior_lstm = nn.LSTM(input_size=self.behavior_dim, hidden_size=self.behavior_dim, batch_first=True)
        self.behavior_norm = nn.LayerNorm(self.behavior_dim)
        self.behavior_dense = nn.Linear(self.behavior_dim, self.behavior_dim, bias=False)

        # graph network
        self.gcn_conv = GCNConv(in_channels=self.portrait_dim + self.behavior_dim, out_channels=self.network_dim)
        self.gcn_dropout = nn.Dropout(p=self.dropout)
        self.network_dense = nn.Linear(self.network_dim, self.network_dim)

        # output layers
        self.output1 = nn.Linear(self.network_dim, 1)
        self.output2 = nn.Linear(self.network_dim, 1)

    def _encode_behavior(self, B):
        """Turn behavior token ids into a (nodes, steps, behavior_dim) time series."""
        if B.dim() == 3:
            # One sequence per time step: run embedding/conv/pooling on every
            # (node, step) pair by folding the step axis into the batch axis.
            num_nodes, num_steps, seq_len = B.shape
            tokens = B.reshape(num_nodes * num_steps, seq_len)
            emb = self.behavior_embedding(tokens).permute(0, 2, 1)  # (nodes*steps, emb, seq)
            conv = F.relu(self.behavior_conv1d(emb))                # (nodes*steps, dim, seq)
            pooled = conv.mean(dim=2)                               # (nodes*steps, dim)
            return pooled.reshape(num_nodes, num_steps, self.behavior_dim)
        if B.dim() == 2:
            # Single sequence per node: pool once and repeat it along the time axis.
            emb = self.behavior_embedding(B).permute(0, 2, 1)       # (nodes, emb, seq)
            conv = F.relu(self.behavior_conv1d(emb))                # (nodes, dim, seq)
            pooled = conv.mean(dim=2)                               # (nodes, dim)
            return pooled.unsqueeze(1).repeat(1, self.timestep, 1)
        raise ValueError(
            "behavior input must be 2-D (nodes, seq_len) or 3-D (nodes, steps, seq_len), "
            "got %d dimensions" % B.dim()
        )

    def forward(self, inputs):
        """Run the three sub-networks and both output heads.

        Args:
            inputs: Sequence ``(U, B, A)`` where

                * ``U`` is a float tensor ``(nodes, steps, portrait_dim)``;
                * ``B`` holds integer behavior ids, either
                  ``(nodes, steps, seq_len)`` or ``(nodes, seq_len)``;
                * ``A`` is the graph connectivity handed to ``GCNConv``
                  (the training cell passes a COO ``edge_index``).

        Returns:
            ``(output1, output2)``, each of shape ``(nodes, 1)``. ``output1`` has
            already been passed through a sigmoid, so it is a probability and
            should be paired with a probability-based loss; ``output2`` is an
            unbounded regression value.
        """
        U, B, A = inputs

        # portrait network: keep only the representation of the last time step
        H, _ = self.portrait_lstm(U)
        H = H[:, -1, :]
        H = self.portrait_norm(H)
        H = F.relu(self.portrait_dense(H))

        # behavior network
        O, _ = self.behavior_lstm(self._encode_behavior(B))
        O = O[:, -1, :]  # last time step
        O = self.behavior_norm(O)
        O = F.relu(self.behavior_dense(O))

        # concatenate portrait and behavior features -> one row per graph node
        X = torch.cat([H, O], dim=-1)

        # graph network: GCNConv consumes the node-feature matrix directly;
        # no dense batching is needed for a single graph.
        V = F.relu(self.gcn_conv(X, A))
        V = self.gcn_dropout(V)
        V = F.relu(self.network_dense(V))

        # outputs
        output1 = torch.sigmoid(self.output1(V))
        output2 = self.output2(V)
        return output1, output2


In [None]:
# Pin the NumPy and PyTorch global RNGs to the configured seed so reruns match.
np.random.seed(seed_value)
torch.manual_seed(seed_value)

def data_process(timestep=10, maxlen=64):
    """Load the sample CSVs and convert them to tensors for the model.

    Reads the portrait, behavior-sequence, social-network and label CSV files
    from ``../data`` and prints the resulting shapes.

    Args:
        timestep: Number of consecutive rows that belong to one user in the
            portrait and behavior tables (used to reshape them).
        maxlen: Fixed length of every behavior sequence. Shorter sequences are
            right-padded with 0; longer ones keep only their last ``maxlen``
            tokens.

    Returns:
        Tuple ``(U, B, edge_index, edge_weight, y1, y2)``:

        * ``U`` - float32 tensor ``(users, timestep, features)``.
        * ``B`` - int64 tensor ``(users, timestep, maxlen)``.
        * ``edge_index``, ``edge_weight`` - COO graph produced by
          ``from_scipy_sparse_matrix``; node ``i`` is the ``i``-th distinct
          ``uid`` of the portrait table, so it indexes the rows of ``U``.
        * ``y1`` - float32 churn labels ``(rows, 1)``.
        * ``y2`` - float32 ``log(1 + payment)`` labels ``(rows, 1)``.

    Raises:
        ValueError: If the number of distinct ``uid`` values in the portrait
            table differs from the number of rows of ``U`` after reshaping,
            which means the table is not laid out as ``timestep`` rows per user.
    """
    df_U = pd.read_csv('../data/sample_data_player_portrait.csv')
    df_B = pd.read_csv('../data/sample_data_behavior_sequence.csv')
    df_G = pd.read_csv('../data/sample_data_social_network.csv')
    df_Y = pd.read_csv('../data/sample_data_label.csv')

    # user features
    U = df_U.drop(['uid', 'ds'], axis=1).values
    U = U.reshape(-1, timestep, U.shape[-1])
    U = torch.tensor(U, dtype=torch.float32)

    # behavior sequences: pad/truncate every row to exactly `maxlen` tokens so the
    # reshape below is valid regardless of the longest sequence in the file.
    B = torch.zeros((len(df_B), maxlen), dtype=torch.long)
    for row, raw in enumerate(df_B['seq']):
        if pd.isna(raw):
            continue  # missing sequence -> all padding
        tokens = [int(tok) for tok in str(raw).split(',') if tok.strip()]
        tokens = tokens[-maxlen:]
        if tokens:
            B[row, :len(tokens)] = torch.tensor(tokens, dtype=torch.long)
    B = B.reshape(-1, timestep, maxlen)

    # social network graph
    # Node order must follow the row order of U; the default ordering follows the
    # edge list and drops users without any edge.
    # NOTE(review): assumes the portrait table is grouped as `timestep`
    # consecutive rows per uid, the same assumption the reshape of U makes.
    uids = df_U['uid'].drop_duplicates().tolist()
    if len(uids) != U.shape[0]:
        raise ValueError(
            'portrait table has %d distinct uids but reshapes to %d users; '
            'expected exactly `timestep` rows per uid' % (len(uids), U.shape[0])
        )
    G = nx.from_pandas_edgelist(df=df_G, source='src_uid', target='dst_uid', edge_attr=['weight'])
    G.add_nodes_from(uids)  # keep isolated users as nodes
    A = nx.adjacency_matrix(G, nodelist=uids)
    edge_index, edge_weight = from_scipy_sparse_matrix(A)

    # labels
    y1 = torch.tensor(df_Y['churn_label'].values, dtype=torch.float32).unsqueeze(-1)
    y2 = torch.tensor(np.log1p(df_Y['payment_label'].values), dtype=torch.float32).unsqueeze(-1)

    print('U:', U.shape)
    print('B:', B.shape)
    print('G:', A.shape)
    print('y1:', y1.shape, 'y2:', y2.shape)

    return U, B, edge_index, edge_weight, y1, y2

In [None]:
U, B, edge_index, edge_weight, y1, y2 = data_process(timestep=timestep, maxlen=maxlen)

# dataset preparation
N = U.shape[0]

# `dataset` / `data_loader` are not read by the training loop below (it feeds the
# full graph to the model directly); they are kept so the notebook's globals stay
# as before.
dataset = Data(x=torch.cat((U, B), dim=-1), edge_index=edge_index, edge_attr=edge_weight, y1=y1, y2=y2)
data_loader = DataLoader([dataset], batch_size=N)

# The inputs and labels are identical for every fold and epoch, so move them to
# the device once instead of on every forward pass.
U_dev = U.to(device)
B_dev = B.to(device)
edge_index_dev = edge_index.to(device)
y1_dev = y1.to(device)
y2_dev = y2.to(device)
model_inputs = [U_dev, B_dev, edge_index_dev]

# model and training
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed_value)

for train_index, test_index in kfold.split(U.numpy(), y1.numpy().ravel()):

    # carve a validation subset out of the training fold for early stopping
    train_index, val_index = train_test_split(train_index, test_size=0.1, random_state=seed_value)

    # boolean node masks: the whole graph is always forwarded, only the loss is masked
    mask_train = torch.zeros(N, dtype=torch.bool)
    mask_val = torch.zeros(N, dtype=torch.bool)
    mask_test = torch.zeros(N, dtype=torch.bool)
    mask_train[train_index] = True
    mask_val[val_index] = True
    mask_test[test_index] = True
    mask_train = mask_train.to(device)
    mask_val = mask_val.to(device)
    mask_test = mask_test.to(device)

    model = MSDMT(timestep=timestep, behavior_maxlen=maxlen).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # The model's first head already applies a sigmoid, so the loss must take
    # probabilities; BCEWithLogitsLoss would apply a second sigmoid.
    criterion1 = nn.BCELoss()
    criterion2 = nn.MSELoss()

    best_loss = float('inf')
    patience = 5
    wait = 0

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()

        output1, output2 = model(model_inputs)
        loss1 = criterion1(output1[mask_train], y1_dev[mask_train])
        loss2 = criterion2(output2[mask_train], y2_dev[mask_train])
        loss = alpha * loss1 + beta * loss2
        loss.backward()
        optimizer.step()

        # validation
        model.eval()
        with torch.no_grad():
            val_output1, val_output2 = model(model_inputs)
            val_loss1 = criterion1(val_output1[mask_val], y1_dev[mask_val])
            val_loss2 = criterion2(val_output2[mask_val], y2_dev[mask_val])
            val_loss = alpha * val_loss1 + beta * val_loss2

        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {loss.item()}, Val Loss: {val_loss.item()}")

        # early stopping: checkpoint on improvement, stop after `patience` stale epochs
        if val_loss.item() < best_loss:
            best_loss = val_loss.item()
            wait = 0
            torch.save(model.state_dict(), 'best_model.pt')
        else:
            wait += 1
            if wait >= patience:
                print("Early stopping triggered")
                break

    # load the best model for evaluation
    model.load_state_dict(torch.load('best_model.pt', map_location=device))
    model.eval()

    with torch.no_grad():
        test_output1, test_output2 = model(model_inputs)
        test_loss1 = criterion1(test_output1[mask_test], y1_dev[mask_test])
        test_loss2 = criterion2(test_output2[mask_test], y2_dev[mask_test])
        test_loss = alpha * test_loss1 + beta * test_loss2
        print(f"Test Loss: {test_loss.item()}")
