In [None]:
def get_feature_columns():
    """Build the feature-column definitions used by the model.

    Returns:
        A 3-tuple ``(sparse_features, dense_features, seq_features)``:
        a list of ``SparseFeat``, a list of ``DenseFeat`` and a list of
        ``VarLenSparseFeat`` objects, in that order.
    """
    # (name, vocabulary_size, embedding_dim) for each categorical feature.
    sparse_specs = (
        ('user_id', 100, 8),
        ('gender', 2, 4),
        ('region', 5, 4),
        ('user_hot', 3, 4),
        ('video_id', 100, 8),
        ('video_type', 5, 4),
        ('video_quality', 2, 4),
    )
    sparse_features = [
        SparseFeat(feat_name, vocabulary_size=vocab, embedding_dim=dim)
        for feat_name, vocab, dim in sparse_specs
    ]

    # Numeric features, each declared with dimension 1.
    dense_features = [DenseFeat(feat_name, 1) for feat_name in ('age', 'video_length')]

    # (sequence name, embedding_name, vocabulary_size, embedding_dim).
    # Each history sequence names the embedding of its item-side feature via
    # `embedding_name` and uses the same "seq_length" length input.
    seq_specs = (
        ('hist_video_id', 'video_id', 100, 8),
        ('hist_video_type', 'video_type', 5, 4),
    )
    seq_features = [
        VarLenSparseFeat(
            SparseFeat(seq_name, vocabulary_size=vocab, embedding_dim=dim, embedding_name=shared_name),
            maxlen=50,
            length_name="seq_length"
        )
        for seq_name, shared_name, vocab, dim in seq_specs
    ]

    return sparse_features, dense_features, seq_features

In [None]:
def DIN_MMOE(
    dnn_feature_columns,
    history_feature_list,
    num_experts=4,
    expert_dnn_hidden_units=(256, 128),
    tower_dnn_hidden_units=(64,),
    gate_dnn_hidden_units=(),
    l2_reg_embedding=1e-6,
    l2_reg_dnn=0,
    seed=1024,
    dnn_dropout=0,
    dnn_activation='relu',
    dnn_use_bn=False,
    task_types=('binary', 'regression', 'binary', 'binary'),
    task_names=('is_exceed_5s', 'stay_time', 'is_watch', 'is_buy')
):
    """Build a multi-task model that combines DIN attention with MMoE.

    An attention-pooled representation of the behaviour history is
    concatenated with the remaining features, fed to ``num_experts`` expert
    networks, and mixed per task by a softmax gate before a task tower
    produces that task's prediction.

    Args:
        dnn_feature_columns: Feature-column configuration for all inputs.
        history_feature_list: Names of the features that have a behaviour
            history used for attention.
        num_experts: Number of expert networks (must be at least 1).
        expert_dnn_hidden_units: Hidden-layer sizes of each expert network.
        tower_dnn_hidden_units: Hidden-layer sizes of each task tower.
        gate_dnn_hidden_units: Hidden-layer sizes of each gate network.
        l2_reg_embedding: L2 regularisation strength passed to
            ``input_from_feature_columns``.
        l2_reg_dnn: L2 regularisation strength passed to every ``DNN`` block.
        seed: Random seed passed to ``input_from_feature_columns`` and to
            every ``DNN`` block.
        dnn_dropout: Dropout rate passed to every ``DNN`` block.
        dnn_activation: Activation passed to every ``DNN`` block.
        dnn_use_bn: Batch-normalisation flag passed to every ``DNN`` block.
        task_types: Task type per task, passed to ``PredictionLayer``.
        task_names: Output name per task; also used to name the gate and
            tower layers.

    Returns:
        A ``Model`` whose outputs are the per-task predictions, in the order
        given by ``task_names``.

    Raises:
        ValueError: If ``num_experts`` is smaller than 1, or if
            ``task_types`` and ``task_names`` differ in length.
    """
    # Validate up front: zip() below would otherwise silently drop the extra
    # entries of the longer sequence and build a model with missing heads.
    task_types = tuple(task_types)
    task_names = tuple(task_names)
    if num_experts < 1:
        raise ValueError(f"num_experts must be at least 1, got {num_experts}")
    if len(task_types) != len(task_names):
        raise ValueError(
            "task_types and task_names must have the same length, got "
            f"{len(task_types)} and {len(task_names)}"
        )

    # Input layer
    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    # Feature embeddings
    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed
    )

    # Target-item (query) and history (keys) embeddings.
    # NOTE(review): if `embedding_lookup` is DeepCTR's helper, it expects a
    # dict of embedding layers and feature-column objects rather than the
    # embedding list / feature-name list passed here — confirm which helper
    # is in scope and that these calls match its signature.
    query_embed_list = embedding_lookup(sparse_embedding_list, features, history_feature_list)
    keys_embed_list = embedding_lookup(sparse_embedding_list, features, history_feature_list, history_feature_list)
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # DIN attention over the behaviour history.
    # 'seq_length' is the same name the sequence feature columns in this file
    # declare as `length_name`; presumably `build_input_features` exposes it
    # under that key — TODO confirm.
    att_output = AttentionSequencePoolingLayer()(
        [query_embed_list, keys_embed_list, features['seq_length']]
    )

    # Merge the attention output with the other features.
    # NOTE(review): this concat requires both tensors to have the same rank;
    # verify the attention output does not need flattening first.
    din_output = tf.concat([dnn_input, att_output], axis=-1)

    # Expert networks, all fed the same shared input
    expert_outputs = []
    for i in range(num_experts):
        expert_network = DNN(
            expert_dnn_hidden_units,
            dnn_activation,
            l2_reg_dnn,
            dnn_dropout,
            dnn_use_bn,
            seed=seed,
            name=f'expert_{i}'
        )(din_output)
        expert_outputs.append(expert_network)

    expert_concat = tf.stack(expert_outputs, axis=1)  # (batch_size, num_experts, expert_output_dim)

    # Per-task gate, expert mixture, tower and output
    task_outputs = []
    for task_type, task_name in zip(task_types, task_names):
        # Gate network
        gate_input = DNN(
            gate_dnn_hidden_units,
            dnn_activation,
            l2_reg_dnn,
            dnn_dropout,
            dnn_use_bn,
            seed=seed,
            name=f'gate_{task_name}'
        )(din_output)

        gate_output = Dense(
            num_experts,
            use_bias=False,
            activation='softmax',
            name=f'gate_softmax_{task_name}'
        )(gate_input)

        # Add a trailing axis so the gate weights broadcast over the expert
        # output dimension in the product below.
        gate_output = tf.expand_dims(gate_output, axis=-1)

        # Gate-weighted sum over the expert axis
        weighted_expert = reduce_sum(
            expert_concat * gate_output,
            axis=1,
            keep_dims=False,
            name=f'gate_mul_expert_{task_name}'
        )

        # Task tower
        tower_output = DNN(
            tower_dnn_hidden_units,
            dnn_activation,
            l2_reg_dnn,
            dnn_dropout,
            dnn_use_bn,
            seed=seed,
            name=f'tower_{task_name}'
        )(weighted_expert)

        # Output layer
        logit = Dense(1, use_bias=False)(tower_output)
        output = PredictionLayer(task_type, name=task_name)(logit)
        task_outputs.append(output)

    # Assemble the model
    model = Model(inputs=inputs_list, outputs=task_outputs)
    return model

In [None]:
def train_model():
    """Build, compile and fit the DIN_MMOE model.

    The training inputs ``train_model_input`` and the label table ``train``
    are read from module scope; they are not created here and must exist
    before this function is called.

    Returns:
        A 2-tuple ``(model, history)``: the fitted model and the object
        returned by ``model.fit``.
    """
    # Feature-column configuration
    sparse_features, dense_features, seq_features = get_feature_columns()
    dnn_feature_columns = sparse_features + dense_features + seq_features
    history_feature_list = ['video_id', 'video_type']

    # Single definition of the task heads; the loss, weight, metric and label
    # structures below are all derived from it so they cannot drift apart.
    task_names = ('is_exceed_5s', 'stay_time', 'is_watch', 'is_buy')
    task_types = ('binary', 'regression', 'binary', 'binary')
    is_regression = {
        name: kind == 'regression' for name, kind in zip(task_names, task_types)
    }

    # Build the model
    model = DIN_MMOE(
        dnn_feature_columns,
        history_feature_list,
        num_experts=4,
        expert_dnn_hidden_units=(256, 128),
        tower_dnn_hidden_units=(64,),
        task_types=task_types,
        task_names=task_names
    )

    # Compile: MSE for the regression head, binary cross-entropy / AUC for
    # the binary heads, with equal loss weights.
    model.compile(
        optimizer="adam",
        loss={
            name: 'mse' if is_regression[name] else 'binary_crossentropy'
            for name in task_names
        },
        loss_weights={name: 1.0 for name in task_names},
        metrics={
            name: ['mse'] if is_regression[name] else ['AUC']
            for name in task_names
        }
    )

    # Fit; labels are passed in the same order as the model outputs.
    history = model.fit(
        train_model_input,
        [train[name] for name in task_names],
        batch_size=256,
        epochs=10,
        validation_split=0.2
    )

    # Hand both back to the caller; otherwise the trained model is lost.
    return model, history