In [1]:
import sys
import os
import json
import tensorflow as tf
import tqdm
from models.model import *
from ltv_utils import *
from losses.custom_loss import *
pd.set_option('display.float_format', '{:.4f}'.format)  # 保留10位小数，可调整
import warnings
warnings.simplefilter(action='ignore', category=UserWarning)




def parse_function(serialized_example):
    feature_description = {
        'deviceid': tf.io.FixedLenFeature([], tf.string),
        'install_date': tf.io.FixedLenFeature([], tf.string),
        'dim_os_name1': tf.io.FixedLenFeature([], tf.string),
        'creative_classify1': tf.io.FixedLenFeature([], tf.string),
        'total_pay_amount1':  tf.io.FixedLenFeature([], tf.float32),
         'channel1': tf.io.FixedLenFeature([], tf.string),
        'b2_sale_amt_bias':  tf.io.FixedLenFeature([], tf.int64),
         'b2_sale_amt_7d': tf.io.FixedLenFeature([], tf.int64),
         'install_time': tf.io.FixedLenFeature([], tf.string),
        'install_order_diff':  tf.io.FixedLenFeature([], tf.int64),
        'all_install_order_7d_diff':  tf.io.FixedLenFeature([], tf.int64),
        'is_a1x_a33':  tf.io.FixedLenFeature([], tf.int64),
        'platform_label':  tf.io.FixedLenFeature([], tf.string),
        'user_dense_price_features': tf.io.FixedLenFeature([len(group_2_features['user_dense_price_features'])], tf.float32),
        'user_dense_duration_features': tf.io.FixedLenFeature([len(group_2_features['user_dense_duration_features'])], tf.float32),
        'user_dense_features': tf.io.FixedLenFeature([len(group_2_features['user_dense_features'])], tf.float32),
        'user_sparse_features': tf.io.FixedLenFeature([len(group_2_features['user_sparse_features'])], tf.float32)
    }
    example = tf.io.parse_single_example(serialized_example, feature_description)
    return example


# load tf records
group_2_features = read_feature_json_config('features/feature_list.json')
file_name = 'data/loca_test_tf.tfrecords'
data_path = file_name

dataset = tf.data.TFRecordDataset(data_path)
dataset = dataset.map(parse_function)
dataset = dataset.prefetch(buffer_size=10000)
dataset = dataset.batch(2048)


user_dense_price_features = group_2_features['user_dense_price_features']
user_dense_duration_features = group_2_features['user_dense_duration_features']
user_dense_features = group_2_features['user_dense_features']
user_sparse_features = group_2_features['user_sparse_features']


In [35]:
def create_tf_dataset(dataset):
    sample_batch = next(iter(dataset))
    sample_data = {k: v for k, v in sample_batch.items() if k not in ['b2_sale_amt_7d', 'total_pay_amount1']}

    # 对 == 4 的 数值变换
    def generator():
        for batch in dataset:
            hour = tf.cast(tf.gather(batch['user_sparse_features'],  indices=0, axis = 1) - 1, tf.int64)    # shape: (batch_size,)
            b2_7d = tf.cast(tf.reshape(batch.pop('b2_sale_amt_7d'), (-1, 1)), tf.float32)
            total_amt_1h = tf.reshape(batch.pop('total_pay_amount1'), (-1, 1))
    
            # ✅ 找到 hour == 4 的位置
            hour_mask = tf.equal(hour, 4)  # shape: (batch_size,)
            hour_mask = tf.reshape(hour_mask, (-1, 1))  # 广播成 (batch_size, 1)
            # ✅ 对应位置加 10000
            b2_7d = tf.where(hour_mask, b2_7d + 1.00, b2_7d)

            
            y_true_packed = tf.concat([b2_7d, total_amt_1h], axis=1)
            yield batch, y_true_packed
        

    # 正确写法：output_signature 中保留每个字段的真实 shape
    output_signature = (
        {
            name: tf.TensorSpec(shape=(None,) + v.shape[1:], dtype=v.dtype)
            for name, v in sample_data.items()
        },
        tf.TensorSpec(shape=(None, 2), dtype=tf.float32)
    )

    return tf.data.Dataset.from_generator(generator, output_signature=output_signature)

In [42]:

emb_features = [
'creative_classify','dim_device_manufacture', 'car_add_type_most','show_order_is_2arrival_latest', 'selecttirecount_most', 'show_order_is_2arrival_most','selecttirecount_latest',
 'new_sitename','advsite','car_add_type_latest','platform_level', 'tire_list_click_avg_index','tire_list_click_most_pid_level','tire_order_page_most_pid_level',
]


model = MULTI_HEAD_LTV_MODEL(5, [512], [200,128,128], 'user_dense_features', 'user_dense_price_features', 'user_dense_duration_features',
                            'user_sparse_features',user_sparse_features, emb_features)

sample = next(iter(dataset))
input_shape = {k: v.shape for k, v in sample.items()}



loss_fn = UnifiedLTVLoss(mode='mse', normalize=False)
model.compile(loss=loss_fn, optimizer = 'adam')

model.fit(
    create_tf_dataset(dataset),
    epochs=1,
)        


Tensor("IteratorGetNext:15", shape=(None, 2), dtype=float32)
Tensor("IteratorGetNext:15", shape=(None, 2), dtype=float32)


<keras.callbacks.History at 0x2a0ba788bb0>

In [43]:
res = model.evaluate(create_tf_dataset(dataset))
res = pd.DataFrame(res)
display(res)

Unnamed: 0,pred sum:,true sum:
0,"tf.Tensor(2686040.2, shape=(), dtype=float32)","tf.Tensor(2323635.0, shape=(), dtype=float32)"
1,"tf.Tensor(2662617.8, shape=(), dtype=float32)","tf.Tensor(2324560.0, shape=(), dtype=float32)"
2,"tf.Tensor(2602250.0, shape=(), dtype=float32)","tf.Tensor(2403513.0, shape=(), dtype=float32)"
3,"tf.Tensor(2624564.8, shape=(), dtype=float32)","tf.Tensor(2364758.0, shape=(), dtype=float32)"
4,"tf.Tensor(2658703.2, shape=(), dtype=float32)","tf.Tensor(2503617.0, shape=(), dtype=float32)"


In [44]:
model.sharebottom.weights

[<tf.Variable 'dense_168/kernel:0' shape=(161, 512) dtype=float32, numpy=
 array([[ 0.10864558,  0.05627091,  0.07785615, ...,  0.02592664,
          0.04326518,  0.06621341],
        [ 0.03090314, -0.06813363, -0.08137404, ...,  0.02333791,
         -0.08165032, -0.0050212 ],
        [-0.01180993,  0.09008521,  0.0207273 , ...,  0.09390711,
          0.04611598,  0.00056279],
        ...,
        [-0.07980931, -0.07302803, -0.07851141, ...,  0.01441339,
          0.0597057 , -0.07536792],
        [ 0.03634158, -0.06791074, -0.02580727, ...,  0.03182675,
         -0.02323934,  0.00420853],
        [-0.08091936, -0.06699446, -0.05119083, ..., -0.06515649,
          0.04037888, -0.04475241]], dtype=float32)>,
 <tf.Variable 'dense_168/bias:0' shape=(512,) dtype=float32, numpy=
 array([ 0.01716729,  0.01459961, -0.0084608 ,  0.01472607,  0.01829593,
         0.01539843,  0.01506466,  0.0200081 ,  0.01677879, -0.0073383 ,
         0.01938145,  0.01885287,  0.02680394,  0.00430138,  0.012027

In [45]:

# 训练第二阶段，只训练head层

model.sharebottom.trainable = False

model.process_dense_layer.trainable = False

model.process_emb_layer.trainable = False

for head in model.hour2headnn:

  head.trainable = True


model.compile(loss=loss_fn, optimizer = 'adam')

model.fit(
    create_tf_dataset(dataset),
    epochs=10,
)        



Epoch 1/10
Tensor("IteratorGetNext:15", shape=(None, 2), dtype=float32)
Tensor("IteratorGetNext:15", shape=(None, 2), dtype=float32)
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2a0beb3a410>

In [46]:
res = model.evaluate(create_tf_dataset(dataset))
res = pd.DataFrame(res)
display(res)

Unnamed: 0,pred sum:,true sum:
0,"tf.Tensor(2600853.0, shape=(), dtype=float32)","tf.Tensor(2323635.0, shape=(), dtype=float32)"
1,"tf.Tensor(2582252.2, shape=(), dtype=float32)","tf.Tensor(2324560.0, shape=(), dtype=float32)"
2,"tf.Tensor(2534499.2, shape=(), dtype=float32)","tf.Tensor(2403513.0, shape=(), dtype=float32)"
3,"tf.Tensor(2538851.8, shape=(), dtype=float32)","tf.Tensor(2364758.0, shape=(), dtype=float32)"
4,"tf.Tensor(2586609.8, shape=(), dtype=float32)","tf.Tensor(2503617.0, shape=(), dtype=float32)"


In [47]:
model.sharebottom.weights

[<tf.Variable 'dense_168/kernel:0' shape=(161, 512) dtype=float32, numpy=
 array([[ 0.10864558,  0.05627091,  0.07785615, ...,  0.02592664,
          0.04326518,  0.06621341],
        [ 0.03090314, -0.06813363, -0.08137404, ...,  0.02333791,
         -0.08165032, -0.0050212 ],
        [-0.01180993,  0.09008521,  0.0207273 , ...,  0.09390711,
          0.04611598,  0.00056279],
        ...,
        [-0.07980931, -0.07302803, -0.07851141, ...,  0.01441339,
          0.0597057 , -0.07536792],
        [ 0.03634158, -0.06791074, -0.02580727, ...,  0.03182675,
         -0.02323934,  0.00420853],
        [-0.08091936, -0.06699446, -0.05119083, ..., -0.06515649,
          0.04037888, -0.04475241]], dtype=float32)>,
 <tf.Variable 'dense_168/bias:0' shape=(512,) dtype=float32, numpy=
 array([ 0.01716729,  0.01459961, -0.0084608 ,  0.01472607,  0.01829593,
         0.01539843,  0.01506466,  0.0200081 ,  0.01677879, -0.0073383 ,
         0.01938145,  0.01885287,  0.02680394,  0.00430138,  0.012027