In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import sys
sys.path.append("../utils")
sys.path.append("../data/")
from dataconfig import *
from utils import *
import numpy as np 

{'SPARSE_FEATURES': ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26'], 'DENSE_FEATURES': ['I1', 'I2', 'I3', 'I4', 'I5', 'I6', 'I7', 'I8', 'I9', 'I10', 'I11', 'I12', 'I13'], 'label': ['label']}


In [2]:
np.set_printoptions(precision=4, suppress=True)

In [3]:
spase_feature_names = DATA_CONFIG['SPARSE_FEATURES']
dense_feature_names = DATA_CONFIG['DENSE_FEATURES']
label_feature_names = DATA_CONFIG['label']


In [4]:
import platform
if platform.system() != 'Windows':
    dataset = DataUtil(spase_feature_names, dense_feature_names, label_feature_names).read_tfrecord("../data/tf_data/train_criteo_5w_rows.tfrecord", 512)
    eval_data = DataUtil(spase_feature_names, dense_feature_names, label_feature_names).read_tfrecord("../data/tf_data/test_criteo_1w_rows.tfrecord", 512)
    valid_data = DataUtil(spase_feature_names, dense_feature_names, label_feature_names).read_tfrecord("../data/tf_data/valid_criteo_1w_rows.tfrecord", 512)
else:
    dataset = DataUtil(spase_feature_names, dense_feature_names, label_feature_names).read_tfrecord("../data/tf_data/train_criteo_200w_rows.tfrecord", 512)
    eval_data = DataUtil(spase_feature_names, dense_feature_names, label_feature_names).read_tfrecord("../data/tf_data/test_criteo_20w_rows.tfrecord", 512)
    valid_data = DataUtil(spase_feature_names, dense_feature_names, label_feature_names).read_tfrecord("../data/tf_data/valid_criteo_20w_rows.tfrecord", 512)

# 定义 inputs

inputs = { name:keras.Input(shape=(1,), name=name, dtype=tf.float32) for name in dense_feature_names }
inputs.update({
    name:keras.Input(shape=(1,), name=name, dtype=tf.string) for name in spase_feature_names
})


In [16]:
from tensorflow.keras.layers import Layer
from tensorflow.keras import Model 
import tensorflow as tf 
import tensorflow.keras as layers
from tensorflow import keras

In [8]:
class FM(Layer):
    # test_x = tf.random.uniform(shape=(32, 8, 8))
    # fm_test = FM()
    # fm_test(test_x)
    def __init__(self):
        super().__init__()
    def call(self, X):
        # a + b 
        sum_squre = tf.square( tf.reduce_sum(X, axis = 1))
        squre_sum = tf.reduce_sum(tf.square(X), axis = 1)

        return 0.5 * tf.reduce_sum(sum_squre - squre_sum, axis = 1, keepdims=True)

<tf.Tensor: shape=(), dtype=float32, numpy=0.6931472>

In [10]:
class DNN(Layer):
    def __init__(self, units):
        super().__init__()
        self.dnn = keras.Sequential([])

        for unit in units:
            self.dnn.add( keras.layers.Dense(unit,  activation='relu') )
    def call(self, X):
        return self.dnn(X)

class DataProcess(Layer):
    def __init__(self, inputs, dense_features):
        self.dense_features = dense_features
        self.concat = layers.Concatenate()
        
    def call(self, inputs):
        dense_res = [] 
        
        for name, v in inputs:
            if name in dense_features:
                temp = tf.floor( tf.math.log1p(v) / tf.math.log(tf.constant(2.0, dtype=tf.float32)) )
                dense_res.append(temp)
        return self.concat(dense_res)

        
        

class deepFM(Model):
    def __init__(self, inputs, sparse_features, dense_features, units, if_fm = True):

        sparse_features_hashing = {}
        sparse_features_embeddings = {}

        self.sparse_features = sparse_features
        self.dense_features = dense_features
        num_bins = 1000000

        self.dense_feature_layer = DataProcess(inputs, dense_features)
        for name, v in inputs:
            if name in sparse_features:
                sparse_features_hashing[name] = keras.layers.Hashing(num_bins, mask_value='')
                sparse_features_embeddings[name] = keras.layers.Embedding(num_bins,8 , mask_zero=True)

        self.concat = layers.Concatenate()
        self.dnn = DNN(units)
        self.out_layer = Dense(1)
        if is_fm:
            self.fm = FM()
        

    def call(self, inputs):
        dense_res = self.dense_feature_layer(inputs)

        embeddings = []
        for name, v in inputs.items():
            if name in self.sparse_features:
                x = self.sparse_features_hashing[name](v)
                x = self.sparse_features_embeddings[name](x)
                embeddings.append(x)

        all_features_concat = self.concat(embeddings + [dense_res])
        
        logit = 0
        if self.is_fm:
            logit += self.fm(embeddings)
        logit += self.out_layer(self.dnn(all_features_concat))
        
        return logit
        

    def plot_model_in_class(self, inputs):

        # 获取模型的结构
        x = self.call(inputs)  # 调用 call 方法

        # 构建一个新的 Keras 模型，用于绘制图形
        model_for_plot = keras.Model(inputs, x)
        
        # 绘制模型
        tf.keras.utils.plot_model(
            model_for_plot, 
            to_file="custom_model2_plot.png",  # 可指定文件路径
            show_shapes=True, 
            show_layer_names=True,
            rankdir='LR',  # Left to Right 排列
            dpi=75
        )
        print("Model plot saved as custom_model2_plot.png")

    def predict_test_data(eval_data):
        auc = tf.metrics.AUC()
        for batch in eval_data:
            label = eval_data.pop('label')
            pred = self(batch)
            label = tf.expand_dims(label, 1) 
            
            auc.update_state(label, y_pred=pred)
            

        return auc.result().numpy()

    def train_step(self, inputs):

        train_data, label = inputs

        with tf.GradientTape() as tape:
            pred = self(inputs)
            loss = tf.reduce_mean(self.loss(label, pred))
            
        
        train_vars = self.trainable_variables
        gradients = tape.gradient(loss, train_vars)
        
        self.optimizer.apply_gradients(zip(gradients, train_vars))
        self.compiled_metrics.update_state(label, pred)

        results = {m.name : m.result() for m in self.metrics}
        return results
            
        
    

SyntaxError: invalid syntax (1412084083.py, line 13)