In [8]:
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow import keras
import pandas as pd 
import numpy as np 

np.set_printoptions(precision=4, suppress=True)

In [19]:
import sys 
sys.path.append("../data/liuliang_data")
from features_config import *

In [137]:
class Dense_Process_Layer(layers.Layer):
    def __init__(self, sparse_features, dense_features, price_features):
        super().__init__()
        self.sparse_features = sparse_features
        self.dense_features = dense_features
        self.price_features = price_features
        self.concat_layer = layers.Concatenate()  # Specifying axis in constructor
    
    def call(self, inputs):
        concat_numeric = []
        for name, input in inputs.items():
            if name in self.dense_features:
                input_cast = tf.cast(input, tf.float32)  # Cast input once
                if name not in self.price_features:
                    temp_feature = tf.math.log1p(input_cast) / tf.math.log(tf.constant(2.0, dtype=tf.float32))
                else:
                    temp_feature = tf.math.log1p(input_cast) / tf.math.log(tf.constant(10.0, dtype=tf.float32))
                temp_feature = tf.expand_dims(temp_feature, 1)
                concat_numeric.append(temp_feature)

        return self.concat_layer(concat_numeric)  # No need to specify axis again


In [138]:
class DNN(layers.Layer):
    def __init__(self, units = [256, 64]):
        super().__init__()
        self.dnn = keras.Sequential([
            layers.Dense(unit, activation = 'relu') for unit in units
        ])
    def call(self, x):
        return self.dnn(x)
        
        
class EveryDayModel(Model):
    def __init__(self, sparse_features, dense_features, price_features):
        super().__init__()
        self.embedding_dict = {}
        self.sparse_features = sparse_features
        num_bins = 1000
        for name in sparse_features:
            self.embedding_dict[name] = layers.Embedding(num_bins, 8, name = name)
        self.dense_process_layer = Dense_Process_Layer(sparse_features, dense_features, price_features)
        self.concat_embedding = layers.Concatenate()

        # 多任务 
        self.dnn = DNN([256, 64] )

        self.day1 = keras.layers.Dense(1)
        self.day2 = keras.layers.Dense(1)
        self.day3 = keras.layers.Dense(1)
        self.day4 = keras.layers.Dense(1)
        self.day5 = keras.layers.Dense(1)
        self.day6 = keras.layers.Dense(1)
        self.day7 = keras.layers.Dense(1)
        

    def call(self, inputs):
        dense_input = self.dense_process_layer(inputs)
        embeddings = [dense_input]

        for name, input in inputs.items():
            if name in self.embedding_dict:
                temp_embd = self.embedding_dict[name](input)
                embeddings.append(temp_embd)
        embedding_input = self.concat_embedding(embeddings)

        logit_7 = tf.sigmoid(self.day7(embedding_input))
        
        logit_1 = tf.sigmoid(self.day1(embedding_input) * logit_7)
        logit_2 = tf.sigmoid(self.day2(embedding_input) * logit_7)
        logit_3 = tf.sigmoid(self.day3(embedding_input) * logit_7)
        logit_4 = tf.sigmoid(self.day4(embedding_input) * logit_7)
        logit_5 = tf.sigmoid(self.day5(embedding_input) * logit_7)
        logit_6 = tf.sigmoid(self.day6(embedding_input) * logit_7)

        
        return layers.Concatenate()([logit_1, logit_2, logit_3, logit_4, logit_5, logit_6, logit_7])

In [139]:
data = pd.read_csv("../data/liuliang_data/toy_liuliang_data.csv", index_col= 0)

In [140]:
dense_features = [feature for feature in features if feature not in category_features]

sparse_features, dense_features, price_feature = category_features, dense_features, price_fatures

In [141]:
model = EveryDayModel(sparse_features, dense_features, price_feature)

In [142]:
data_dict = {name:tf.constant(v.values) for name,v in data.items() if name in features}

In [143]:
model(data_dict)

<tf.Tensor: shape=(20000, 7), dtype=float32, numpy=
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=float32)>

In [144]:
data.is_register.value_counts()

is_register
1.0    17435
0.0     2565
Name: count, dtype: int64

In [128]:
tf.

SyntaxError: invalid syntax (4190673377.py, line 1)