Add xDeepFM & version check
* Add xDeepFM
* Add auto version check
* Fix bugs that occur when a model's input field size is 1
* Encapsulate duplicated code
* Update docs
Weichen Shen committed Dec 22, 2018
1 parent 0bab07a commit aad52c4
Showing 44 changed files with 1,142 additions and 1,821 deletions.
1 change: 0 additions & 1 deletion .travis.yml
@@ -36,7 +36,6 @@ install:
   - pip install -q pytest-cov==2.4.0 #>=2.4.0,<2.6
   - pip install -q python-coveralls
   - pip install -q codacy-coverage
-  - pip install -q h5py
   - pip install -q tensorflow==$TF_VERSION
   - pip install -e .
 # command to run tests
3 changes: 2 additions & 1 deletion README.md
@@ -14,7 +14,7 @@
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/d4099734dc0e4bab91d332ead8c0bdd0)](https://www.codacy.com/app/wcshen1994/DeepCTR?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/DeepCTR&utm_campaign=Badge_Grade)
[![License](https://img.shields.io/github/license/shenweichen/deepctr.svg)](https://github.com/shenweichen/deepctr/blob/master/LICENSE)

-DeepCTR is an **Easy-to-use**, **Modular** and **Extendible** package of deep-learning based CTR models, along with lots of core component layers which can be used to easily build your own custom model. You can use any complex model with `model.fit()` and `model.predict()`, just like any other Keras model, and the layers are compatible with TensorFlow.
+DeepCTR is an **Easy-to-use**, **Modular** and **Extendible** package of deep-learning based CTR models, along with lots of core component layers which can be used to easily build your own custom model. You can use any complex model with `model.fit()` and `model.predict()`, and the layers are compatible with TensorFlow.

Install the package through `pip install deepctr` and [**Get Started!**](https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html)

@@ -32,3 +32,4 @@
|Attentional Factorization Machine|[IJCAI 2017][Attentional Factorization Machines: Learning the Weight of Feature Interactions via Attention Networks](http://www.ijcai.org/proceedings/2017/435)|
|Neural Factorization Machine|[SIGIR 2017][Neural Factorization Machines for Sparse Predictive Analytics](https://arxiv.org/pdf/1708.05027.pdf)|
|Deep Interest Network|[KDD 2018][Deep Interest Network for Click-Through Rate Prediction](https://arxiv.org/pdf/1706.06978.pdf)|
+|xDeepFM|[KDD 2018][xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems](https://arxiv.org/pdf/1803.05170.pdf)|
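A quick usage sketch for the newly added model (hypothetical: apart from `feature_dim_dict` and `embedding_size`, which the other models in this commit also take, everything below is an assumption rather than the committed API):

```python
# Hypothetical usage sketch of the new xDeepFM model; not taken from the
# commit itself. feature_dim_dict follows the package-wide convention
# {'sparse': {field: vocabulary_size}, 'dense': [field, ...]}.
from deepctr.models import xDeepFM

feature_dim_dict = {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2},
                    'dense': []}
model = xDeepFM(feature_dim_dict, embedding_size=8)
model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])
# model.fit(model_input, labels, batch_size=256, epochs=10)
```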
5 changes: 4 additions & 1 deletion deepctr/__init__.py
@@ -1,4 +1,7 @@
 from .import activations
 from .import layers
 from .import sequence
-from .import models
+from . import models
+from .utils import check_version
+__version__ = '0.2.0a1'
+check_version(__version__)
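The `check_version` helper lives in `deepctr/utils.py`, which is not shown in this excerpt. A minimal sketch of what such an auto version check typically looks like, assuming it polls PyPI's JSON API (the function body below is an illustration, not the committed code):

```python
import json
from urllib.request import urlopen

def check_version(version):
    # Compare the installed version against the latest release on PyPI
    # and print a notice if they differ; sketch only, the committed
    # implementation may differ.
    try:
        with urlopen('https://pypi.org/pypi/deepctr/json', timeout=5) as resp:
            latest = json.loads(resp.read().decode('utf-8'))['info']['version']
        if latest != version:
            print('DeepCTR %s is available, you are using %s. '
                  'Upgrade with `pip install -U deepctr`.' % (latest, version))
    except Exception:
        # A version check must never break `import deepctr`.
        pass
```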
12 changes: 12 additions & 0 deletions deepctr/activations.py
@@ -46,3 +46,15 @@ def get_config(self,):

def compute_output_shape(self, input_shape):
return input_shape


def activation_fun(activation, fc):
    # Apply `activation` to the tensor `fc`. `activation` may be an activation
    # name (str) or a Layer subclass (e.g. Dice above); `tf` and `Layer` are
    # imported at the top of this module.
    if isinstance(activation, str):
        fc = tf.keras.layers.Activation(activation)(fc)
    elif isinstance(activation, type) and issubclass(activation, Layer):
        # issubclass raises TypeError for non-class arguments, hence the guard.
        fc = activation()(fc)
    else:
        raise ValueError(
            "Invalid activation: found %s. Use a str or an Activation Layer class." % (activation,))
    return fc
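Usage of the new helper, for illustration (assuming `Dice` is the `Layer` subclass defined earlier in this module):

```python
import tensorflow as tf
from deepctr.activations import Dice, activation_fun

x = tf.keras.layers.Input(shape=(8,))
h = activation_fun('relu', x)  # str -> wrapped in tf.keras.layers.Activation
h = activation_fun(Dice, h)    # Layer subclass -> instantiated, then applied
```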
833 changes: 505 additions & 328 deletions deepctr/layers.py

Large diffs are not rendered by default.
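The un-rendered `layers.py` diff is where the building block for xDeepFM would land: a Compressed Interaction Network (CIN) layer. A self-contained sketch of a CIN, following the paper cited in the README rather than the committed `deepctr.layers` code (class name, defaults, and internals are assumptions):

```python
import tensorflow as tf
from tensorflow.python.keras.layers import Layer

class CIN(Layer):
    # Sketch of a Compressed Interaction Network (xDeepFM, KDD 2018).
    # Input: (batch, field_size, embedding_size); output: (batch, sum(layer_size)).
    def __init__(self, layer_size=(128, 128), **kwargs):
        self.layer_size = layer_size
        super(CIN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.field_nums = [int(input_shape[1])]
        self.filters = []
        for i, size in enumerate(self.layer_size):
            # One 1x1 conv filter bank per CIN layer compresses the
            # H_k * m pairwise interaction maps into `size` feature maps.
            self.filters.append(self.add_weight(
                name='filter' + str(i),
                shape=[1, self.field_nums[-1] * self.field_nums[0], size],
                initializer='glorot_uniform'))
            self.field_nums.append(size)
        super(CIN, self).build(input_shape)

    def call(self, inputs):
        dim = int(inputs.get_shape()[-1])
        hidden = inputs
        pooled = []
        split0 = tf.split(inputs, dim, 2)       # D slices of (batch, m, 1)
        for i in range(len(self.layer_size)):
            split_k = tf.split(hidden, dim, 2)  # D slices of (batch, H_k, 1)
            # Outer products per embedding dimension: (D, batch, m, H_k).
            dot = tf.matmul(split0, split_k, transpose_b=True)
            dot = tf.reshape(dot, [dim, -1, self.field_nums[0] * self.field_nums[i]])
            dot = tf.transpose(dot, perm=[1, 0, 2])          # (batch, D, m*H_k)
            hidden = tf.nn.conv1d(dot, self.filters[i], stride=1, padding='VALID')
            hidden = tf.transpose(hidden, perm=[0, 2, 1])    # (batch, H_{k+1}, D)
            pooled.append(hidden)
        # Sum-pool every feature map over the embedding axis, then concatenate.
        return tf.reduce_sum(tf.concat(pooled, axis=1), axis=-1)
```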

3 changes: 2 additions & 1 deletion deepctr/models/__init__.py
@@ -7,6 +7,7 @@
 from .fnn import FNN
 from .pnn import PNN
 from .wdl import WDL
+from .xdeepfm import xDeepFM
 
 __all__ = ["AFM", "DCN", "MLR", "DeepFM",
-           "MLR", "NFM", "DIN", "FNN", "PNN", "WDL"]
+           "MLR", "NFM", "DIN", "FNN", "PNN", "WDL", "xDeepFM"]
23 changes: 3 additions & 20 deletions deepctr/models/afm.py
@@ -10,12 +10,11 @@
"""

-from tensorflow.python.keras.layers import Dense, Embedding, Concatenate, Reshape, add
+from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, add
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.initializers import RandomNormal
from tensorflow.python.keras.regularizers import l2

-from ..utils import get_input
+from ..utils import get_input, get_share_embeddings
from ..layers import PredictionLayer, AFMLayer, FM


@@ -50,7 +49,7 @@ def AFM(feature_dim_dict, embedding_size=8, use_attention=True, attention_factor
feature_dim_dict['dense']))

sparse_input, dense_input = get_input(feature_dim_dict, None)
-sparse_embedding, linear_embedding, = get_embeddings(
+sparse_embedding, linear_embedding, = get_share_embeddings(
feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear)

embed_list = [sparse_embedding[i](sparse_input[i])
@@ -87,19 +86,3 @@ def AFM(feature_dim_dict, embedding_size=8, use_attention=True, attention_factor
output = PredictionLayer(final_activation)(final_logit)
model = Model(inputs=sparse_input + dense_input, outputs=output)
return model


-def get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V, l2_reg_w):
-    sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size,
-                                  embeddings_initializer=RandomNormal(
-                                      mean=0.0, stddev=init_std, seed=seed),
-                                  embeddings_regularizer=l2(l2_rev_V),
-                                  name='sparse_emb_' + str(i) + '-' + feat) for i, feat in
-                        enumerate(feature_dim_dict["sparse"])]
-    linear_embedding = [Embedding(feature_dim_dict["sparse"][feat], 1,
-                                  embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std,
-                                                                      seed=seed), embeddings_regularizer=l2(l2_reg_w),
-                                  name='linear_emb_' + str(i) + '-' + feat) for
-                        i, feat in enumerate(feature_dim_dict["sparse"])]
-
-    return sparse_embedding, linear_embedding
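The helper removed above (and from deepfm.py and fnn.py below) is presumably consolidated into `deepctr/utils.py` as `get_share_embeddings`; utils.py is not shown in this diff, so the following reconstruction simply mirrors the deleted code under that assumption:

```python
# Assumed shape of the consolidated helper in deepctr/utils.py; it mirrors
# the per-model get_embeddings functions this commit deletes.
from tensorflow.python.keras.layers import Embedding
from tensorflow.python.keras.initializers import RandomNormal
from tensorflow.python.keras.regularizers import l2

def get_share_embeddings(feature_dim_dict, embedding_size, init_std, seed,
                         l2_reg_V, l2_reg_w):
    # One embedding table per sparse field for the deep/FM part...
    sparse_embedding = [
        Embedding(feature_dim_dict["sparse"][feat], embedding_size,
                  embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
                  embeddings_regularizer=l2(l2_reg_V),
                  name='sparse_emb_' + str(i) + '-' + feat)
        for i, feat in enumerate(feature_dim_dict["sparse"])]
    # ...and a 1-dimensional table per field for the linear term.
    linear_embedding = [
        Embedding(feature_dim_dict["sparse"][feat], 1,
                  embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
                  embeddings_regularizer=l2(l2_reg_w),
                  name='linear_emb_' + str(i) + '-' + feat)
        for i, feat in enumerate(feature_dim_dict["sparse"])]
    return sparse_embedding, linear_embedding
```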
2 changes: 1 addition & 1 deletion deepctr/models/dcn.py
@@ -4,7 +4,7 @@
Weichen Shen,wcshen1994@163.com
Reference:
-[1] Deep & Cross Network for Ad Click Predictions (https://arxiv.org/abs/1708.05123)
+[1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12. (https://arxiv.org/abs/1708.05123)
"""
from tensorflow.python.keras.layers import Dense, Embedding, Concatenate, Flatten
from tensorflow.python.keras.models import Model
27 changes: 4 additions & 23 deletions deepctr/models/deepfm.py
@@ -4,17 +4,14 @@
Weichen Shen,wcshen1994@163.com
Reference:
-[1] DeepFM: A Factorization-Machine based Neural Network for CTR Prediction(https://arxiv.org/abs/1703.04247)
+[1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247)
"""

-from tensorflow.python.keras.layers import Dense, Embedding, Concatenate, Reshape, Flatten, add
+from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, Flatten, add
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.initializers import RandomNormal
from tensorflow.python.keras.regularizers import l2


-from ..utils import get_input
+from ..utils import get_input, get_share_embeddings
from ..layers import PredictionLayer, MLP, FM


@@ -50,7 +47,7 @@ def DeepFM(feature_dim_dict, embedding_size=8,
feature_dim_dict['dense']))

sparse_input, dense_input = get_input(feature_dim_dict, None)
-sparse_embedding, linear_embedding, = get_embeddings(
+sparse_embedding, linear_embedding, = get_share_embeddings(
feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear)

embed_list = [sparse_embedding[i](sparse_input[i])
@@ -97,19 +94,3 @@ def DeepFM(feature_dim_dict, embedding_size=8,
output = PredictionLayer(final_activation)(final_logit)
model = Model(inputs=sparse_input + dense_input, outputs=output)
return model


-def get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V, l2_reg_w):
-    sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size,
-                                  embeddings_initializer=RandomNormal(
-                                      mean=0.0, stddev=init_std, seed=seed),
-                                  embeddings_regularizer=l2(l2_rev_V),
-                                  name='sparse_emb_' + str(i) + '-' + feat) for i, feat in
-                        enumerate(feature_dim_dict["sparse"])]
-    linear_embedding = [Embedding(feature_dim_dict["sparse"][feat], 1,
-                                  embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std,
-                                                                      seed=seed), embeddings_regularizer=l2(l2_reg_w),
-                                  name='linear_emb_' + str(i) + '-' + feat) for
-                        i, feat in enumerate(feature_dim_dict["sparse"])]
-
-    return sparse_embedding, linear_embedding
2 changes: 1 addition & 1 deletion deepctr/models/din.py
@@ -4,7 +4,7 @@
Weichen Shen,wcshen1994@163.com
Reference:
-[1] Deep Interest Network for Click-Through Rate Prediction (https://arxiv.org/pdf/1706.06978.pdf)
+[1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068. (https://arxiv.org/pdf/1706.06978.pdf)
"""

from tensorflow.python.keras.layers import Input, Dense, Embedding, Concatenate, Reshape
36 changes: 8 additions & 28 deletions deepctr/models/fnn.py
@@ -4,16 +4,15 @@
Weichen Shen,wcshen1994@163.com
Reference:
-[1] Zhang, Weinan, Tianming Du, and Jun Wang. "Deep learning over multi-field categorical data." European conference on information retrieval. Springer, Cham, 2016.(https://arxiv.org/pdf/1601.02376.pdf)
+[1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf)
"""

-from tensorflow.python.keras.layers import Dense, Embedding, Concatenate, Reshape, add
+from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, add
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.initializers import RandomNormal
from tensorflow.python.keras.regularizers import l2

from ..layers import PredictionLayer, MLP
-from ..utils import get_input
+from ..utils import get_input, get_share_embeddings


def FNN(feature_dim_dict, embedding_size=8,
@@ -42,12 +41,8 @@ def FNN(feature_dim_dict, embedding_size=8,
"feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")

sparse_input, dense_input = get_input(feature_dim_dict, None)
-# sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size,
-#                               embeddings_initializer=RandomNormal( mean=0.0, stddev=init_std, seed=seed),
-#                               embeddings_regularizer=l2( l2_reg_embedding),name='sparse_emb_' + str(i) + '-' + feat) for i, feat in
-#                               enumerate(feature_dim_dict["sparse"])]
-sparse_embedding, linear_embedding, = get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding,
-                                                     l2_reg_linear)
+sparse_embedding, linear_embedding, = get_share_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding,
+                                                           l2_reg_linear)

embed_list = [sparse_embedding[i](sparse_input[i])
for i in range(len(feature_dim_dict["sparse"]))]
@@ -56,10 +51,11 @@ def FNN(feature_dim_dict, embedding_size=8,
for i in range(len(sparse_input))]
if len(linear_term) > 1:
linear_term = add(linear_term)
-elif len(linear_term) > 0:
+elif len(linear_term) == 1:
linear_term = linear_term[0]
else:
linear_term = 0

#linear_term = add([linear_embedding[i](sparse_input[i]) for i in range(len(feature_dim_dict["sparse"]))])
if len(dense_input) > 0:
continuous_embedding_list = list(
map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ),
@@ -85,19 +81,3 @@ def FNN(feature_dim_dict, embedding_size=8,
model = Model(inputs=sparse_input + dense_input,
outputs=output)
return model


-def get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V, l2_reg_w):
-    sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size,
-                                  embeddings_initializer=RandomNormal(
-                                      mean=0.0, stddev=init_std, seed=seed),
-                                  embeddings_regularizer=l2(l2_rev_V),
-                                  name='sparse_emb_' + str(i) + '-' + feat) for i, feat in
-                        enumerate(feature_dim_dict["sparse"])]
-    linear_embedding = [Embedding(feature_dim_dict["sparse"][feat], 1,
-                                  embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std,
-                                                                      seed=seed), embeddings_regularizer=l2(l2_reg_w),
-                                  name='linear_emb_' + str(i) + '-' + feat) for
-                        i, feat in enumerate(feature_dim_dict["sparse"])]
-
-    return sparse_embedding, linear_embedding
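The `len(linear_term)` branching in the hunk above is the commit's single-field bug fix in action: Keras's functional `add` requires at least two input tensors, so a model with exactly one sparse field has to return the lone tensor directly. A minimal illustration (assumed reproduction, not taken from the diff):

```python
import tensorflow as tf
from tensorflow.python.keras.layers import Input, add

terms = [Input(shape=(1,))]          # a model with a single input field
# add(terms) raises ValueError here: a merge layer needs >= 2 inputs,
# which is why the code above falls through to linear_term = terms[0].
linear_term = add(terms) if len(terms) > 1 else terms[0]
```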
