In [1]:
import tensorflow as tf
tf.__version__

'2.10.0'

In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix,classification_report
import seaborn as sns

In [6]:
!ls ../../data/adult/

adult-census.csv
adult_processed.csv
preprocessing.ipynb


In [9]:
data = []
path = r'../../data/adult/adult_processed.csv'
data = pd.read_csv(path)
num_col = [
    "age",
    "education-num",
    "capital-gain",
    "capital-loss",
    "hours-per-week",
]
nonum_col = [
    "workclass",
    "education",
    "marital-status",
    "occupation",
    "relationship",
    "race",
    "sex",
    "native-country",
]
data.head()

Unnamed: 0,age,workclass,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,target
0,25,4,1,7,4,7,3,2,1,0,0,40,39,0
1,38,4,11,9,2,5,0,4,1,0,0,50,39,0
2,28,2,7,12,2,11,0,4,1,0,0,40,39,1
3,44,4,15,10,2,7,0,2,1,7688,0,40,39,1
4,18,0,15,10,4,0,3,4,0,0,0,30,39,0


In [11]:
data[num_col] = data[num_col].astype(int)
for i in num_col:
    data[i] = MinMaxScaler(feature_range=[0,1]).fit_transform(data[i].values.reshape(-1, 1))
for i in nonum_col:
    data = pd.concat((data,pd.get_dummies(data[i])),axis=1)
    del data[i]
data.head()

Unnamed: 0,age,education-num,capital-gain,capital-loss,hours-per-week,target,0,1,2,3,...,32,33,34,35,36,37,38,39,40,41
0,0.109589,0.4,0.0,0.0,0.397959,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,0.287671,0.533333,0.0,0.0,0.5,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,0.150685,0.733333,0.0,0.0,0.397959,1,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
3,0.369863,0.6,0.076881,0.0,0.397959,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,0.013699,0.6,0.0,0.0,0.295918,0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [12]:
train,test = train_test_split(data,test_size=0.3,random_state=10)

In [13]:
feature = train.columns.tolist()
feature.remove('target')
label = ['target']

In [14]:
def interacting_layer(x,head_num):
    dim = x.shape[-1]
    attention_output_dim = dim*x.shape[-2]
    Q = tf.keras.layers.Dense(units=head_num*dim)(x)
    K = tf.keras.layers.Dense(units=head_num*dim)(x)
    V = tf.keras.layers.Dense(units=head_num*dim)(x)
    Qs = tf.split(Q,head_num*[dim],-1)
    Ks = tf.split(K,head_num*[dim],-1)
    Vs = tf.split(V,head_num*[dim],-1)
    alphas = []
    for num in range(head_num):
        score = tf.nn.softmax(tf.matmul(Qs[num],Ks[num],transpose_b=True)/dim)
        alpha = tf.matmul(score,Vs[num])
        alpha = tf.keras.layers.Flatten()(alpha)
        alphas.append(alpha)
    attention_output = tf.keras.layers.concatenate(alphas)
    attention_output = tf.keras.layers.Dense(units=attention_output_dim)(attention_output)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(units=x.shape[-1])(x)
    interact_layer_output = tf.keras.layers.Activation('relu')(tf.keras.layers.add([attention_output,x]))
    return interact_layer_output

# 一层AutoInt

In [15]:
inputs = tf.keras.Input(shape=(train[feature].shape[-1],))
x = tf.keras.layers.Embedding(2,4)(inputs)
x = interacting_layer(x,4)
output = tf.keras.layers.Dense(units=1,activation='sigmoid')(x)
autoint = tf.keras.Model(inputs,output)

In [16]:
autoint.compile(optimizer=tf.keras.optimizers.Adam(1e-2),loss=tf.keras.losses.binary_crossentropy)

In [None]:
autoint_his = autoint.fit(train[feature].values,train[label].values,batch_size=100,epochs=20)

Epoch 1/20
Epoch 2/20
 15/342 [>.............................] - ETA: 4:46 - loss: 0.3469

# 两层AutoInt

In [None]:
inputs = tf.keras.Input(shape=(train[feature].shape[-1],))
x = tf.keras.layers.Embedding(2,4)(inputs)
x_dim,y_dim = x.shape[1],x.shape[-1]
for i in range(1):
    x = interacting_layer(x,4)
    x = tf.keras.layers.Reshape((x_dim,y_dim))(x)
x = interacting_layer(x,4)
output = tf.keras.layers.Dense(units=1,activation='sigmoid')(x)
autoint2 = tf.keras.Model(inputs,output)

In [None]:
autoint2.compile(optimizer=tf.keras.optimizers.Adam(1e-2),loss=tf.keras.losses.binary_crossentropy)
autoint2_his = autoint2.fit(train[feature].values,train[label].values,batch_size=100,epochs=20)

# wide&AutoInt

In [None]:
hot_col = [i for i in data.columns if i not in num_col+[label]]

num_inputs = tf.keras.Input(shape=(train[num_col].shape[-1],))
hot_inputs = tf.keras.Input(shape=(train[hot_col].shape[-1],))
wide_output = tf.keras.layers.Dense(units=30)(num_inputs)
x = tf.keras.layers.Embedding(2,4)(hot_inputs)
autoint_output = interacting_layer(x,4)
all_output = tf.keras.layers.concatenate([wide_output,autoint_output])
output = tf.keras.layers.Dense(units=1,activation='sigmoid')(all_output)
wide_autoint = tf.keras.Model([num_inputs,hot_inputs],output)

In [None]:
wide_autoint.compile(optimizer=tf.keras.optimizers.Adam(1e-2),loss=tf.keras.losses.binary_crossentropy)
wide_autoint_his = wide_autoint.fit([train[num_col].values,train[hot_col].values],train[label].values,batch_size=100,epochs=20)

# 训练结果可视化

In [None]:
plt.figure(dpi=200,figsize=(18,6))
plt.plot(autoint_his.history['loss'],label='autoint_loss')
plt.plot(autoint2_his.history['loss'],label='autoint2_loss')
plt.plot(wide_autoint_his.history['loss'],label='wide_autoint_loss')
plt.legend()
plt.grid()

# 推理

In [None]:
autoint_res = list(map(lambda x:1 if x>=0.5 else 0,autoint.predict(test[feature].values).reshape(1,-1)[0]))
autoint2_res = list(map(lambda x:1 if x>=0.5 else 0,autoint2.predict(test[feature].values).reshape(1,-1)[0]))
wide_autoint_res = list(map(lambda x:1 if x>=0.5 else 0,wide_autoint.predict([test[num_col].values,test[hot_col].values]).reshape(1,-1)[0]))

In [None]:
print(classification_report(test['income_bracket'],autoint_res))
print(classification_report(test['income_bracket'],autoint2_res))
print(classification_report(test['income_bracket'],wide_autoint_res))

In [None]:
plt.figure(dpi=200,figsize=(20,4))
plt.subplot(131)
plt.title('autoint_res')
sns.heatmap(confusion_matrix(test['income_bracket'],autoint_res),annot=True)
plt.subplot(132)
plt.title('autoint2_res')
sns.heatmap(confusion_matrix(test['income_bracket'],autoint2_res),annot=True)
plt.subplot(133)
plt.title('wide_autoint_res')
sns.heatmap(confusion_matrix(test['income_bracket'],wide_autoint_res),annot=True)