# Implementation of NN using TensorFlow

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense
from tensorflow.keras.optimizers import Adam

## 创建自定义DataFrame
这里我们构造一个自定义的 DataFrame 作为模拟特征数据。注意：标签 `purchased` 是随机生成的，与各特征无关，因此模型在测试集上的准确率预期在 50% 左右。

In [6]:
# Seed both NumPy and TensorFlow so data generation and weight
# initialisation give the same results on every run.
np.random.seed(42)
tf.random.set_seed(42)

# 1. Build a synthetic DataFrame
num_samples = 500

# Simulated columns. The draw order (age, income, gender, city, purchased)
# determines the generated values under the fixed seed, so it is kept as is.
simulated_columns = {
    'age': np.random.randint(18, 70, size=num_samples),
    'income': np.random.normal(50000, 15000, size=num_samples),
    'gender': np.random.choice(['male', 'female'], size=num_samples),
    'city': np.random.choice(['New York', 'San Francisco', 'Chicago'], size=num_samples),
    # Simulated binary label, drawn independently of every feature
    'purchased': np.random.randint(0, 2, size=num_samples),
}
df = pd.DataFrame(simulated_columns)

In [7]:
# Separate the binary target column from the predictor columns
labels = df['purchased']
features = df.drop(columns=['purchased'])

In [None]:
# One-hot encode the categorical columns (gender, city): a neural network
# cannot consume raw strings, so they must become numeric indicator columns.
features = pd.get_dummies(features)

# Hold out 30% of the rows as the test set.
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.3, random_state=42
)

# Standardize only the numeric columns; every other column passes through unchanged.
numeric_columns = ['age', 'income']
ct = ColumnTransformer(
    [('standardize', StandardScaler(), numeric_columns)],
    remainder='passthrough',
)

# ColumnTransformer emits the transformed columns first, followed by the
# passthrough columns in their input order. Build the output column list
# explicitly instead of assuming it matches features.columns, so the names
# stay correct even if the numeric columns are not the leading ones.
passthrough_columns = [col for col in features.columns if col not in numeric_columns]
output_columns = numeric_columns + passthrough_columns

# Fit the scaler on the training split only, then reuse those statistics on
# the test split. Both calls return NumPy arrays.
train_scaled = ct.fit_transform(features_train)
test_scaled = ct.transform(features_test)

# Wrap the arrays back into DataFrames, keeping the original row index so
# each feature row still lines up with its entry in labels_train / labels_test.
features_train = pd.DataFrame(train_scaled, columns=output_columns, index=features_train.index)
features_test = pd.DataFrame(test_scaled, columns=output_columns, index=features_test.index)

## 定义模型结构

In [12]:
# 2. Define the model architecture
def design_model(features):
    """Build and compile a small feed-forward binary classifier.

    Args:
        features: 2-D array-like (e.g. a DataFrame). Only ``features.shape[1]``
            (the number of feature columns) is read, to size the input.

    Returns:
        A compiled ``Sequential`` model named "simple_custom_model":
        Dense(64, relu) -> Dense(32, relu) -> Dense(1, sigmoid), using Adam
        with learning rate 0.001, binary cross-entropy loss and the
        accuracy metric.
    """
    model = Sequential(name="simple_custom_model")
    # Declare the input shape. Rows are samples, so only the column count is
    # needed. tf.keras.Input(shape=...) is used instead of
    # InputLayer(input_shape=...), whose `input_shape` argument is deprecated
    # in Keras 3.
    model.add(tf.keras.Input(shape=(features.shape[1],)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    # Single sigmoid unit: outputs a probability for the binary label
    model.add(Dense(1, activation='sigmoid'))
    opt = Adam(learning_rate=0.001)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model

## 创建并训练模型

In [13]:
# Build the network sized to the training feature columns
model = design_model(features_train)
# summary() prints the layer table itself and returns None, so wrapping it
# in print() only adds a stray "None" line to the output.
model.summary()



None


In [14]:
# Train for 30 epochs with mini-batches of 8. Keep the returned History object
# so the per-epoch loss/accuracy can be inspected later (history.history);
# binding it also stops its repr from being echoed as the cell output.
history = model.fit(features_train, labels_train, epochs=30, batch_size=8, verbose=1)

Epoch 1/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5117 - loss: 0.6953 
Epoch 2/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5789 - loss: 0.6811
Epoch 3/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6000 - loss: 0.6738
Epoch 4/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5988 - loss: 0.6683 
Epoch 5/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5893 - loss: 0.6634 
Epoch 6/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6030 - loss: 0.6596 
Epoch 7/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6081 - loss: 0.6560 
Epoch 8/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6060 - loss: 0.6526 
Epoch 9/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x17fe37390>

## 评估模型

In [16]:
# Score the trained model on the held-out test split; verbose=0 silences the
# progress bar. The result unpacks into the loss and the accuracy metric.
loss, accuracy = model.evaluate(
    x=features_test,
    y=labels_test,
    verbose=0,
)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.4333333373069763
