In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 生成数据
np.random.seed(42)
n_samples = 10000
abilities = np.random.randint(100, 5001, n_samples)
difficulties = np.random.randint(100, 5001, n_samples)
prob_correct = 1 / (1 + np.exp(-0.001 * (abilities - difficulties)))
labels = np.random.binomial(1, prob_correct)
df = pd.DataFrame({'用户能力': abilities, '题目难度': difficulties, '用户是否做对了这道题目': labels})

# 数据划分
X = df[['用户能力', '题目难度']]
y = df['用户是否做对了这道题目']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 数据标准化
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)  

In [14]:
import tensorflow as tf

# 构建神经网络模型
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# 编译模型
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
# 训练模型
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7412 - loss: 0.5802 - val_accuracy: 0.7706 - val_loss: 0.4964
Epoch 2/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 968us/step - accuracy: 0.7851 - loss: 0.4830 - val_accuracy: 0.7719 - val_loss: 0.4697
Epoch 3/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7868 - loss: 0.4605 - val_accuracy: 0.7713 - val_loss: 0.4654
Epoch 4/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 938us/step - accuracy: 0.7752 - loss: 0.4635 - val_accuracy: 0.7719 - val_loss: 0.4651
Epoch 5/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7739 - loss: 0.4681 - val_accuracy: 0.7731 - val_loss: 0.4642
Epoch 6/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7758 - loss: 0.4674 - val_accuracy: 0.7744 - val_loss: 0.4640
Epoch 7/50
[1m200/200[0m [32m━━━

In [4]:
# 评估模型
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 794us/step - accuracy: 0.7645 - loss: 0.4710
Test Loss: 0.45523470640182495, Test Accuracy: 0.784500002861023


In [16]:
# 预测
probabilities = model.predict(X_test_scaled)
predictions = (probabilities > 0.5).astype(int)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [17]:
probabilities

array([[0.7021373 ],
       [0.12974265],
       [0.0428297 ],
       ...,
       [0.03143927],
       [0.93101984],
       [0.8664198 ]], dtype=float32)

In [18]:
len(probabilities)

2000

In [19]:
X[:5]

Unnamed: 0,用户能力,题目难度
0,960,3818
1,3872,2793
2,3192,725
3,566,4334
4,4526,3808


In [20]:
probabilities[:6]

array([[0.7021373 ],
       [0.12974265],
       [0.0428297 ],
       [0.31976128],
       [0.79732895],
       [0.19258882]], dtype=float32)

In [12]:
prob_correct[:6]

array([0.05426926, 0.7463047 , 0.92179577, 0.02257673, 0.67216645,
       0.93601603])

In [26]:
import numpy as np

# 假设您已经有一个训练好的模型model和一个用于标准化的scaler

# 准备数据
new_data = np.array([[5000, 100]])

# 使用相同的scaler来标准化数据
new_data_scaled = scaler.transform(new_data)

# 使用模型进行预测
prediction = model.predict(new_data_scaled)

print("预测结果:", prediction)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
预测结果: [[0.99091834]]


