In [2]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Toy patient records, one row per patient, with columns:
# [age, sex (0 = female, 1 = male), weight (kg), height (cm), blood test result]
X = np.array(
    [
        [45, 0, 70, 160, 100],
        [60, 1, 85, 175, 130],
        [30, 1, 60, 180, 110],
        [25, 0, 55, 165, 90],
        [55, 0, 72, 155, 120],
        [40, 1, 80, 170, 105],
        [65, 0, 68, 150, 135],
        [35, 1, 78, 190, 95],
    ]
)

# One label per row of X: 1 = increased diabetes risk, 0 = normal.
y = np.array([0, 1, 0, 0, 1, 0, 1, 0])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training rows only and
# then reused for the test rows, so no test-set statistics reach the model.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a logistic regression classifier with default hyperparameters.
model = LogisticRegression().fit(X_train, y_train)

# Report mean accuracy on the held-out rows. With only 8 records in total the
# test set is tiny, so treat this number as a smoke test, not a real estimate.
accuracy = model.score(X_test, y_test)
print(f"Model accuracy: {accuracy:.2f}")

def predict_diabetes_risk(age, gender, weight, height, blood_test_result):
    """Predict the diabetes-risk label for a single new patient.

    The five arguments are packed into one feature row in the order
    [age, gender, weight, height, blood_test_result], scaled with the
    module-level ``scaler`` and classified with the module-level ``model``.

    Args:
        age: Patient age.
        gender: 0 for female, 1 for male.
        weight: Body weight in kg.
        height: Height in cm.
        blood_test_result: Blood test value.

    Returns:
        The predicted label for the patient, i.e. the first (and only)
        element of ``model.predict`` for the one-row input.
    """
    # Scaler and model both expect a 2-D array, hence the nested list.
    features = np.array([[age, gender, weight, height, blood_test_result]])
    scaled_features = scaler.transform(features)
    return model.predict(scaled_features)[0]



Model accuracy: 1.00


In [8]:
# Example: predict the diabetes risk for one patient.
age = 57
gender = 1  # 0 for female, 1 for male
weight = 80
height = 175
blood_test_result = 100

risk = predict_diabetes_risk(age, gender, weight, height, blood_test_result)

# Label 1 means increased risk; anything else is reported as normal.
risk_message = (
    "Diabetes risk is increased." if risk == 1 else "Diabetes risk is normal."
)
print(risk_message)


Diabetes risk is normal.
