In [6]:
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# 1. Load the train / test splits
train_df = pd.read_csv('train_data.csv')
test_df = pd.read_csv('test_data.csv')

# 2. Split each frame into the target (y) and the design matrix (X)
y_train = train_df['noncompliance']
X_train = sm.add_constant(train_df.drop(columns=['noncompliance']))  # add intercept
y_test = test_df['noncompliance']
# add_constant defaults to has_constant='skip', which silently omits the
# intercept when the frame already contains a constant column. The test split
# can contain such a column even when the training split does not, which would
# leave X_test with different columns than the model was fitted on. Always add
# the intercept here, then select the training columns so both matrices have
# the same columns in the same order (raises KeyError if the test file lacks a
# training feature).
X_test = sm.add_constant(test_df.drop(columns=['noncompliance']), has_constant='add')
X_test = X_test[X_train.columns]

# Optional standardization, currently disabled.
# NOTE(review): if re-enabled, apply it to the features *before* add_constant;
# standardizing the full design matrix would also center the intercept column
# to 0 and turn the DataFrames into bare arrays.
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train.values)  # fit mean/variance, then standardize
# X_test = scaler.transform(X_test.values)


# 3. Evaluation helper
def evaluate_model(model_name, model_class, threshold=0.5):
    """Fit a binary-response model on the training split and print test metrics.

    The function reads the module-level ``X_train``, ``y_train``, ``X_test``
    and ``y_test`` objects; it does not take the data as arguments.

    Parameters
    ----------
    model_name : str
        Label printed above the metrics so that the output of different
        models can be told apart.
    model_class : callable
        Model constructor called as ``model_class(y_train, X_train)``. The
        returned object must provide ``fit(disp=0)``, and the fitted result
        must provide ``predict(X_test)``.
    threshold : float, default 0.5
        Predicted values greater than or equal to this cut-off are labelled
        as the positive class (1); everything else is labelled 0.

    Returns
    -------
    None
        Precision, recall and F1 are printed, not returned.
    """
    # disp=0 silences the optimizer's convergence printout.
    model = model_class(y_train, X_train).fit(disp=0)

    # Turn the predicted values into hard 0/1 labels using the cut-off.
    y_prob = model.predict(X_test)
    y_pred = (y_prob >= threshold).astype(int)

    # zero_division=0 reports 0 (rather than warning) when a metric's
    # denominator is zero, e.g. when no positives are predicted.
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)

    print(f"Model:     {model_name}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1 Score:  {f1:.4f}")

In [7]:
# Probit specification: fitted on the training split, scored on the test split.
evaluate_model(model_name="Probit", model_class=sm.Probit)

Precision: 0.4737
Recall:    0.0186
F1 Score:  0.0359




In [None]:
# Logit specification: fitted on the training split, scored on the test split.
evaluate_model(model_name="Logit", model_class=sm.Logit)