In [1]:
from seaborn import load_dataset
import pandas as pd
import numpy as np
import warnings

# Suppress warnings triggered by library changes so they do not clutter the output
warnings.filterwarnings(action="ignore")

# Fetch the Titanic passenger dataset through seaborn and display it
df = load_dataset(name="titanic")
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [2]:
# Features: passenger class, sex and port of embarkation
X = df.loc[:, ["pclass", "sex", "embarked"]]
# Target: the "alive" column ("yes" / "no")
y = df.loc[:, "alive"]

In [3]:
# One-hot encode the string columns (sex, embarked); the numeric pclass column is kept as-is
X = pd.get_dummies(data=X)
X.head(n=5)

Unnamed: 0,pclass,sex_female,sex_male,embarked_C,embarked_Q,embarked_S
0,3,False,True,False,False,True
1,1,True,False,True,False,False
2,3,True,False,False,False,True
3,1,True,False,False,False,True
4,3,False,True,False,False,True


In [4]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# Random forest with a fixed seed: without random_state every run grows different
# trees, so neither the cross-validation score below nor the model that is fitted
# and saved afterwards would be reproducible.
model = RandomForestClassifier(random_state=42)
np.mean(cross_val_score(model, X, y, cv=5))  # mean score over 5-fold cross-validation

0.8114556525014125

In [5]:
import joblib

# Fit the forest on the complete feature matrix
model.fit(X=X, y=y)
# Persist the fitted model; predict.py loads this file to serve predictions
joblib.dump(value=model, filename="titanic.pkl")

['titanic.pkl']

In [7]:
%%writefile predict.py
from flask import Flask, request, jsonify
import joblib
import pandas as pd

app = Flask(__name__)

# Column layout that pd.get_dummies produced for the training features; request
# data is reindexed to exactly this set and order before inference.
COLUMNS_ONEHOT = [
    "pclass",
    "sex_female",
    "sex_male",
    "embarked_C",
    "embarked_Q",
    "embarked_S",
]

_model = None  # classifier cached after the first successful load


def _get_model():
    """Return the classifier stored in titanic.pkl, reading the file only once."""
    global _model
    if _model is None:
        # NOTE(security): joblib files are pickle-based and can execute arbitrary
        # code when loaded, so titanic.pkl must come from a trusted source.
        _model = joblib.load("titanic.pkl")
    return _model


def _encode_query(records):
    """Convert a list of passenger dicts into the one-hot frame the model expects.

    Raises ValueError or TypeError when pclass cannot be interpreted as a number.
    """
    query_df = pd.DataFrame(records)
    if "pclass" in query_df.columns:
        # A string pclass such as "1" would otherwise be one-hot encoded into
        # "pclass_1", dropped by reindex, and silently replaced with 0.
        query_df["pclass"] = pd.to_numeric(query_df["pclass"])
    # Categories absent from this request get an all-zero column.
    return pd.get_dummies(query_df).reindex(columns=COLUMNS_ONEHOT, fill_value=0)


@app.route("/", methods=["POST"])  # POST only
def predict():
    """Predict the "alive" label for the passengers in the JSON request body.

    The body is a JSON list of objects with the keys "pclass", "sex" and
    "embarked"; a single object is accepted as a one-element list.

    Returns:
        A JSON object {"prediction": [...]} with one label per passenger, or a
        JSON object {"error": ...} with status 400 when the body is not usable.
    """
    payload = request.json  # parsed request body
    if isinstance(payload, dict):
        # pd.DataFrame rejects a dict of scalars, so wrap a lone record.
        payload = [payload]
    if (
        not isinstance(payload, list)
        or not payload
        or not all(isinstance(record, dict) for record in payload)
    ):
        return (
            jsonify({"error": "expected a non-empty JSON list of passenger objects"}),
            400,
        )
    try:
        query = _encode_query(payload)
    except (ValueError, TypeError):
        return jsonify({"error": "invalid passenger records: pclass must be numeric"}), 400
    prediction = _get_model().predict(query)  # model inference
    # tolist() converts the NumPy array into plain Python values for JSON.
    return jsonify({"prediction": prediction.tolist()})

Writing predict.py


In [None]:
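# Example client session: run it in a separate shell while the Flask app from
# predict.py is serving on port 5000 (for example, started with `flask --app predict run`).
# In [1]: import requests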

# In [2]: sample = [{"pclass": 1, "sex": "male", "embarked": "C"}, {"pclass": 2, "sex": "female", "embarked": "S"}]                             
# In [3]: requests.post(url='http://127.0.0.1:5000', json=sample).content                             
# Out[3]: b'{"prediction":["no","yes"]}\n'