In [None]:
import pickle
import pandas as pd

def _read_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    # pickle can run arbitrary code while loading; only read trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Every pickle is unpacked into three parts: train, valid, test (in that order).
X_meta = _read_pickle("data/X_meta_use-target_sub.pickle")
X_train_meta, X_valid_meta, X_test_meta = X_meta

X_token = _read_pickle("data/X_token_use-target_sub.pickle")
X_train_token, X_valid_token, X_test_token = X_token

y = _read_pickle("data/y_use-target_sub.pickle")
y_train, y_valid, y_test = y

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

# Bag-of-n-grams features (unigrams through trigrams), keeping at most 1000 terms
# whose document frequency lies between 1.5% and 80%.
# ngram_range must be a tuple: recent scikit-learn releases validate the parameter
# type and reject a list.
count_vectorizer = CountVectorizer(
    min_df=.015,
    max_df=.8,
    max_features=1000,
    ngram_range=(1, 3),
)

# The vocabulary is learned from the training split only and then reused for the
# validation and test splits.
count_vectorizer.fit(X_train_token)
X_train_token_vec = pd.DataFrame.sparse.from_spmatrix(count_vectorizer.transform(X_train_token))
X_valid_token_vec = pd.DataFrame.sparse.from_spmatrix(count_vectorizer.transform(X_valid_token))
X_test_token_vec = pd.DataFrame.sparse.from_spmatrix(count_vectorizer.transform(X_test_token))


In [None]:
def _concat_by_position(*parts):
    """Join the given frames side by side, pairing rows by position.

    ``pd.concat(axis=1)`` aligns rows on index labels. The vectorized token
    frames carry a fresh 0..n-1 index, so any other index on the meta frames or
    targets would produce a NaN-padded outer join instead of a row-wise join.
    Resetting every index first makes the pairing positional; it is a no-op when
    the indexes are already 0..n-1.
    """
    return pd.concat([part.reset_index(drop=True) for part in parts], axis=1)


# Feature matrices: meta columns followed by the n-gram count columns.
X_train = _concat_by_position(X_train_meta, X_train_token_vec)
X_valid = _concat_by_position(X_valid_meta, X_valid_token_vec)
X_test = _concat_by_position(X_test_meta, X_test_token_vec)

# Wrap the targets in single-column frames named 'target'.
y_train = pd.DataFrame(y_train, columns=['target'])
y_valid = pd.DataFrame(y_valid, columns=['target'])
y_test = pd.DataFrame(y_test, columns=['target'])

# Features and target together, one frame per split.
train_df = _concat_by_position(X_train, y_train)
valid_df = _concat_by_position(X_valid, y_valid)
test_df = _concat_by_position(X_test, y_test)

In [1]:
from xgboost import XGBClassifier

# Load the previously trained classifier from disk. (The original instantiated a
# fresh XGBClassifier first, but it was overwritten immediately and never used.)
# NOTE: pickle.load can execute arbitrary code; only load model files that come
# from a trusted source.
with open("data/stomach_tree.pkl", "rb") as f:
    model = pickle.load(f)

# Keep column 1 of the predicted probabilities for the test split
# (presumably the positive class -- confirm against model.classes_).
prob = model.predict_proba(X_test)[:, 1]

In [None]:
import numpy as np
        
# (field, value that triggers the penalty, penalty), applied in this order.
_FLAG_RISKS = (
    ("is_operation", 1, 0.3),
    ("is_medical_history", 1, 0.3),
    ("is_alertness", 0, 1),
    ("is_digestive", 1, 0.3),
    ("is_hemoptysis", 1, 0.5),
    ("is_bloody_excrement", 1, 0.8),
)


def _banded_risk(value, low, mid, high):
    """Return 0.3 for low < value <= mid, 0.6 for mid < value <= high, 1 above high, else 0."""
    if low < value <= mid:
        return 0.3
    if mid < value <= high:
        return 0.6
    if value > high:
        return 1
    return 0


def compute_meta_risk(x):
    """Compute a rule-based risk score, capped at 1, from one record's meta fields.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row or a dict)
        Must provide ``pain_NRS``, ``temperature``, ``pulse``, ``respiration``,
        ``is_operation``, ``is_medical_history``, ``is_alertness``,
        ``is_digestive``, ``is_hemoptysis`` and ``is_bloody_excrement``.

    Returns
    -------
    number
        Sum of the individual penalties, limited to at most 1.
    """
    risk = 0
    risk += x['pain_NRS'] * 0.1

    # Vital signs are scored in three bands above a threshold. Values at or below
    # the lowest threshold add nothing. The original gave the full penalty to
    # every temperature outside (37, 39], which included normal temperatures;
    # temperature now follows the same scheme as pulse and respiration.
    risk += _banded_risk(x["temperature"], 37, 38, 39)
    risk += _banded_risk(x["pulse"], 80, 90, 100)
    risk += _banded_risk(x["respiration"], 16, 18, 20)

    # Binary indicators: add the penalty when the field equals its trigger value.
    # Note that is_alertness is penalized when it is 0, the others when they are 1.
    for field, trigger, penalty in _FLAG_RISKS:
        if x[field] == trigger:
            risk += penalty

    # Cap the accumulated score at 1.
    if risk > 1:
        risk = 1

    return risk

In [None]:
# `prob` holds the model's predictions for X_test, so the rule-based risk must be
# computed on the same rows. The original used X_train here, which averaged scores
# that belong to different samples.
meta_risk = X_test.apply(compute_meta_risk, axis=1)

# Final score: unweighted mean of the model probability and the rule-based risk.
final_prob = (prob + meta_risk) / 2