In [8]:
import numpy as np
import pandas as pd
import sklearn

import matplotlib.pyplot as plt

In [11]:
# Step 1: load the 2017 records and discard every row that has a missing value.
df = pd.read_csv("2017.csv")
df = df.dropna()

# Step 2: one-hot encode the categorical columns (sex, smoking status).
df = pd.get_dummies(df, columns=['sex','SMK_STAT_TYPE_RSPS_CD'])

# Step 3: keep only samples whose creatinine lies in the closed range [0.0, 2.5].
# The trailing space in "CREATININE " is part of the column name.
df = df.loc[df["CREATININE "].between(0.0, 2.5)]

# Step 4: positional split into training rows and test rows.
df_train = df.iloc[:900000]
df_test = df.iloc[900000:990112]

In [12]:
# Step 5: down-sample the training and test sets.
#
# Samples are grouped into 25 open creatinine intervals (low, high), the first
# being (0.05, 0.15) and each following one shifted up by 0.1. Selected
# intervals are capped at a fixed number of rows so they do not dominate.
SAMPLING_SEED = 42             # fixed seed: Restart & Run All yields the same subsets
TRAIN_CAP, TEST_CAP = 2500, 100
TRAIN_CAPPED_BINS = range(3, 14)   # interval indices capped in the training set
TEST_CAPPED_BINS = range(3, 16)    # interval indices capped in the test set


def _cap_rows(frame, limit, seed=SAMPLING_SEED):
    """Return at most `limit` randomly selected rows of `frame`.

    A frame that already has `limit` rows or fewer is returned unchanged
    instead of raising ValueError, which an unguarded `frame.sample(limit)`
    would do for a frame with fewer than `limit` rows.
    """
    if len(frame) <= limit:
        return frame
    return frame.sample(n=limit, random_state=seed)


intervals = [(i * 0.1 + 0.05, i * 0.1 + 0.15) for i in range(25)]

temp_train, temp_test = [], []

for index, (low, high) in enumerate(intervals):

    # Strict inequalities on both sides: values equal to a boundary are dropped.
    df_sample_train = df_train.loc[(df_train["CREATININE "] > low) & (df_train["CREATININE "] < high)]
    df_sample_test  = df_test.loc[(df_test["CREATININE "] > low) & (df_test["CREATININE "] < high)]

    if index in TRAIN_CAPPED_BINS:
        df_sample_train = _cap_rows(df_sample_train, TRAIN_CAP)
    if index in TEST_CAPPED_BINS:
        df_sample_test = _cap_rows(df_sample_test, TEST_CAP)

    temp_train.append(df_sample_train)
    temp_test.append(df_sample_test)

df_train = pd.concat(temp_train, ignore_index=True)
df_test = pd.concat(temp_test, ignore_index=True)

# Step 6: select the features x and the target y.
# The target itself and the other listed columns are excluded from the features;
# one shared list keeps the train and test feature sets identical.
NON_FEATURE_COLUMNS = ["id", "sido", "CREATININE ", "GFR", "stage", "height", "weight"]

y_train = df_train["CREATININE "].values.reshape(-1, 1)
x_train = df_train.drop(columns=NON_FEATURE_COLUMNS).values

y_test = df_test["CREATININE "].values.reshape(-1, 1)
x_test = df_test.drop(columns=NON_FEATURE_COLUMNS).values

In [None]:
# step7: 定义resnet_regressor模型
from keras.models import Model
from keras.optimizers import Adam
from keras.layers.advanced_activations import ReLU
from keras.layers import Input, Dense, BatchNormalization, add

def dense_block(input_tensor, units):
    """Residual block whose shortcut branch is a learned projection.

    Main branch: three Dense(units) layers, each followed by
    BatchNormalization, with a ReLU after the first two only.
    Shortcut branch: input_tensor passed through its own Dense(units) and
    BatchNormalization, so the block can change the feature width.
    The two branches are summed and a final ReLU is applied.

    Args:
        input_tensor: tensor fed to both branches.
        units: number of units of every Dense layer in the block.

    Returns:
        The output tensor of the final ReLU.
    """
    main = input_tensor
    # The third Dense/BatchNormalization pair has no activation: the ReLU is
    # applied only after the shortcut has been added.
    for with_relu in (True, True, False):
        main = Dense(units)(main)
        main = BatchNormalization()(main)
        if with_relu:
            main = ReLU()(main)

    projection = Dense(units)(input_tensor)
    projection = BatchNormalization()(projection)

    merged = add([main, projection])
    return ReLU()(merged)

def identity_block(input_tensor, units):
    """Residual block whose shortcut is the unmodified input.

    Main branch: three Dense(units) layers, each followed by
    BatchNormalization, with a ReLU after the first two only.
    input_tensor is then added directly to the main branch (so its feature
    width is expected to equal `units`) and a final ReLU is applied.

    Args:
        input_tensor: tensor fed to the main branch and reused as shortcut.
        units: number of units of every Dense layer in the block.

    Returns:
        The output tensor of the final ReLU.
    """
    main = input_tensor
    # No activation after the last Dense/BatchNormalization pair: the ReLU
    # comes after the residual addition.
    for with_relu in (True, True, False):
        main = Dense(units)(main)
        main = BatchNormalization()(main)
        if with_relu:
            main = ReLU()(main)

    merged = add([main, input_tensor])
    return ReLU()(merged)

def ResNetRegression(input_dim=26, width=16):
    """Build the fully connected residual network used for regression.

    The network is three identical stages, each made of one dense_block
    followed by two identity_blocks, then a BatchNormalization layer and a
    single linear output unit.

    Args:
        input_dim: number of input features per sample. Defaults to 26, the
            value that was previously hard-coded; it has to match the number
            of columns of the feature matrix passed to the model.
        width: number of units used by every block. Defaults to 16, the
            value that was previously hard-coded.

    Returns:
        An uncompiled keras Model mapping (input_dim,) inputs to one output.
    """
    Res_input = Input(shape=(input_dim,))

    x = Res_input
    for _ in range(3):
        # One projection block followed by two identity blocks per stage.
        x = dense_block(x, width)
        x = identity_block(x, width)
        x = identity_block(x, width)

    x = BatchNormalization()(x)
    x = Dense(1, activation='linear')(x)

    return Model(inputs=Res_input, outputs=x)

# Instantiate the network with its default configuration and print its
# layer-by-layer summary; line_length=140 sets the width of the printed lines.
model = ResNetRegression()
model.summary(line_length=140)

In [None]:
# Step 8: train the ResNet regressor, then compute R^2 between the predicted
# and the true values on the test set.
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

# ss_y is not used in this cell (the target is left unscaled); it is kept
# in case other cells reference it.
ss_x, ss_y = StandardScaler(), StandardScaler()

# The scaler is fitted on the training features only and then applied to the
# test features, so no test-set statistics leak into training.
x_train_normalized = ss_x.fit_transform(x_train)
x_test_normalized = ss_x.transform(x_test)

model.compile(loss="mse", optimizer='adam', metrics=['mae'])
model.fit(
    x_train_normalized,
    y_train,
    # Keras documents validation_data as a tuple (x_val, y_val).
    validation_data=(x_test_normalized, y_test),
    batch_size=32,
    # `epochs` replaces the deprecated Keras 1 spelling `nb_epoch`.
    epochs=20,
    verbose=1,
)

# x_test_normalized from above is reused; transforming x_test again would
# produce the same array.
test_pred = model.predict(x_test_normalized)
print(r2_score(y_test, test_pred))