In [1]:
import pandas as pd

data=pd.read_csv('train.csv')

In [2]:
cols=['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

sdata=data[cols].sample(frac=1)

In [3]:
import numpy as np
from sklearn import preprocessing

def prepare(sdata):
    sdata['Age']=sdata['Age'].fillna(sdata['Age'].mean())
    sdata['Embarked']=sdata['Embarked'].fillna('S')
    print (sdata.isnull().sum())

    sdata['Sex']=sdata['Sex'].map({'female':0,'male':1}).astype(int)
    sdata['Embarked']=sdata['Embarked'].map({'C':0,'Q':1,'S':2}).astype(int) 

    print (sdata[:5])

    features=sdata.drop(['Survived'],axis=1) #DataFrame (891,7)
    f=features.values  #ndarray (891,7)

    scale=preprocessing.MinMaxScaler() 
    norm_features=scale.fit_transform(f) #ndarray (891,7)

    #scale.fit_transfrom()传入不管是features还是f，输出的都是ndarray数组
    #因此，我们最好还是令函数的返回值统一为ndarray类型
    labels=sdata[['Survived']] #DataFrame (891,1)
    labels=labels.values #ndarray (891,1)
    
    return norm_features,labels

norm_features,labels=prepare(sdata)


Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64
     Survived  Pclass  Sex        Age  SibSp  Parch      Fare  Embarked
821         1       3    1  27.000000      0      0    8.6625         2
384         0       3    1  29.699118      0      0    7.8958         2
14          0       3    0  14.000000      0      0    7.8542         2
182         0       3    1   9.000000      4      2   31.3875         2
716         1       1    0  38.000000      0      0  227.5250         0


In [4]:
size=int(len(norm_features)*0.8)

x_train=norm_features[:size]
y_train=labels[:size]

x_test=norm_features[size:]
y_test=labels[size:]

In [5]:
import tensorflow as tf

model=tf.keras.models.Sequential()

#隐层1
model.add(tf.keras.layers.Dense(units=64,
                                #input_shape=(7,)
                                input_dim=7,
                               activation='relu'))
model.add(tf.keras.layers.Dropout(rate=0.3))

#隐层2
model.add(tf.keras.layers.Dense(units=32,
                               activation='relu'))
model.add(tf.keras.layers.Dropout(rate=0.3))

#输出层
model.add(tf.keras.layers.Dense(units=1,
                               activation='sigmoid'))


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [6]:
model.compile(optimizer=tf.keras.optimizers.Adam(0.003),
             loss='binary_crossentropy',
             metrics=['accuracy'])


# 设置回调参数

tf.keras.callbacks.TensorBoard
tf.keras.callbacks.ModelCheckpoint
tf.keras.callbacks.LearningRateScheduler
tf.keras.callbacks.EarlyStopping


本代码所在目录并无logs和ckpt这两个文件夹，拟合模型model.fit时，只要一调用callbacks就会自动创建文件与文件夹拉！其中period=5是指每5个epoch记录一次模型

In [7]:
logdir='./logs'
ckpt_path='./ckpt/titanic_{epoch:02d}_{val_loss:.2f}.ckpt'

callbacks=[tf.keras.callbacks.TensorBoard(log_dir=logdir,
                                          histogram_freq=2),
         tf.keras.callbacks.ModelCheckpoint(filepath=ckpt_path,
                                           save_weights_only=True,
                                           verbose=1,
                                           period=5)]


In [8]:
train_history=model.fit(x=x_train,
                        y=y_train,
                        validation_split=0.2,
                        epochs=100,
                        batch_size=40,
                        callbacks=callbacks,
                        verbose=2)

#如果想要查看tensorboard，这个时候就可以去查看拉
#进入终端
#conda activate dayuse
#tensorboard --logdir 文件夹路径

Train on 569 samples, validate on 143 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/100
 - 1s - loss: 0.6587 - acc: 0.5729 - val_loss: 0.5481 - val_acc: 0.7133
Epoch 2/100
 - 0s - loss: 0.5834 - acc: 0.7170 - val_loss: 0.5115 - val_acc: 0.7902
Epoch 3/100
 - 0s - loss: 0.5259 - acc: 0.7733 - val_loss: 0.4734 - val_acc: 0.7902
Epoch 4/100
 - 0s - loss: 0.4949 - acc: 0.7926 - val_loss: 0.4680 - val_acc: 0.7902
Epoch 5/100

Epoch 00005: saving model to ./ckpt/titanic_05_0.45.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
Instructions for updating:
Use tf.train.CheckpointManager to manage checkpoints rather than manually editing the Checkpoint proto.
 - 0s - loss: 0.4765 - acc: 0.7838 - val_loss: 0.4475 - val_acc: 0.7902
Epoch 6/100
 - 0s - loss: 0.4807 - acc: 0.7891 - val_loss: 0.4608 - val_acc: 0.7902
Epoch 7/100
 - 0s - loss: 0.4616 - acc: 0.7944 - val_loss: 0.4405 - val_acc: 0.7972
Epoch 8/100
 - 0s - loss: 0.4658 - acc: 0.7891 - val_loss: 0.4490 - val_a

 - 0s - loss: 0.4117 - acc: 0.8207 - val_loss: 0.4168 - val_acc: 0.8252
Epoch 46/100
 - 0s - loss: 0.4014 - acc: 0.8348 - val_loss: 0.4226 - val_acc: 0.8252
Epoch 47/100
 - 0s - loss: 0.4048 - acc: 0.8278 - val_loss: 0.4139 - val_acc: 0.8112
Epoch 48/100
 - 0s - loss: 0.4133 - acc: 0.8207 - val_loss: 0.4152 - val_acc: 0.8252
Epoch 49/100
 - 0s - loss: 0.4058 - acc: 0.8313 - val_loss: 0.4114 - val_acc: 0.8322
Epoch 50/100

Epoch 00050: saving model to ./ckpt/titanic_50_0.41.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
 - 0s - loss: 0.4125 - acc: 0.8207 - val_loss: 0.4150 - val_acc: 0.8182
Epoch 51/100
 - 0s - loss: 0.4131 - acc: 0.8225 - val_loss: 0.4141 - val_acc: 0.8322
Epoch 52/100
 - 0s - loss: 0.4165 - acc: 0.8207 - val_loss: 0.4120 - val_acc: 0.8322
Epoch 53/100
 - 0s - loss: 0.4098 - acc: 0.8295 - val_loss: 0.4191 - val_acc: 0.8182
Epoch 54/100
 - 0s - loss: 0.4118 - acc: 0.8313 - val_loss: 0.4141 - val_acc: 0.8182
Epoch 55/100

Epoch 00055: saving model to ./ckpt

Epoch 95/100

Epoch 00095: saving model to ./ckpt/titanic_95_0.41.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
 - 0s - loss: 0.3925 - acc: 0.8348 - val_loss: 0.4106 - val_acc: 0.8182
Epoch 96/100
 - 0s - loss: 0.3949 - acc: 0.8348 - val_loss: 0.4060 - val_acc: 0.8462
Epoch 97/100
 - 0s - loss: 0.3855 - acc: 0.8348 - val_loss: 0.3950 - val_acc: 0.8322
Epoch 98/100
 - 0s - loss: 0.3907 - acc: 0.8172 - val_loss: 0.4040 - val_acc: 0.8322
Epoch 99/100
 - 0s - loss: 0.3782 - acc: 0.8348 - val_loss: 0.3983 - val_acc: 0.8462
Epoch 100/100

Epoch 00100: saving model to ./ckpt/titanic_100_0.39.ckpt

Consider using a TensorFlow optimizer from `tf.train`.
 - 0s - loss: 0.3831 - acc: 0.8489 - val_loss: 0.3915 - val_acc: 0.8531


In [9]:
train_history.history

{'loss': [0.6587369190578092,
  0.5833902192450995,
  0.5259014776071471,
  0.4949246454846461,
  0.47646250309969296,
  0.48071800659431935,
  0.4616326936938013,
  0.4657607224683141,
  0.4516465955124169,
  0.4471079030439179,
  0.4575946504390931,
  0.43724586569781043,
  0.44999320435817297,
  0.4428041096521388,
  0.4403024052903187,
  0.4430569413466697,
  0.4364031989304169,
  0.42561847296247163,
  0.43916666465819615,
  0.43473590797824896,
  0.4173639301465978,
  0.4165492407676415,
  0.448184239791022,
  0.4291022886081823,
  0.4293100233656241,
  0.4213972911893379,
  0.43686148576661027,
  0.4332975215673028,
  0.4137095805578366,
  0.4394123462570573,
  0.42134953074170334,
  0.40750026747401025,
  0.40762763267451724,
  0.4113681821286783,
  0.4072934159704587,
  0.42278119502670947,
  0.4005571176381438,
  0.4062846559108037,
  0.4122344888157501,
  0.4198842688687866,
  0.4081448931476144,
  0.4059766840536691,
  0.4101275075718053,
  0.4123667151731012,
  0.411708690

# 从checkpoint文件中恢复模型

In [10]:
latest=tf.train.latest_checkpoint('./ckpt/')

In [11]:
model.load_weights(latest)

<tensorflow.python.training.checkpointable.util.CheckpointLoadStatus at 0x1a37539080>

# 恢复模型评估

In [12]:
loss,acc=model.evaluate(x_test,y_test)
print ('acc:{:5.2f}%'.format(100*acc))

acc:79.89%
