In [254]:
import pandas as pd
import numpy as np
import seaborn as sns
import pylab as plt
import mglearn
import tensorflow as tf
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, GridSearchCV, StratifiedShuffleSplit, learning_curve
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import confusion_matrix, f1_score, roc_curve, classification_report
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import SelectFromModel, SelectKBest, chi2, f_classif, SelectFpr
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.wrappers.scikit_learn import KerasClassifier
%matplotlib inline

In [2]:
def learning_curve_model(X, Y, model, cv, train_sizes):
    """Plot the learning curve of ``model`` on ``(X, Y)``.

    Runs ``learning_curve`` with the given cross-validation strategy and
    training-set sizes (using 4 parallel jobs), then draws the mean training
    and cross-validation scores, each with a shaded band of one standard
    deviation around it.

    Parameters
    ----------
    X, Y : features and target, forwarded to ``learning_curve``.
    model : estimator to evaluate.
    cv : cross-validation strategy, forwarded to ``learning_curve``.
    train_sizes : training-set sizes, forwarded to ``learning_curve``.

    Returns
    -------
    The ``plt`` plotting module, with the figure drawn on it.
    """
    plt.figure(figsize=(10, 10))
    plt.title("Learning curve")
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    sizes, fit_scores, cv_scores = learning_curve(
        model, X, Y, cv=cv, n_jobs=4, train_sizes=train_sizes
    )

    # (mean, std, colour, legend label) for each curve, averaged over the CV folds.
    curves = [
        (np.mean(scores, axis=1), np.std(scores, axis=1), colour, label)
        for scores, colour, label in (
            (fit_scores, "r", "Training score"),
            (cv_scores, "g", "Cross-validation score"),
        )
    ]

    plt.grid()

    # Shaded +/- one standard deviation bands first, then the mean lines on top.
    for mean, std, colour, _ in curves:
        plt.fill_between(sizes, mean - std, mean + std, alpha=0.1, color=colour)
    for mean, _, colour, label in curves:
        plt.plot(sizes, mean, 'o-', color=colour, label=label)

    plt.legend(loc="best")
    return plt

In [3]:
class NullColumnsCleaner(BaseEstimator, TransformerMixin):
    """Drop columns with many missing values, then drop rows that still have nulls.

    A column is dropped when its number of null entries is strictly greater
    than ``max_nulls``. The set of columns to drop is learned in ``fit`` and
    reused by every later ``transform`` call, so frames transformed by the same
    fitted instance lose the same columns.

    Parameters
    ----------
    max_nulls : int, default=50
        Largest number of nulls a column may contain and still be kept.

    Attributes
    ----------
    null_columns_ : pandas.Index
        Labels of the columns selected for removal during ``fit``.

    Notes
    -----
    ``transform`` also removes rows (via ``dropna``), so the output can have
    fewer rows than the input; a separately held target is not realigned here.
    """

    def __init__(self, max_nulls=50):
        self.max_nulls = max_nulls

    def _sparse_columns(self, X):
        # Labels of the columns whose null count exceeds the threshold.
        null_counts = X.isnull().sum()
        return null_counts[null_counts > self.max_nulls].keys()

    def fit(self, X, y=None):
        """Record which columns of ``X`` exceed the null threshold; return ``self``."""
        self.null_columns_ = self._sparse_columns(X)
        return self

    def transform(self, X, y=None):
        """Return ``X`` without the sparse columns and without rows containing nulls."""
        columns = getattr(self, "null_columns_", None)
        if columns is None:
            # Not fitted: fall back to deriving the columns from X itself,
            # which is what this transformer did before it had fitted state.
            columns = self._sparse_columns(X)
        # errors="ignore": a learned column absent from X is simply skipped.
        return X.drop(columns, axis=1, errors="ignore").dropna()

In [311]:
# Target column, and the full list of columns excluded from the feature matrix.
TARGET_COLUMN = "DisNeuro01"
NON_FEATURE_COLUMNS = [TARGET_COLUMN, "Stroke(AIS,TIA,AHS)"]

# Keep the raw frame under its own name so re-running this cell is idempotent.
data_raw = pd.read_excel('Data.xlsm')
data = NullColumnsCleaner().fit_transform(data_raw).reset_index(drop=True)

X = data.drop(NON_FEATURE_COLUMNS, axis=1)
y = data[TARGET_COLUMN].copy()

# One stratified 80/20 split. split() yields positional row indices, so the
# rows are selected with .iloc rather than label-based .loc.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_index, test_index = next(split.split(X, y))
strat_train = data.iloc[train_index]
strat_test = data.iloc[test_index]

X_train = strat_train.drop(NON_FEATURE_COLUMNS, axis=1)
y_train = strat_train[TARGET_COLUMN].copy()
X_test = strat_test.drop(NON_FEATURE_COLUMNS, axis=1)
y_test = strat_test[TARGET_COLUMN].copy()

In [346]:
# Expand the features with pairwise interaction terms; the pipeline is fitted
# on the training split only and then applied to both splits.
prep_pipe = Pipeline([('poly_feature', PolynomialFeatures(interaction_only=True))]).fit(X_train)
X_train_prep = prep_pipe.transform(X_train)
X_test_prep = prep_pipe.transform(X_test)

# Rank features with an extra-trees ensemble. The seed is fixed so the selected
# feature set (and the input width of the networks built on it) is the same on
# every run; a previously recorded output shape may differ from the seeded one.
clf = ExtraTreesClassifier(random_state=42)
clf = clf.fit(X_train_prep, y_train)

# NOTE: `model` is the feature selector here; a later cell rebinds the name.
# Alternatives tried: SelectFpr(f_classif), SelectKBest(chi2, k=20).
model = SelectFromModel(clf, prefit=True)
X_new_train = model.transform(X_train_prep)
X_new_test = model.transform(X_test_prep)
print(X_new_train.shape)

(114, 47)


In [353]:
# Short aliases for the selected feature matrices; later cells use them too.
X_tr, X_te = X_new_train, X_new_test

# Training labels as a column vector; test labels are passed as a flat array.
train_labels = y_train.values.reshape(-1, 1)
test_labels = y_test.values

# One hidden ReLU layer of 40 units feeding a single sigmoid output.
# (A variant with Dropout(0.5) and an extra Dense(10) layer is left disabled.)
model = Sequential([
    Dense(40, input_dim=X_tr.shape[1], activation='relu'),
    Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# The test split is passed as validation data during fitting.
model.fit(
    X_tr,
    train_labels,
    epochs=100,
    verbose=0,
    validation_data=(X_te, test_labels),
)
print(model.evaluate(X_te, test_labels))

[0.8848273754119873, 0.62068963050842285]


In [354]:
def get_train_inputs():
    """Return the training features and labels as a pair of TensorFlow constants.

    Reads the notebook-level ``X_tr`` and ``y_train`` rather than taking
    arguments, so it can be passed directly as an ``input_fn``.
    """
    features = tf.constant(X_tr)
    labels = tf.constant(y_train.values)
    return features, labels

# Only surface TensorFlow log messages at ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)

# Two-class DNN with hidden layers of 40 and 20 units, trained for 300 steps
# on the inputs produced by get_train_inputs.
feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(X_tr)
dnn_clf = tf.contrib.learn.DNNClassifier(
    hidden_units=[40, 20],
    n_classes=2,
    feature_columns=feature_columns,
)
dnn_clf.fit(input_fn=get_train_inputs, steps=300)

# Evaluate on the test split; the metrics dict is the cell's displayed output.
dnn_metrics = dnn_clf.evaluate(X_te, y_test.values, steps=1)
dnn_metrics

  equality = a == b


{'accuracy': 0.48275861,
 'accuracy/baseline_label_mean': 0.48275861,
 'accuracy/threshold_0.500000_mean': 0.48275861,
 'auc': 0.54761904,
 'global_step': 300,
 'labels/actual_label_mean': 0.48275861,
 'labels/prediction_mean': 0.46441624,
 'loss': 1.9080863,
 'precision/positive_threshold_0.500000_mean': 0.46153846,
 'recall/positive_threshold_0.500000_mean': 0.42857143}

In [355]:
# Print the first weight array of every layer in the network.
for layer in model.layers:
    layer_weights = layer.get_weights()
    if not layer_weights:
        # Layers without trainable state (e.g. Dropout) return an empty list;
        # indexing it would raise IndexError, so skip them.
        continue
    print(layer_weights[0])

[[ 0.07022304  0.01963894 -0.09562147 ...,  0.21451265 -0.10202125
  -0.29104653]
 [ 0.0274011   0.04265553 -0.17784394 ..., -0.19714531  0.04068512
   0.03098069]
 [ 0.30032614 -0.39405358 -0.12452837 ..., -0.3665016  -0.21119604
  -0.10556354]
 ..., 
 [-0.27000073  0.0360661   0.02590852 ...,  0.24353525 -0.16821069
   0.13587095]
 [ 0.21006039 -0.27518669 -0.23353732 ...,  0.08160054 -0.06464237
   0.07504508]
 [-0.23072854  0.14799185 -0.16450757 ..., -0.04847949  0.29932025
  -0.04206764]]
[[-0.1300237 ]
 [ 0.32955766]
 [ 0.37371755]
 [-0.11488052]
 [ 0.18006392]
 [-0.17435811]
 [ 0.51251978]
 [-0.31183073]
 [-0.40609482]
 [ 0.29470089]
 [ 0.23230675]
 [ 0.26352566]
 [-0.36774698]
 [ 0.43156052]
 [-0.32761279]
 [-0.19082861]
 [ 0.16531532]
 [-0.18332806]
 [-0.20882291]
 [ 0.23127972]
 [ 0.0962158 ]
 [ 0.64265615]
 [-0.04209156]
 [ 0.42647254]
 [-0.46566743]
 [-0.08309542]
 [ 0.12557921]
 [-0.27126643]
 [-0.19862708]
 [ 0.43681416]
 [-0.14535341]
 [-0.4568108 ]
 [ 0.29668903]
 [ 0.