# 构建数据集

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import linear_model, metrics

try:
    # Current home of train_test_split (scikit-learn >= 0.18).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy location; this module was removed in scikit-learn 0.20.
    from sklearn.cross_validation import train_test_split

%matplotlib inline

In [None]:
# Load the raw table; the path is resolved relative to the working directory.
model_data = pd.read_csv("date_data.csv")

# Target is the "Dated" column; features are every column from 'income'
# through 'assets' (label slices are inclusive on both ends).
Y = model_data["Dated"]
# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
X = model_data.loc[:, 'income':'assets']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
train_data, test_data, train_target, test_target = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# The preview is the last expression so the notebook actually renders it
# (a bare expression in the middle of a cell produces no output).
model_data.head()

# 建模

In [None]:
# Fit a logistic regression with the library's default settings on the
# training split. fit() returns the estimator itself, so construction and
# fitting can be chained.
logistic_model = linear_model.LogisticRegression().fit(train_data, train_target)

# Echo the fitted estimator as the cell output.
logistic_model

In [None]:
# Hard class predictions for the test split, then the training split.
test_est, train_est = (
    logistic_model.predict(features) for features in (test_data, train_data)
)

# Predicted probability of the second class (column 1 of predict_proba),
# again for test and then train; these scores feed the ranking metrics.
test_est_p, train_est_p = (
    logistic_model.predict_proba(features)[:, 1]
    for features in (test_data, train_data)
)

# 决策（Decisions）类检验

In [None]:
# Per-class precision / recall / F1 on the held-out split, printed as text.
report_text = metrics.classification_report(y_true=test_target, y_pred=test_est)
print(report_text)

In [None]:
# Share of test rows whose predicted class matches the true label.
metrics.accuracy_score(y_true=test_target, y_pred=test_est)

# 排序（Rankings）类检验

### ROC曲线

In [None]:
# ROC points (false-positive rate, true-positive rate, thresholds) for the
# held-out split and for the training split.
fpr_test, tpr_test, th_test = metrics.roc_curve(test_target, test_est_p)
fpr_train, tpr_train, th_train = metrics.roc_curve(train_target, train_est_p)

# Overlay both curves; a large gap between them would hint at overfitting.
plt.figure(figsize=[6,6])
plt.plot(fpr_test, tpr_test, color='red', label='test')
plt.plot(fpr_train, tpr_train, color='black', label='train')
# Labels and legend so the figure can be read on its own.
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='lower right')
plt.title('ROC curve');  # trailing ';' hides the Text(...) repr

In [None]:
# Area under the ROC curve, computed for the test split first and then the
# training split, from the predicted probabilities.
test_AUC, train_AUC = (
    metrics.roc_auc_score(labels, scores)
    for labels, scores in ((test_target, test_est_p), (train_target, train_est_p))
)
print("test_AUC:", test_AUC, "train_AUC:", train_AUC)

### KS曲线

In [None]:
# Horizontal coordinate: position of each ROC point within the threshold
# sequence returned by roc_curve, scaled to [0, 1).
test_x_axis = np.arange(len(fpr_test))/float(len(fpr_test))
# Not plotted here; kept so the training curves can be overlaid the same way.
train_x_axis = np.arange(len(fpr_train))/float(len(fpr_train))

# KS plot: both cumulative rates against the threshold position. The rates go
# on the vertical axis so the KS statistic reads as the largest vertical gap
# between the two curves.
plt.figure(figsize=[6,6])
plt.plot(test_x_axis, fpr_test, color='blue', label='FPR (test)')
plt.plot(test_x_axis, tpr_test, color='red', label='TPR (test)')
plt.xlabel('Threshold rank (fraction of ROC points)')
plt.ylabel('Cumulative rate')
plt.legend(loc='lower right')
plt.title('KS curve');  # trailing ';' hides the Text(...) repr

In [None]:
from scipy.stats import ks_2samp

# The model's KS is the two-sample KS statistic between the predicted scores
# of the positive rows and those of the negative rows (the same quantity as
# max |TPR - FPR|). Feeding the ROC coordinates themselves to ks_2samp would
# compare two arrays of rates as if they were samples, which is not that.
# Assumes the positive label is 1, matching roc_curve's default when called
# without pos_label -- TODO confirm against the "Dated" column's coding.
# np.asarray drops the pandas index so the mask lines up positionally with
# the NumPy score array.
is_positive = np.asarray(test_target) == 1
ks_2samp(test_est_p[is_positive], test_est_p[~is_positive])