### ROC曲线
### ROC曲线位于45度对角线（相当于随机猜测）之上、越靠近左上角，则代表模型的预测能力越强
## ROC评估模型

In [None]:
# The ROC analysis below works on a binary target (True/False, i.e. 1/0).
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn import preprocessing

iris = load_iris()

# Keep rows 50..149 only, so that just two of the classes remain.
binary_rows = slice(50, 150)
X = iris.data[binary_rows]

# The targets of those 100 rows are labelled 1 and 2; LabelEncoder's
# fit_transform re-encodes them as 0 and 1.
le = preprocessing.LabelEncoder()
binary_target = iris.target[binary_rows]
y = le.fit_transform(binary_target)
# Bare expression: lets the notebook display the encoded labels.
y

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing; the fixed seed makes the split
# reproducible.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.33, random_state=123
)

# Seed the tree too: with a seeded split but an unseeded estimator, the
# fitted tree (and the ROC/AUC derived from it) could still differ between
# runs.
clf = DecisionTreeClassifier(random_state=123)
# Last expression of the cell: fits the model and lets the notebook show it.
clf.fit(train_X, train_y)

In [None]:
# clf has already been fitted on (train_X, train_y) earlier, so it is only
# queried here. Calling fit() again would retrain the same estimator on the
# same data for no benefit.
probas_ = clf.predict_proba(test_X)
# probas_ has two columns that complement each other (when column 0 is 0,
# column 1 is 1), so a single column is enough to build the ROC curve.
probas_[:, 1]
 

In [None]:
from sklearn.metrics import roc_curve, auc

# Score each test sample with column 1 of probas_ and compute the
# false-positive rates, true-positive rates and the matching thresholds.
fpr, tpr, thresholds = roc_curve(y_true=test_y, y_score=probas_[:, 1])
# Evaluate `tpr` (or `fpr`) as the last line of the cell to inspect the values.

In [None]:
import matplotlib.pyplot as plt

# An ROC curve puts the false-positive rate on the x axis and the
# true-positive rate on the y axis.
plt.plot(fpr, tpr, label='ROC curve')
# Dashed black diagonal from (0, 0) to (1, 1), drawn as a reference line.
plt.plot([0, 1], [0, 1], 'k--')

# Both axes are limited to the range [0, 1].
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)

plt.title('Receiver operating characteristic example')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()

In [None]:
# An ROC curve lying between the 45-degree line and the 90-degree line
# indicates that the model has predictive power.
# The AUC (area under the curve) is then used to judge how strong that
# predictive power is: the closer the AUC is to 1, the better the model.
from sklearn.metrics import auc

roc_auc = auc(x=fpr, y=tpr)
print("Area under the ROC curve : %f" % (roc_auc,))

#### 评估不同模型

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Four classifiers trained on the same training split so that their ROC
# curves can be compared. The estimators that involve randomness are seeded
# so the comparison is reproducible from run to run.
clf1 = DecisionTreeClassifier(random_state=123)
clf1.fit(train_X, train_y)

# probability=True is required for SVC to provide predict_proba.
clf2 = SVC(probability=True, random_state=123)
clf2.fit(train_X, train_y)

clf3 = LogisticRegression()
clf3.fit(train_X, train_y)

clf4 = RandomForestClassifier(random_state=123)
# Last expression of the cell: fits the forest and lets the notebook show it.
clf4.fit(train_X, train_y)

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=[20, 10])

# Draw one ROC curve per model, with its AUC shown in the legend entry.
# NOTE: this loop rebinds the notebook-level names clf, probas_, fpr, tpr and
# thresholds; after the cell runs they refer to the last model in the list.
for clf, title in zip(
    [clf1, clf2, clf3, clf4],
    ['Decision Tree', 'SVM', 'LogisticRegression', 'RandomForest'],
):
    # clf1..clf4 are already fitted where they are created, so they are only
    # scored here instead of being retrained on every plot.
    probas_ = clf.predict_proba(test_X)
    fpr, tpr, thresholds = roc_curve(test_y, probas_[:, 1])
    plt.plot(fpr, tpr, label='%s - AUC:%.2f' % (title, auc(fpr, tpr)))

# Dashed black diagonal from (0, 0) to (1, 1), drawn as a reference line.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate', fontsize=20)
plt.ylabel('True Positive Rate', fontsize=20)
plt.title('Receiver operating characteristic example', fontsize=20)
plt.legend(loc="lower right", fontsize=20)
plt.show()