In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

%matplotlib inline

def plot_svc(svc, X, y, h=0.02, pad=0.25):
    """Plot the decision regions and support vectors of a fitted classifier.

    Parameters
    ----------
    svc : fitted estimator
        Must expose ``predict``, ``support_vectors_`` and ``support_``.
    X : array-like with at least two columns
        Only the first two columns are used (as the X1 and X2 axes). The
        grid passed to ``svc.predict`` has exactly two columns.
    y : array-like
        Values used to colour the scattered observations.
    h : float, default 0.02
        Step of the mesh grid on which predictions are evaluated.
    pad : float, default 0.25
        Margin added around the data range on both axes.

    Shows the figure with ``plt.show()`` and prints the number of support
    vectors; returns nothing.
    """
    # Coerce to an ndarray so positional slicing also works when a
    # DataFrame is passed in.
    X = np.asarray(X)

    x_min, x_max = X[:, 0].min() - pad, X[:, 0].max() + pad
    y_min, y_max = X[:, 1].min() - pad, X[:, 1].max() + pad

    # Predict on a dense grid spanning the padded data range and shade the
    # predicted regions.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = svc.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.2)

    # Observations coloured by y, with support vectors overlaid as black crosses.
    plt.scatter(X[:, 0], X[:, 1], s=70, c=y, cmap=plt.cm.Paired)
    sv = svc.support_vectors_
    # linewidths is numeric: the original passed the string '1', which the
    # scatter API does not document as an accepted type.
    plt.scatter(sv[:, 0], sv[:, 1], c='k', marker='x', s=100, linewidths=1)

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
    print('Number of support vectors: ', svc.support_.size)
    
from sklearn.metrics import auc
from sklearn.metrics import roc_curve
from sklearn import svm, datasets
from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

In [None]:
from time import time
import timeit  # default_timer is used below to measure wall-clock run time
from sklearn.svm import SVC

# Time the data loading and the train/test split. The original started and
# stopped the timer on consecutive lines, so the reported run time covered
# no work at all.
start_time = timeit.default_timer()

dfes = pd.read_csv('RiverRoad_Quan_RF.csv')
# Features are every column except the four dropped here; the target is
# the FID_RiverRd_AOI column.
X = dfes.drop(['GRID_ID', 'wetland_type', 'FID_RiverRd_AOI', 'wetland'], axis = 1)
y = dfes.FID_RiverRd_AOI
# Half of the rows go to training; the fixed random_state keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5, random_state=2)

elapsed = timeit.default_timer() - start_time  # seconds spent loading and splitting
print("---Run time is %s seconds ---" % elapsed)
print()

In [3]:
# RBF-kernel support vector classifier with C=1 and gamma=1, fitted on the
# training split. fit() returns the estimator itself, so the call is chained.
svm3 = SVC(kernel='rbf', gamma=1, C=1).fit(X_train, y_train)
svm3

SVC(C=1, gamma=1)

In [4]:
# Same RBF-kernel classifier as svm3 but with gamma=50, fitted on the
# training split. fit() returns the estimator itself, so the call is chained.
svm4 = SVC(kernel='rbf', gamma=50, C=1).fit(X_train, y_train)
svm4

SVC(C=1, gamma=50)

In [None]:
# Compute decision-function scores on the training split for both fitted
# SVMs, timing the two calls together.
start_time = timeit.default_timer()
y_train_score3 = svm3.decision_function(X_train)
y_train_score4 = svm4.decision_function(X_train)
elapsed = timeit.default_timer() - start_time
# Report the measurement; the original computed `elapsed` but never showed it.
print("---Run time is %s seconds ---" % elapsed)

In [6]:
# Show the feature matrix X as this cell's output for a visual check.
X

Unnamed: 0,qu_dem_MIN,qu_dem_MAX,qu_dem_RANGE,qu_dem_MEAN,qu_dem_STD,qu_dem_SUM,qu_dsm_MIN,qu_dsm_MAX,qu_dsm_RANGE,qu_dsm_MEAN,...,qu_curpl_RANGE,qu_curpl_MEAN,qu_curpl_STD,qu_curpl_SUM,qu_curpr_MIN,qu_curpr_MAX,qu_curpr_RANGE,qu_curpr_MEAN,qu_curpr_STD,qu_curpr_SUM
0,0.691847,1.731059,1.039212,1.117221,0.202755,313.939159,0.731374,6.256660,5.525286,2.557360,...,1240.556824,6.008817,201.975394,1688.477507,-561.943237,585.859314,1147.802551,-6.984654,173.043352,-1941.733846
1,0.641888,1.487661,0.845773,1.059745,0.147762,295.668776,0.661373,6.679267,6.017894,2.126747,...,1336.323364,-4.048045,150.061933,-1129.404510,-841.298767,679.957519,1521.256287,0.069758,168.483844,19.392675
2,0.770032,1.507396,0.737363,1.176038,0.160304,329.290651,0.770032,5.681332,4.911300,1.700724,...,1046.087708,-3.552538,157.071596,-994.710507,-614.981445,614.381775,1229.363220,-9.649778,201.707961,-2692.288038
3,0.761148,1.413489,0.652341,1.131694,0.115760,316.874198,0.761148,1.462370,0.701221,1.130406,...,1129.399506,-8.086345,157.114282,-2264.176594,-577.830017,633.895386,1211.725403,-1.244892,180.105412,-347.324849
4,0.696000,1.392386,0.696386,1.160470,0.116413,322.610787,0.667013,1.392386,0.725372,1.158858,...,1234.020294,-2.054687,159.943256,-571.203099,-574.029480,501.253113,1075.282593,-1.556956,170.969213,-435.947672
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8606,0.851000,1.676798,0.825798,1.409729,0.230505,390.494948,0.851000,1.676798,0.825798,1.409729,...,432.526062,0.385976,46.427991,106.915450,-692.625366,738.732422,1431.357788,4.547107,170.083954,1268.642730
8607,0.723952,1.662994,0.939042,1.425776,0.219366,396.365613,0.723952,1.662994,0.939042,1.425776,...,446.547195,-1.177780,47.524469,-327.422941,-390.115845,542.452454,932.568298,10.324390,134.407219,2890.829178
8608,0.836438,1.653542,0.817103,1.452142,0.180319,406.599629,0.836438,1.653542,0.817103,1.452142,...,509.295837,0.999254,51.975801,279.791020,-728.279236,753.877563,1482.156799,1.897032,165.294550,527.374799
8609,0.997941,1.636024,0.638083,1.461971,0.166600,406.427984,0.997941,1.636024,0.638083,1.461971,...,514.188934,-1.129348,43.513180,-313.958618,-622.526367,402.200531,1024.726898,4.975549,115.808141,1383.202760


In [None]:
# Cross-validated ROC analysis of a linear-kernel SVM: one ROC curve per
# fold, plus the mean curve and a +/- 1 standard deviation band.
random_state = np.random.RandomState(42)

cv = StratifiedKFold(n_splits=6)
classifier = svm.SVC(kernel='linear', probability=True,
                     random_state=random_state)

tprs = []   # per-fold TPR interpolated onto the common FPR grid
aucs = []   # per-fold area under the ROC curve
mean_fpr = np.linspace(0, 1, 100)

fig, ax = plt.subplots()
for i, (train, test) in enumerate(cv.split(X, y)):
    classifier.fit(X.iloc[train, :], y.iloc[train])

    # The ROC is computed directly with roc_curve/auc instead of
    # sklearn.metrics.plot_roc_curve, which was deprecated in scikit-learn
    # 1.0 and removed in 1.2. The score is the predicted probability of the
    # second class (classes_[1]); this assumes a binary target.
    fold_scores = classifier.predict_proba(X.iloc[test, :])[:, 1]
    fold_fpr, fold_tpr, _ = roc_curve(y.iloc[test], fold_scores,
                                      pos_label=classifier.classes_[1])
    fold_auc = auc(fold_fpr, fold_tpr)
    ax.plot(fold_fpr, fold_tpr, alpha=0.3, lw=1,
            label='ROC fold {} (AUC = {:0.2f})'.format(i, fold_auc))

    # Resample this fold's curve onto the shared FPR grid so the folds can
    # be averaged point-wise; force the curve to start at TPR = 0.
    interp_tpr = np.interp(mean_fpr, fold_fpr, fold_tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)
    aucs.append(fold_auc)

# Diagonal reference line.
ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
        label='Chance', alpha=.8)

# Mean ROC across folds, forced to end at TPR = 1.
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
ax.plot(mean_fpr, mean_tpr, color='b',
        label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
        lw=2, alpha=.8)

# Shaded band of one standard deviation around the mean, clipped to [0, 1].
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                label=r'$\pm$ 1 std. dev.')

ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
       xlabel='False Positive Rate', ylabel='True Positive Rate',
       title="Receiver operating characteristic example")
ax.legend(loc="lower right")
plt.show()

0
      qu_dem_MIN  qu_dem_MAX  qu_dem_RANGE  qu_dem_MEAN  qu_dem_STD  \
1244    0.733000    2.087574      1.354574     1.392465    0.197510   
1245    0.954206    1.483000      0.528794     1.113536    0.092421   
1246    0.739000    1.536478      0.797478     1.070050    0.197377   
1247    0.765000    1.755000      0.990000     1.294456    0.174349   
1248    1.014394    1.708059      0.693665     1.347289    0.131329   
...          ...         ...           ...          ...         ...   
8606    0.851000    1.676798      0.825798     1.409729    0.230505   
8607    0.723952    1.662994      0.939042     1.425776    0.219366   
8608    0.836438    1.653542      0.817103     1.452142    0.180319   
8609    0.997941    1.636024      0.638083     1.461971    0.166600   
8610    0.962564    1.649397      0.686833     1.506675    0.148577   

      qu_dem_SUM  qu_dsm_MIN  qu_dsm_MAX  qu_dsm_RANGE  qu_dsm_MEAN  ...  \
1244  389.890154    1.175613   15.378582     14.202969     2.706664  

In [None]:
def _plot_roc(ax, y_true, y_score, gamma, color):
    """Draw one ROC curve on ``ax`` and return ``(fpr, tpr, area)``.

    ``gamma`` is only used to build the legend label. The label is a raw
    string because ``\\g`` is not a valid escape sequence in a normal
    string literal.
    """
    fpr, tpr, _ = roc_curve(y_true, y_score)
    area = auc(fpr, tpr)
    ax.plot(fpr, tpr, color=color,
            label=r'SVM $\gamma = %s$ ROC curve (area = %0.2f)' % (gamma, area))
    return fpr, tpr, area


fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: ROC on the training split, from the scores computed earlier
# in the notebook (y_train_score3 / y_train_score4).
false_pos_rate3, true_pos_rate3, roc_auc3 = _plot_roc(
    ax1, y_train, y_train_score3, gamma=1, color='b')
false_pos_rate4, true_pos_rate4, roc_auc4 = _plot_roc(
    ax1, y_train, y_train_score4, gamma=50, color='r')
ax1.set_title('Training Data')

# Right panel: ROC on the test split. As in the original cell, the
# false_pos_rate*/true_pos_rate*/roc_auc* names are reused, so after this
# cell they hold the test-split values.
y_test_score3 = svm3.decision_function(X_test)
y_test_score4 = svm4.decision_function(X_test)

false_pos_rate3, true_pos_rate3, roc_auc3 = _plot_roc(
    ax2, y_test, y_test_score3, gamma=1, color='b')
false_pos_rate4, true_pos_rate4, roc_auc4 = _plot_roc(
    ax2, y_test, y_test_score4, gamma=50, color='r')
ax2.set_title('Test Data')

# Shared decoration for both panels: diagonal reference line, axis limits,
# axis labels and legend.
for ax in fig.axes:
    ax.plot([0, 1], [0, 1], 'k--')
    ax.set_xlim([-0.05, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.legend(loc="lower right")