In [51]:
import numpy as np
import os
from time import time
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
#from sklearn.datasets import make_moons, make_circles, make_classification
#from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

#from sklearn.preprocessing import OneHotEncoder
#from sklearn.preprocessing import LabelBinarizer

In [140]:
file = "Vegegtation indices - wheat rust AnMin.xlsx"
full_data = pd.read_excel(file, sheet_name='Vege indices ')
full_data.head()

Unnamed: 0,label,PRI,NDVI 761,NDVI 850,NDVI 705,GNDVI,TVI Triangular VG ind,RVI,CRI 550,CRI 700,TVI Transformed veg ind
0,0,-0.070534,0.728496,0.756927,0.263149,0.666534,14.8774,7.705382,13.006808,10.821075,1.108375
1,0,-0.074177,0.740989,0.769035,0.267875,0.679105,15.8752,8.230294,13.078137,10.778979,1.113997
2,0,-0.070585,0.763441,0.788329,0.277427,0.697424,17.3918,9.226846,13.315305,10.537926,1.124029
3,0,-0.069958,0.731135,0.758946,0.252463,0.6692,15.94,7.906004,11.73137,9.366585,1.109565
4,0,-0.074269,0.613729,0.660931,0.197407,0.601273,9.4168,4.777538,10.641968,10.913149,1.055334


In [141]:
label = full_data['label'].tolist()
label = np.asarray(label).reshape(-1,)
label


array([0, 0, 0, ..., 6, 6, 6])

In [142]:
cols = full_data.columns.tolist()
cols = cols[1:]
cols

['PRI',
 'NDVI 761',
 'NDVI 850',
 'NDVI 705',
 'GNDVI',
 'TVI Triangular VG ind',
 'RVI',
 'CRI 550',
 'CRI 700',
 'TVI Transformed veg ind']

## Put all data together to do classify

In [57]:
x = full_data.iloc[:,1:]
#x = x['PRI'].tolist()
#x = np.asarray(x).reshape(-1,1)
x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)

### SVM

In [59]:
# #############################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time()
param_grid = {'kernel':['rbf','sigmoid'],'C': [1e2,5e2,1e3, 5e3, 1e4],
              'gamma': [0.001, 0.005, 0.01,0.05,0.1,0.2]}
print("Set Grid parameters")
clf = GridSearchCV(
    SVC(class_weight='balanced',probability=True), param_grid, cv=10)
print("Start to fit")
clf = clf.fit(x_train, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

Fitting the classifier to the training set
Set Grid parameters
Start to fit




done in 508.769s
Best estimator found by grid search:
SVC(C=5000.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)


In [60]:
clf.score(x_test,y_test)

0.4114441416893733

### Decision Tree

In [61]:
x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)

In [62]:
print("Fitting the classifier to the training set")
t0 = time()
param_grid = {'max_depth': range(1,20), 'criterion':['gini','entropy']}
print("Set Grid parameters")
clf = GridSearchCV(
    DecisionTreeClassifier(), param_grid, cv=10)
print("Start to fit")
clf = clf.fit(x_train, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

Fitting the classifier to the training set
Set Grid parameters
Start to fit
done in 4.608s
Best estimator found by grid search:
DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=7,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')




In [63]:
clf.score(x_test,y_test)

0.44141689373297005

## Classify by each Vegetation index

### SVM

In [143]:
x = full_data.iloc[:,1:]
x = x[cols[4]].tolist()
x = np.asarray(x).reshape(-1,1)
x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)

In [144]:
# #############################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time()
param_grid = {'kernel':['rbf','sigmoid'],'C': [1e2,5e2,1e3, 5e3, 1e4],
              'gamma': [0.001, 0.005, 0.01,0.05,0.1,0.2]}
print("Set Grid parameters")
clf = GridSearchCV(
    SVC(class_weight='balanced',probability=True), param_grid, cv=10)
print("Start to fit")
clf = clf.fit(x_train, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

Fitting the classifier to the training set
Set Grid parameters
Start to fit
done in 82.139s
Best estimator found by grid search:
SVC(C=10000.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.2, kernel='sigmoid',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)




In [145]:
clf.score(x_test,y_test)

0.3651226158038147

In [146]:
result_scores = []
for col in cols:
    x = full_data.iloc[:,1:]
    x = x[col].tolist()
    x = np.asarray(x).reshape(-1,1)
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)
          
    clf = SVC(kernel='sigmoid', gamma='auto')
    clf.fit(x_train, y_train)
    result = dict()
    result['Index'] = col
    result['score'] = clf.score(x_test,y_test)
    result_scores.append(result)

In [148]:
result_pd = pd.DataFrame(result_scores)
result_pd.to_excel("all stage each index scores.xlsx")

In [149]:
result_pd

Unnamed: 0,Index,score
0,PRI,0.354223
1,NDVI 761,0.354223
2,NDVI 850,0.354223
3,NDVI 705,0.354223
4,GNDVI,0.354223
5,TVI Triangular VG ind,0.354223
6,RVI,0.354223
7,CRI 550,0.354223
8,CRI 700,0.354223
9,TVI Transformed veg ind,0.354223


### Random Forest

In [71]:
result_scores = []
for col in cols:
    x = full_data.iloc[:,1:]
    x = x[col].tolist()
    x = np.asarray(x).reshape(-1,1)
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)
          
    clf = RandomForestClassifier()
    clf.fit(x_train, y_train)
    result = dict()
    result['Index'] = col
    result['score'] = clf.score(x_test,y_test)
    result_scores.append(result)



In [72]:
result_pd = pd.DataFrame(result_scores)
result_pd

Unnamed: 0,Index,score
0,PRI,0.288828
1,NDVI 761,0.275204
2,NDVI 850,0.242507
3,NDVI 705,0.237057
4,GNDVI,0.247956
5,TVI Triangular VG ind,0.250681
6,RVI,0.302452
7,CRI 550,0.26703
8,CRI 700,0.250681
9,TVI Transformed veg ind,0.286104


## To classify each stage

In [74]:
health = full_data[full_data['label'] == 0]
stage_1 = full_data[full_data['label'] == 1]
stage_2 = full_data[full_data['label'] == 2]
stage_3 = full_data[full_data['label'] == 3]
stage_4 = full_data[full_data['label'] == 4]
stage_5 = full_data[full_data['label'] == 5]
stage_6 = full_data[full_data['label'] == 6]

### process stage 1

In [78]:
length = len(stage_1)
length

276

In [90]:
health_1 = health.sample(length)
stage_1 = pd.concat([health_1, stage_1])

### SVM 

In [136]:
label = stage_1['label'].tolist()
label = np.asarray(label).reshape(-1,)

result_scores_1 = []
for col in cols:
    x = stage_1.iloc[:,1:]
    x = x[col].tolist()
    x = np.asarray(x).reshape(-1,1)
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)
          
    clf = SVC(kernel='sigmoid', gamma='auto')
    clf.fit(x_train, y_train)
    result = dict()
    result['Index'] = col
    result['score_1'] = clf.score(x_test,y_test)
    result_scores_1.append(result)

In [137]:
result_pd1 = pd.DataFrame(result_scores_1)
result_pd1

Unnamed: 0,Index,score
0,PRI,0.662651
1,NDVI 761,0.662651
2,NDVI 850,0.662651
3,NDVI 705,0.662651
4,GNDVI,0.662651
5,TVI Triangular VG ind,0.662651
6,RVI,0.662651
7,CRI 550,0.662651
8,CRI 700,0.662651
9,TVI Transformed veg ind,0.662651


### Random Forests

In [88]:
label = stage_1['label'].tolist()
label = np.asarray(label).reshape(-1,)

result_scores = []
for col in cols:
    x = stage_1.iloc[:,1:]
    x = x[col].tolist()
    x = np.asarray(x).reshape(-1,1)
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)
          
    clf = RandomForestClassifier()
    clf.fit(x_train, y_train)
    result = dict()
    result['Index'] = col
    result['score'] = clf.score(x_test,y_test)
    result_scores.append(result)



In [89]:
result_pd = pd.DataFrame(result_scores)
result_pd

Unnamed: 0,Index,score
0,PRI,0.518072
1,NDVI 761,0.572289
2,NDVI 850,0.560241
3,NDVI 705,0.548193
4,GNDVI,0.590361
5,TVI Triangular VG ind,0.554217
6,RVI,0.566265
7,CRI 550,0.506024
8,CRI 700,0.524096
9,TVI Transformed veg ind,0.578313


### Process Stage 2

In [92]:
length = len(stage_2)
length

160

In [93]:
health_2 = health.sample(length)
stage_2 = pd.concat([health_2, stage_2])

### SVM

In [134]:
label = stage_2['label'].tolist()
label = np.asarray(label).reshape(-1,)

result_scores_2 = []
for col in cols:
    x = stage_2.iloc[:,1:]
    x = x[col].tolist()
    x = np.asarray(x).reshape(-1,1)
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)
          
    clf = SVC(kernel='sigmoid', gamma='auto')
    clf.fit(x_train, y_train)
    result = dict()
    result['Index'] = col
    result['score_2'] = clf.score(x_test,y_test)
    result_scores_2.append(result)

In [135]:
result_pd2 = pd.DataFrame(result_scores_2)
result_pd2

Unnamed: 0,Index,score
0,PRI,0.479167
1,NDVI 761,0.677083
2,NDVI 850,0.666667
3,NDVI 705,0.479167
4,GNDVI,0.770833
5,TVI Triangular VG ind,0.479167
6,RVI,0.479167
7,CRI 550,0.479167
8,CRI 700,0.479167
9,TVI Transformed veg ind,0.479167


### Random Forest

In [96]:
label = stage_2['label'].tolist()
label = np.asarray(label).reshape(-1,)

result_scores = []
for col in cols:
    x = stage_2.iloc[:,1:]
    x = x[col].tolist()
    x = np.asarray(x).reshape(-1,1)
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)
          
    clf = RandomForestClassifier()
    clf.fit(x_train, y_train)
    result = dict()
    result['Index'] = col
    result['score'] = clf.score(x_test,y_test)
    result_scores.append(result)



In [97]:
result_pd = pd.DataFrame(result_scores)
result_pd

Unnamed: 0,Index,score
0,PRI,0.520833
1,NDVI 761,0.645833
2,NDVI 850,0.645833
3,NDVI 705,0.614583
4,GNDVI,0.708333
5,TVI Triangular VG ind,0.645833
6,RVI,0.65625
7,CRI 550,0.479167
8,CRI 700,0.479167
9,TVI Transformed veg ind,0.645833


### Stage 3

In [98]:
length = len(stage_3)
length

129

In [99]:
health_3 = health.sample(length)
stage_3 = pd.concat([health_3, stage_3])

### SVM

In [130]:
label = stage_3['label'].tolist()
label = np.asarray(label).reshape(-1,)

result_scores_3 = []
for col in cols:
    x = stage_3.iloc[:,1:]
    x = x[col].tolist()
    x = np.asarray(x).reshape(-1,1)
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)
          
    clf = SVC(kernel='sigmoid', gamma='auto')
    clf.fit(x_train, y_train)
    result = dict()
    result['Index'] = col
    result['score_3'] = clf.score(x_test,y_test)
    result_scores_3.append(result)

In [131]:
result_pd3 = pd.DataFrame(result_scores_3)
result_pd3

Unnamed: 0,Index,score
0,PRI,0.487179
1,NDVI 761,0.653846
2,NDVI 850,0.730769
3,NDVI 705,0.487179
4,GNDVI,0.653846
5,TVI Triangular VG ind,0.487179
6,RVI,0.487179
7,CRI 550,0.487179
8,CRI 700,0.487179
9,TVI Transformed veg ind,0.487179


### Stage 4

In [102]:
length = len(stage_4)
length

75

In [103]:
health_4 = health.sample(length)
stage_4 = pd.concat([health_4, stage_4])

### SVM

In [132]:
label = stage_4['label'].tolist()
label = np.asarray(label).reshape(-1,)

result_scores_4 = []
for col in cols:
    x = stage_4.iloc[:,1:]
    x = x[col].tolist()
    x = np.asarray(x).reshape(-1,1)
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=42)
          
    clf = SVC(kernel='sigmoid', gamma='auto')
    clf.fit(x_train, y_train)
    result = dict()
    result['Index'] = col
    result['score_4'] = clf.score(x_test,y_test)
    result_scores_4.append(result)

In [133]:
result_pd4 = pd.DataFrame(result_scores_4)
result_pd4

Unnamed: 0,Index,score
0,PRI,0.4
1,NDVI 761,0.4
2,NDVI 850,0.4
3,NDVI 705,0.4
4,GNDVI,0.4
5,TVI Triangular VG ind,0.4
6,RVI,0.4
7,CRI 550,0.4
8,CRI 700,0.4
9,TVI Transformed veg ind,0.4


### Stage 5

In [106]:
length = len(stage_5)
length

94

In [107]:
health_5 = health.sample(length)
stage_5 = pd.concat([health_5, stage_5])

### SVM

In [124]:
label = stage_5['label'].tolist()
label = np.asarray(label).reshape(-1,)

result_scores_5 = []
for col in cols:
    x = stage_5.iloc[:,1:]
    x = x[col].tolist()
    x = np.asarray(x).reshape(-1,1)
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=66)
          
    clf = SVC(kernel='sigmoid', gamma='auto')
    clf.fit(x_train, y_train)
    result = dict()
    result['Index'] = col
    result['score_5'] = clf.score(x_test,y_test)
    result_scores_5.append(result)

In [127]:
result_pd5 = pd.DataFrame(result_scores_5)
result_pd5

Unnamed: 0,Index,score_5
0,PRI,0.473684
1,NDVI 761,0.473684
2,NDVI 850,0.473684
3,NDVI 705,0.473684
4,GNDVI,0.473684
5,TVI Triangular VG ind,0.473684
6,RVI,0.473684
7,CRI 550,0.473684
8,CRI 700,0.473684
9,TVI Transformed veg ind,0.473684


### Stage 6

In [114]:
length = len(stage_6)
length

74

In [115]:
health_6 = health.sample(length)
stage_6 = pd.concat([health_6, stage_6])

### SVM

In [122]:
label = stage_6['label'].tolist()
label = np.asarray(label).reshape(-1,)

result_scores_6 = []
for col in cols:
    x = stage_6.iloc[:,1:]
    x = x[col].tolist()
    x = np.asarray(x).reshape(-1,1)
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.3, random_state=11)
          
    clf = SVC(kernel='sigmoid', gamma='auto')
    clf.fit(x_train, y_train)
    result = dict()
    result['Index'] = col
    result['score_6'] = clf.score(x_test,y_test)
    result_scores_6.append(result)

In [126]:
result_pd6 = pd.DataFrame(result_scores_6)
result_pd6

Unnamed: 0,Index,score_6
0,PRI,0.488889
1,NDVI 761,0.888889
2,NDVI 850,0.866667
3,NDVI 705,0.488889
4,GNDVI,0.844444
5,TVI Triangular VG ind,0.488889
6,RVI,0.488889
7,CRI 550,0.488889
8,CRI 700,0.488889
9,TVI Transformed veg ind,0.488889


In [138]:
full_score = pd.concat([result_pd1,result_pd2,result_pd3,result_pd4,result_pd5,result_pd6], axis=1)
full_score

Unnamed: 0,Index,score,Index.1,score.1,Index.2,score.2,Index.3,score.3,Index.4,score_5,Index.5,score_6
0,PRI,0.662651,PRI,0.479167,PRI,0.487179,PRI,0.4,PRI,0.473684,PRI,0.488889
1,NDVI 761,0.662651,NDVI 761,0.677083,NDVI 761,0.653846,NDVI 761,0.4,NDVI 761,0.473684,NDVI 761,0.888889
2,NDVI 850,0.662651,NDVI 850,0.666667,NDVI 850,0.730769,NDVI 850,0.4,NDVI 850,0.473684,NDVI 850,0.866667
3,NDVI 705,0.662651,NDVI 705,0.479167,NDVI 705,0.487179,NDVI 705,0.4,NDVI 705,0.473684,NDVI 705,0.488889
4,GNDVI,0.662651,GNDVI,0.770833,GNDVI,0.653846,GNDVI,0.4,GNDVI,0.473684,GNDVI,0.844444
5,TVI Triangular VG ind,0.662651,TVI Triangular VG ind,0.479167,TVI Triangular VG ind,0.487179,TVI Triangular VG ind,0.4,TVI Triangular VG ind,0.473684,TVI Triangular VG ind,0.488889
6,RVI,0.662651,RVI,0.479167,RVI,0.487179,RVI,0.4,RVI,0.473684,RVI,0.488889
7,CRI 550,0.662651,CRI 550,0.479167,CRI 550,0.487179,CRI 550,0.4,CRI 550,0.473684,CRI 550,0.488889
8,CRI 700,0.662651,CRI 700,0.479167,CRI 700,0.487179,CRI 700,0.4,CRI 700,0.473684,CRI 700,0.488889
9,TVI Transformed veg ind,0.662651,TVI Transformed veg ind,0.479167,TVI Transformed veg ind,0.487179,TVI Transformed veg ind,0.4,TVI Transformed veg ind,0.473684,TVI Transformed veg ind,0.488889


In [139]:
full_score.to_excel('Vegegtation indices SVM scores.xlsx')