In [53]:
# Python code to perform classification using K-Nearest Neighbour on the vegetation cluster datasets.

# Importing libraries for simple data handling:
import os

# For data handling:
import numpy as np
import pandas as pd
import geopandas as gpd

# For visualisation.
import matplotlib.pyplot as plt


# Libraries for k-NN.
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [54]:
# First import the data - we are going to use geopandas.

# Present tree clusters.

# The project root is taken to be two directory levels above the current
# working directory.
current_dir = os.path.dirname(os.path.dirname(os.getcwd()))

# Build the path with os.path.join so the separator matches the host OS;
# hard-coded "\\" separators only resolve on Windows.
# NOTE(review): this path is a directory, not a single file, and the future
# cluster shapefile is later written into this same directory - confirm that a
# re-run still loads the intended present-day layer.
p_tree_cluster_path = os.path.join(current_dir, "Data", "Final", "Vegetation", "Clusters")

p_tree_clusters = gpd.read_file(p_tree_cluster_path)


In [55]:
# How much data is available: shape is a (rows, columns) tuple.
# The recorded run shows 88761 rows and 10 columns.
cluster_table_shape = p_tree_clusters.shape
print(cluster_table_shape)

(88761, 10)


In [56]:
# Preview the first five rows of the present-day cluster table.
print(p_tree_clusters.head(n=5))

   layer  Acacia  Cstnstv  Eclypts  Pnspnst  Pinuspn  Qrcsrtn  Qrcssbr  \
0    1.0     1.0      0.0      1.0      1.0      0.0      0.0      0.0   
1    1.0     0.0      0.0      1.0      1.0      0.0      0.0      0.0   
2    1.0     1.0      0.0      1.0      1.0      0.0      0.0      0.0   
3    1.0     1.0      0.0      1.0      1.0      0.0      0.0      0.0   
4    1.0     1.0      0.0      1.0      1.0      0.0      0.0      0.0   

  cluster_id                                           geometry  
0          1  POLYGON ((194175.268 576088.229, 195175.268 57...  
1          2  POLYGON ((194175.268 575088.229, 195175.268 57...  
2          1  POLYGON ((190175.268 574088.229, 191175.268 57...  
3          1  POLYGON ((191175.268 574088.229, 192175.268 57...  
4          1  POLYGON ((192175.268 574088.229, 193175.268 57...  


In [57]:
# List the dtype of every column in the cluster table.
column_dtypes = p_tree_clusters.dtypes
print(column_dtypes)

layer          float64
Acacia         float64
Cstnstv        float64
Eclypts        float64
Pnspnst        float64
Pinuspn        float64
Qrcsrtn        float64
Qrcssbr        float64
cluster_id      object
geometry      geometry
dtype: object


In [58]:
# Separate the data into features and target labels.

# Features: the seven per-species columns of the cluster table.
feature_columns = ['Acacia', 'Cstnstv', 'Eclypts', 'Pnspnst', 'Pinuspn', 'Qrcsrtn', 'Qrcssbr']
xs = p_tree_clusters[feature_columns]

# Target label: the cluster each row was assigned to.
ys = p_tree_clusters['cluster_id']

In [59]:
# Sanity check: first five rows of the feature table.
print(xs.head(n=5))


   Acacia  Cstnstv  Eclypts  Pnspnst  Pinuspn  Qrcsrtn  Qrcssbr
0     1.0      0.0      1.0      1.0      0.0      0.0      0.0
1     0.0      0.0      1.0      1.0      0.0      0.0      0.0
2     1.0      0.0      1.0      1.0      0.0      0.0      0.0
3     1.0      0.0      1.0      1.0      0.0      0.0      0.0
4     1.0      0.0      1.0      1.0      0.0      0.0      0.0


In [60]:
# Sanity check: first five target labels.
print(ys.head(n=5))

0    1
1    2
2    1
3    1
4    1
Name: cluster_id, dtype: object


In [61]:
# Train-test splitting and randomisation: 20% of the rows are held out for
# testing. random_state is fixed so the same split is produced on every run.
# train_test_split comes from the notebook's top-level import cell.
xs_train, xs_test, ys_train, ys_test = train_test_split(xs, ys, test_size=0.2, random_state=7)

In [62]:
# Checking: heading line followed by the first five training labels.
print("\n ys_train: \n", ys_train.head(), sep="\n")


 ys_train: 

1322     2
84803    3
23058    9
81015    3
16175    8
Name: cluster_id, dtype: object


In [63]:
# Feature scaling: z-score normalisation.
# This ensures that the features are all on a comparable scale, so that
# features with large value ranges do not dominate the distance computations
# the algorithm relies on when making predictions.
# StandardScaler comes from the notebook's top-level import cell.

scaler = StandardScaler()

In [64]:
# Only the features (xs) are scaled; the labels are left as they are.

# Learn the scaling statistics from the training split alone, then apply the
# same fitted transform to both splits.
# NOTE(review): xs_train / xs_test are overwritten with their scaled versions,
# so re-running this cell on its own re-fits the scaler on already-scaled
# data. Re-run from the train/test split cell instead.
scaler.fit(xs_train)
xs_train = scaler.transform(xs_train)
xs_test = scaler.transform(xs_test)

In [65]:
# Instantiate a KNN classifier object from scikit-learn with its default
# settings; the hyper-parameters are chosen later through grid search.
KNN = KNeighborsClassifier()

In [66]:
# Grid-search cross-validation is used to pick the k-NN hyper-parameters,
# so a hyper-parameter grid has to be defined first.

# Candidate values: 2 weightings x 19 neighbour counts x 3 distance metrics.
weight_options = ["uniform", "distance"]
neighbour_counts = range(1, 20)  # k = 1 .. 19 inclusive
metric_options = ['euclidean', 'manhattan', 'cosine']

param_grid = [{
    'weights': weight_options,
    'n_neighbors': neighbour_counts,
    'metric': metric_options}]

# NOTE(review): cv_mean is not read by any of the visible cells; kept in case
# a later cell relies on it.
cv_mean = []

# Hyper-parameter tuning: every combination is evaluated with 14-fold
# cross-validation; verbose=2 logs one line per individual fit.
grid_search = GridSearchCV(
    estimator=KNN,
    param_grid=param_grid,
    cv=14,
    verbose=2,
)



In [67]:
# Run the grid search on the scaled training features and their labels.
grid_search.fit(X=xs_train, y=ys_train)

# Show the estimator configured with the best-scoring parameter combination.
best_knn = grid_search.best_estimator_
print(best_knn)

Fitting 14 folds for each of 114 candidates, totalling 1596 fits
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.2s
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.2s
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.1s
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.1s
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.1s
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.1s
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.1s
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.1s
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.1s
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.1s
[CV] END ...metric=euclidean, n_neighbors=1, weights=uniform; total time=   1.1s
[CV] END ...metric=euclidean, n_neighbors=1,

[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=4, weights=distance; total time=   0.7s
[CV] END ...metric=euclidean, n_neighbors=5, weights=uniform; total time=   0.8s
[CV] END ...metric=euclidean

[CV] END ...metric=euclidean, n_neighbors=8, weights=uniform; total time=   0.7s
[CV] END ...metric=euclidean, n_neighbors=8, weights=uniform; total time=   0.8s
[CV] END ...metric=euclidean, n_neighbors=8, weights=uniform; total time=   0.8s
[CV] END ...metric=euclidean, n_neighbors=8, weights=uniform; total time=   0.7s
[CV] END ...metric=euclidean, n_neighbors=8, weights=uniform; total time=   0.8s
[CV] END ...metric=euclidean, n_neighbors=8, weights=uniform; total time=   0.8s
[CV] END ...metric=euclidean, n_neighbors=8, weights=uniform; total time=   0.8s
[CV] END ..metric=euclidean, n_neighbors=8, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=8, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=8, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=8, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=8, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean,

[CV] END .metric=euclidean, n_neighbors=11, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=11, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=11, weights=distance; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=12, weights=uniform; total time=   0.8s
[CV] END ..metric=euclidean, n_neighbors=12, weights=uniform; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=12, weights=uniform; total time=   0.8s
[CV] END ..metric=euclidean, n_neighbors=12, weights=uniform; total time=   0.8s
[CV] END ..metric=euclidean, n_neighbors=12, weights=uniform; total time=   0.8s
[CV] END ..metric=euclidean, n_neighbors=12, weights=uniform; total time=   0.8s
[CV] END ..metric=euclidean, n_neighbors=12, weights=uniform; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=12, weights=uniform; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=12, weights=uniform; total time=   0.7s
[CV] END ..metric=euclidean,

[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=15, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, 

[CV] END ..metric=euclidean, n_neighbors=19, weights=uniform; total time=   0.8s
[CV] END ..metric=euclidean, n_neighbors=19, weights=uniform; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=19, weights=uniform; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=19, weights=uniform; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=19, weights=uniform; total time=   0.8s
[CV] END ..metric=euclidean, n_neighbors=19, weights=uniform; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=19, weights=uniform; total time=   0.7s
[CV] END ..metric=euclidean, n_neighbors=19, weights=uniform; total time=   0.8s
[CV] END ..metric=euclidean, n_neighbors=19, weights=uniform; total time=   0.8s
[CV] END .metric=euclidean, n_neighbors=19, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=19, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, n_neighbors=19, weights=distance; total time=   0.7s
[CV] END .metric=euclidean, 

[CV] END ..metric=manhattan, n_neighbors=3, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=3, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=3, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=3, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=3, weights=distance; total time=   0.6s
[CV] END ...metric=manhattan, n_neighbors=4, weights=uniform; total time=   0.7s
[CV] END ...metric=manhattan, n_neighbors=4, weights=uniform; total time=   0.7s
[CV] END ...metric=manhattan, n_neighbors=4, weights=uniform; total time=   0.7s
[CV] END ...metric=manhattan, n_neighbors=4, weights=uniform; total time=   0.7s
[CV] END ...metric=manhattan, n_neighbors=4, weights=uniform; total time=   0.7s
[CV] END ...metric=manhattan, n_neighbors=4, weights=uniform; total time=   0.7s
[CV] END ...metric=manhattan, n_neighbors=4, weights=uniform; total time=   0.7s
[CV] END ...metric=manhattan

[CV] END ...metric=manhattan, n_neighbors=7, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=7, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan,

[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=11, weights=uniform; total time=   0.7s
[CV] END .metric=manhattan, n_neighbors=11, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, 

[CV] END .metric=manhattan, n_neighbors=14, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=14, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=14, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=14, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=14, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=14, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=14, weights=distance; total time=   0.6s
[CV] END ..metric=manhattan, n_neighbors=15, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=15, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=15, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=15, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=15, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan,

[CV] END ..metric=manhattan, n_neighbors=18, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=18, weights=uniform; total time=   0.7s
[CV] END ..metric=manhattan, n_neighbors=18, weights=uniform; total time=   0.7s
[CV] END .metric=manhattan, n_neighbors=18, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=18, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=18, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=18, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=18, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=18, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=18, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=18, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, n_neighbors=18, weights=distance; total time=   0.6s
[CV] END .metric=manhattan, 

[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.8s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine, n_neighbors=3, weights=uniform; total time=   2.7s
[CV] END ......metric=cosine

[CV] END .....metric=cosine, n_neighbors=6, weights=distance; total time=   4.3s
[CV] END .....metric=cosine, n_neighbors=6, weights=distance; total time=   3.7s
[CV] END .....metric=cosine, n_neighbors=6, weights=distance; total time=   4.1s
[CV] END .....metric=cosine, n_neighbors=6, weights=distance; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=6, weights=distance; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=6, weights=distance; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=6, weights=distance; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=6, weights=distance; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=6, weights=distance; total time=   4.1s
[CV] END ......metric=cosine, n_neighbors=7, weights=uniform; total time=   4.2s
[CV] END ......metric=cosine, n_neighbors=7, weights=uniform; total time=   4.3s
[CV] END ......metric=cosine, n_neighbors=7, weights=uniform; total time=   4.6s
[CV] END ......metric=cosine

[CV] END .....metric=cosine, n_neighbors=10, weights=uniform; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=10, weights=uniform; total time=   4.5s
[CV] END .....metric=cosine, n_neighbors=10, weights=uniform; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=10, weights=uniform; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=10, weights=uniform; total time=   4.3s
[CV] END ....metric=cosine, n_neighbors=10, weights=distance; total time=   4.1s
[CV] END ....metric=cosine, n_neighbors=10, weights=distance; total time=   4.3s
[CV] END ....metric=cosine, n_neighbors=10, weights=distance; total time=   4.4s
[CV] END ....metric=cosine, n_neighbors=10, weights=distance; total time=   4.3s
[CV] END ....metric=cosine, n_neighbors=10, weights=distance; total time=   4.2s
[CV] END ....metric=cosine, n_neighbors=10, weights=distance; total time=   4.2s
[CV] END ....metric=cosine, n_neighbors=10, weights=distance; total time=   3.7s
[CV] END ....metric=cosine, 

[CV] END ....metric=cosine, n_neighbors=13, weights=distance; total time=   4.1s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   4.2s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   4.3s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   4.3s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   3.8s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   4.2s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   4.5s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   4.4s
[CV] END .....metric=cosine, n_neighbors=14, weights=uniform; total time=   4.5s
[CV] END .....metric=cosine,

[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   4.3s
[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   4.3s
[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   4.2s
[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   3.7s
[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   4.1s
[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   4.4s
[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   4.4s
[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   4.4s
[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   4.4s
[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   4.4s
[CV] END ....metric=cosine, n_neighbors=17, weights=distance; total time=   4.1s
[CV] END .....metric=cosine, n_neighbors=18, weights=uniform; total time=   4.1s
[CV] END .....metric=cosine,

In [68]:
# Accuracy on the held-out test split.
ys_pred = grid_search.predict(xs_test)
acc_s = accuracy_score(y_true=ys_test, y_pred=ys_pred)

# Accuracy on the training split, for comparison with the test score.
# NOTE(review): the recorded run reports 1.0 on both splits. The previewed
# feature rows contain only 0/1 values, so identical rows may well appear in
# both splits - worth confirming before relying on the score.
train_pred = grid_search.predict(xs_train)
train_s = accuracy_score(y_true=ys_train, y_pred=train_pred)

In [69]:
# Report both scores, each padded with a blank line before and after.
print(f"\nTraining set accuracy: {train_s!s}\n")
print(f"\nTesting set accuracy: {acc_s!s}\n")


Training set accuracy: 1.0


Testing set accuracy: 1.0



In [70]:
# Once the model has been trained, it can be applied to the future dataset to
# identify these clusters.

# Open the future dataset.
# Paths are built with os.path.join so the separator matches the host OS;
# hard-coded "\\" separators only resolve on Windows.
f_tree_cluster_path = os.path.join(
    current_dir, "Data", "Intermediate", "Vegetation", "Combined", "future_combined.shp"
)
f_tree_clusters = gpd.read_file(f_tree_cluster_path)

# Define the output location right away as well.
# NOTE(review): this file lands in the same Clusters directory that the
# present-day clusters are read from as a whole directory - confirm that a
# re-run of the notebook still loads the intended layer.
f_tree_output_path = os.path.join(
    current_dir, "Data", "Final", "Vegetation", "Clusters", "future_tree_clusters.shp"
)

In [71]:
# Pull the same seven species columns used for training out of the future dataset.
future_feature_columns = ['Acacia', 'Cstnstv', 'Eclypts', 'Pnspnst', 'Pinuspn', 'Qrcsrtn', 'Qrcssbr']

# Apply the scaler fitted on the training split (transform only, no re-fit),
# so the future features are scaled in the same way as the training data.
f_xs = scaler.transform(f_tree_clusters[future_feature_columns])

# Predict a cluster for every future row with the tuned model and store the
# predictions on the future dataframe.
f_ys_pred = grid_search.predict(f_xs)
f_tree_clusters['cluster_id'] = f_ys_pred

# Finally, write the labelled future dataset to the output path.
f_tree_clusters.to_file(f_tree_output_path)

print("Classification complete")

Classification complete
