<a href="https://colab.research.google.com/github/sahuongithub/My-Projects/blob/main/Crowdsourced_Mapping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load dataset
df = pd.read_csv('/content/CrowdSourced.csv')

# Replace null values with the mode of the respective column
df = df.fillna(df.mode().iloc[0])

# Split dataset into features and target variable
X = df.drop('Land_Class', axis=1)
y = df['Land_Class']

# Encode categorical variables.
# Object-typed columns are selected explicitly instead of comparing the dtype
# against `type(object)` (which is the metaclass `type`, not `object`).
le = LabelEncoder()
for column in X.select_dtypes(include='object').columns:
    X[column] = le.fit_transform(X[column].astype(str))

# Split dataset into training and testing sets (70% train / 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Build Decision Tree model.
# Seeded so that repeated runs give the same tree; 42 matches the seed used
# for the split above.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Build K-nearest neighbors model
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Build SVM model
svm = SVC()
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)


def print_metrics(model_name, y_true, y_pred):
    """Print accuracy and weighted precision / recall / F1 for one model.

    Parameters
    ----------
    model_name : str
        Heading printed above the four metric lines.
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model, aligned with ``y_true``.
    """
    print(model_name)
    print('Accuracy:', accuracy_score(y_true, y_pred))
    # zero_division=0 scores a class with an undefined metric (e.g. a class
    # that is never predicted) as 0 without emitting a warning.
    print('Precision:', precision_score(y_true, y_pred, average='weighted', zero_division=0))
    print('Recall:', recall_score(y_true, y_pred, average='weighted', zero_division=0))
    print('F1-score:', f1_score(y_true, y_pred, average='weighted', zero_division=0))


# Evaluate model performance
print_metrics('Decision Tree', y_test, y_pred_dt)
print('')

print_metrics('K-nearest neighbors', y_test, y_pred_knn)
print('')

print_metrics('SVM', y_test, y_pred_svm)


Decision Tree
Accuracy: 0.9633333333333334
Precision: 0.9647706042728167
Recall: 0.9633333333333334
F1-score: 0.9631866773258586

K-nearest neighbors
Accuracy: 0.7466666666666667
Precision: 0.6313610108303249
Recall: 0.7466666666666667
F1-score: 0.6782610907610908

SVM
Accuracy: 0.7566666666666667
Precision: 0.5725444444444445
Recall: 0.7566666666666667
F1-score: 0.6518532574320051


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


**Hyperparameter Tuning for Decision Tree**

In [11]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Search space: tree depth limit plus the two minimum-sample constraints
param_grid = dict(
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Seeded base estimator that the grid search clones for every candidate
dtc = DecisionTreeClassifier(random_state=42)

# Exhaustive 5-fold cross-validated search, ranked by accuracy
clf = GridSearchCV(estimator=dtc, param_grid=param_grid, scoring='accuracy', cv=5)
clf.fit(X_train, y_train)

# Report the winning parameter combination
print("Best Hyperparameters:", clf.best_params_)

# Score the held-out test set; with refit left at its default, the search
# object predicts through its refitted best estimator
y_pred = clf.predict(X_test)

# Per-class precision / recall / F1 on the test set
print("Classification Report:\n", classification_report(y_test, y_pred))




Best Hyperparameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2}
Classification Report:
               precision    recall  f1-score   support

    Farmland       1.00      1.00      1.00        19
      Forest       1.00      1.00      1.00       228
  Industrial       1.00      1.00      1.00         3
      Meadow       1.00      1.00      1.00         7
 Residential       1.00      1.00      1.00        14
       Water       1.00      1.00      1.00        29

    accuracy                           1.00       300
   macro avg       1.00      1.00      1.00       300
weighted avg       1.00      1.00      1.00       300



**Hyperparameter Tuning for K-nearest neighbors**

In [12]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Define the hyperparameters to be tuned:
# neighbourhood size, vote weighting and distance metric
param_grid = {'n_neighbors': [3, 5, 7, 9],
              'weights': ['uniform', 'distance'],
              'metric': ['euclidean', 'manhattan']}

# Create a K-nearest neighbors classifier object
knn = KNeighborsClassifier()

# Use GridSearchCV to find the best hyperparameters
# (5-fold cross-validation on the training split, ranked by accuracy)
clf = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')
clf.fit(X_train, y_train)

# Print the best hyperparameters
print("Best Hyperparameters:", clf.best_params_)

# Predict using the best estimator
y_pred = clf.best_estimator_.predict(X_test)

# Print the classification report.
# zero_division=0 reports classes that are never predicted as 0 without
# raising an undefined-metric warning for each of them.
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))




Best Hyperparameters: {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'distance'}
Classification Report:
               precision    recall  f1-score   support

    Farmland       0.50      0.21      0.30        19
      Forest       0.79      1.00      0.88       228
  Industrial       0.00      0.00      0.00         3
      Meadow       0.00      0.00      0.00         7
 Residential       0.00      0.00      0.00        14
       Water       0.50      0.03      0.06        29

    accuracy                           0.77       300
   macro avg       0.30      0.21      0.21       300
weighted avg       0.68      0.77      0.69       300



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


**Hyperparameter Tuning for SVM**

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Define the hyperparameters to be tuned.
# One sub-grid per kernel family, so that only parameters the kernel actually
# uses are varied: `degree` is ignored by every kernel except 'poly', and
# `gamma` is not used by 'linear'. This gives 33 candidates instead of the
# 72 of a single crossed grid, without dropping any distinct model.
C_VALUES = [0.1, 1, 10]
GAMMA_VALUES = ['scale', 'auto']
param_grid = [
    {'kernel': ['linear'], 'C': C_VALUES},
    {'kernel': ['rbf', 'sigmoid'], 'C': C_VALUES, 'gamma': GAMMA_VALUES},
    {'kernel': ['poly'], 'C': C_VALUES, 'degree': [2, 3, 4], 'gamma': GAMMA_VALUES},
]

# Create a SVM classifier object
svm = SVC()

# Use GridSearchCV to find the best hyperparameters
# (5-fold cross-validation on the training split, ranked by accuracy)
clf = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy')
clf.fit(X_train, y_train)

# Print the best hyperparameters (only the keys of the winning sub-grid appear)
print("Best Hyperparameters:", clf.best_params_)

# Predict using the best estimator
y_pred = clf.best_estimator_.predict(X_test)

# Print the classification report.
# zero_division=0 reports classes that are never predicted as 0 without
# raising an undefined-metric warning for each of them.
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))