In [3]:
import os
import cv2
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [4]:
def load_images_from_folder(folder_path, label):
    images = []
    labels = []
    scaler = MinMaxScaler()
    
    for filename in os.listdir(folder_path):
        img_path = os.path.join(folder_path, filename)
        try:
            if os.path.isfile(img_path):
                img = cv2.imread(img_path)
                # Resize image to a fixed size (e.g., 64x64)
                img = cv2.resize(img, (64, 64))
                # Convert image to grayscale
                img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                # Flatten the image into a vector
                img_vector = img_gray.flatten()
                images.append(img_vector)
                labels.append(label)
        except Exception as e:
            print(f"Error processing image {img_path}: {str(e)}")
            pass  # Skip this image and continue with the next one
    
    # Normalize images using Min-Max scaling
    images = scaler.fit_transform(images)
    
    return images, labels

In [5]:
# Define paths to your image folders
building_folder='/home/merlingpu2/swapnil/assignment/dataset_full/Building'
forest_folder='/home/merlingpu2/swapnil/assignment/dataset_full/Forest'
glacier_folder='/home/merlingpu2/swapnil/assignment/dataset_full/Glacier'
mountains_folder='/home/merlingpu2/swapnil/assignment/dataset_full/Mountains'
sea_folder='/home/merlingpu2/swapnil/assignment/dataset_full/Sea'
streets_folder='/home/merlingpu2/swapnil/assignment/dataset_full/Streets'

In [6]:
building_images, building_labels = load_images_from_folder(building_folder, label=0)
forest_images, forest_labels = load_images_from_folder(forest_folder, label=1)
glacier_images, glacier_labels = load_images_from_folder(glacier_folder, label=2)
mountains_images, mountains_labels = load_images_from_folder(mountains_folder, label=3)
sea_images, sea_labels = load_images_from_folder(sea_folder, label=4)
streets_images, streets_labels = load_images_from_folder(streets_folder, label=5)

Error processing image /home/merlingpu2/swapnil/assignment/dataset_full/Building/.DS_Store: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'resize'

Error processing image /home/merlingpu2/swapnil/assignment/dataset_full/Glacier/.DS_Store: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'resize'

Error processing image /home/merlingpu2/swapnil/assignment/dataset_full/Mountains/.DS_Store: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'resize'

Error processing image /home/merlingpu2/swapnil/assignment/dataset_full/Sea/.DS_Store: OpenCV(4.9.0) /io/opencv/modules/imgproc/src/resize.cpp:4152: error: (-215:Assertion failed) !ssize.empty() in function 'resize'

Error processing image /home/merlingpu2/swapnil/assignment/dataset_full/Streets/.DS_Store: OpenCV(4.9.0) /io/opencv/m

In [7]:
# Concatenate images and labels for all classes
all_images = np.concatenate([building_images,forest_images,glacier_images,mountains_images,sea_images,streets_images], axis=0)
all_labels = np.concatenate([building_labels, forest_labels, glacier_labels,mountains_labels,sea_labels,streets_labels], axis=0)

In [8]:
# Shuffle the data
random_indices = np.random.permutation(len(all_images))
all_images = all_images[random_indices]
all_labels = all_labels[random_indices]

In [9]:
# Convert images and labels to numpy arrays
all_images = np.array(all_images)
all_labels = np.array(all_labels)

In [10]:
flat_images = all_images.reshape(all_images.shape[0], -1)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(flat_images, all_labels, test_size=0.2, random_state=42)

In [51]:
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=42, splitter='best')

In [52]:
# Predict on the test set
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.4938036224976168


In [14]:
from sklearn.ensemble import RandomForestClassifier

In [15]:
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)

In [16]:
# Predict on the test set
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6387035271687321


In [17]:
# Define the parameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt'],
    'bootstrap': [True, False]
}

# Instantiate the grid search model
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42), param_grid=param_grid, 
                           cv=3, n_jobs=-1, verbose=2)

# Perform grid search
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 324 candidates, totalling 972 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   17.8s
[Parallel(n_jobs=-1)]: Done 130 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 333 tasks      | elapsed:  5.0min
[Parallel(n_jobs=-1)]: Done 616 tasks      | elapsed: 11.2min
[Parallel(n_jobs=-1)]: Done 972 out of 972 | elapsed: 18.9min finished


GridSearchCV(cv=3, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False, random_state=42,
                                  

In [18]:
# Get the best parameters
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

Best Parameters: {'bootstrap': False, 'max_depth': None, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}


In [19]:
# Use the best model for prediction
best_clf = grid_search.best_estimator_
y_pred = best_clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6472831267874166


In [20]:
from sklearn.ensemble import GradientBoostingClassifier

# Build Gradient Boosting classifier
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
clf.fit(X_train, y_train)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=42, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [21]:
# Predict on the test set
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6587225929456625


In [12]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical

In [13]:
import sys
print(sys.version)

3.7.6 (default, Jan  8 2020, 19:59:22) 
[GCC 7.3.0]


In [55]:
# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(all_labels)
num_classes = len(label_encoder.classes_)

In [56]:
# Convert images and labels to numpy arrays
images = np.array(all_images)
labels_encoded = np.array(labels_encoded)

In [65]:
X_train = X_train.reshape(-1, 64, 64, 1)
X_test = X_test.reshape(-1, 64, 64, 1)

In [58]:
# Normalize pixel values to range [0, 1]
X_train = X_train / 255.0
X_test = X_test / 255.0

In [59]:
# Convert labels to one-hot encoded vectors
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

In [14]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

In [40]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid for grid search
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}

# Perform grid search
grid_search = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=5, verbose=1)
grid_search.fit(X_train, y_train)

# Get the best model from grid search
best_model = grid_search.best_estimator_

# Predictions
train_predictions = best_model.predict(X_train)
test_predictions = best_model.predict(X_test)

# Calculate accuracies
train_accuracy = accuracy_score(y_train, train_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)

print("Best Model Parameters:", grid_search.best_params_)
print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:  1.6min finished


Best Model Parameters: {'C': 100}
Train Accuracy: 0.6561010486177312
Test Accuracy: 0.6129647283126788


In [75]:
# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(all_labels)
num_classes = len(label_encoder.classes_)

# Convert images and labels to numpy arrays
X = np.array(all_images)
y = np.array(labels_encoded)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [76]:
# Normalize pixel values to range [0, 1]
X_train = X_train / 255.0
X_test = X_test / 255.0

# Build the neural network model
model = Sequential([
    Flatten(input_shape=(64, 64)),  # Input shape matches the shape of the flattened images
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])

In [77]:
# Compile the model
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [78]:
# Implement early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, 
                    validation_data=(X_test, y_test), callbacks=[early_stopping])

Epoch 1/100


ValueError: in user code:

    File "/home/merlingpu2/.local/lib/python3.7/site-packages/keras/engine/training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "/home/merlingpu2/.local/lib/python3.7/site-packages/keras/engine/training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/merlingpu2/.local/lib/python3.7/site-packages/keras/engine/training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "/home/merlingpu2/.local/lib/python3.7/site-packages/keras/engine/training.py", line 1023, in train_step
        y_pred = self(x, training=True)
    File "/home/merlingpu2/.local/lib/python3.7/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/home/merlingpu2/.local/lib/python3.7/site-packages/keras/engine/input_spec.py", line 296, in assert_input_compatibility
        f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "sequential_6" is incompatible with the layer: expected shape=(None, 64, 64), found shape=(None, 4096)


In [15]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [16]:
# Build SVM model
svm_model = make_pipeline(StandardScaler(), SVC(kernel='linear'))

# Train SVM model
svm_model.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None,
                     coef0=0.0, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='linear', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

In [17]:
# Predict on test data
y_pred = svm_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.7016205910390848


In [18]:
import pickle

# Save the SVM model to a file
with open('svm_model.pkl', 'wb') as f:
    pickle.dump(svm_model, f)

In [None]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid for grid search
param_grid = {
    'svc__C': [0.1, 1, 10],  # Regularization parameter
    'svc__gamma': [0.01, 0.1, 1],  # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
    'svc__kernel': ['linear', 'rbf', 'poly', 'sigmoid']  # Kernel type
}

# Create SVM model with pipeline
svm_model = make_pipeline(StandardScaler(), SVC())

# Create grid search object
grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=5, scoring='accuracy')

# Perform grid search
grid_search.fit(X_train, y_train)

# Best parameters
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Predict on test data with best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)
