In [1]:
# Parameters
# Absolute path of the project root. The dataset and output paths used by the
# cells below are all built from this value with os.path.join.
# NOTE(review): presumably injected by Papermill at run time (the next cell
# mentions Papermill and falls back to the parent of the working directory when
# this name is undefined) -- confirm before editing by hand.
PROJECT_ROOT = "C:\\Users\\Owner\\OneDrive - University Of Houston\\Desktop\\projects\\vegetation-corridor-analysis"


In [2]:
#testing purpose
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import os

# PROJECT_ROOT is normally supplied by the parameters cell (e.g. when the
# notebook is run through Papermill from Streamlit). If the name is undefined,
# fall back to the parent of the current working directory.
try:
    PROJECT_ROOT
except NameError:
    PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))

print("PROJECT_ROOT:", PROJECT_ROOT)

# Load the training dataset
DATASET_PATH = os.path.join(PROJECT_ROOT, "dataset", "train", "train.csv")
dataset = pd.read_csv(DATASET_PATH)

# Encode the colour labels as integers; the string column is overwritten
label_encoder = LabelEncoder()
dataset['repeated_colour'] = label_encoder.fit_transform(dataset['repeated_colour'])

# Features are every column except the grid identifier and the target
X = dataset.drop(columns=['grid', 'repeated_colour'])
y = dataset['repeated_colour']

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a random forest with fixed settings (no hyperparameter search is run)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Original class names, positioned at their encoded integer
unique_classes = label_encoder.classes_

# Optional per-class report (disabled)
#print(classification_report(y_test, y_pred, labels=range(len(unique_classes)), target_names=unique_classes, zero_division='warn'))

# Load the test dataset
TEST_DATA_PATH = os.path.join(
    PROJECT_ROOT,
    "dataset",
    "test_images",
    "image",
    "test.csv"
)

print("TEST PATH:", TEST_DATA_PATH)

new_data = pd.read_csv(TEST_DATA_PATH)

# Select exactly the columns the model was trained on, in training order.
# Dropping known non-feature columns instead would break prediction as soon as
# the CSV gains an extra column or lists its columns in a different order.
feature_columns = list(X.columns)
new_data_features = new_data[feature_columns]

# Make predictions
new_predictions = rf_classifier.predict(new_data_features)

# Decode the numerical predictions back to colour labels
decoded_predictions = label_encoder.inverse_transform(new_predictions)

# Extract grid values
grid_values = new_data['grid']

# Map each colour label to the integer the encoder assigned to it.
# (Enumerating the per-row predictions would instead map a label to the index
# of the last row in which it was predicted.)
color_numerical_map = {label: code for code, label in enumerate(label_encoder.classes_)}


# Combine grid values with predicted colours
predictions_with_grid = pd.DataFrame({'grid': grid_values, 'predicted_color': decoded_predictions})

# Lift the row limit so a full DataFrame can be displayed when needed
pd.set_option('display.max_rows', None)

# Print the DataFrame
#print(predictions_with_grid)


PROJECT_ROOT: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis


Accuracy: 0.9524986271279516
TEST PATH: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\dataset\test_images\image\test.csv


In [3]:
#validating purpose
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the training dataset
TRAIN_PATH = os.path.join(
    PROJECT_ROOT,
    "dataset",
    "train",
    "train.csv"
)

print("TRAIN PATH:", TRAIN_PATH)
print("Exists?", os.path.exists(TRAIN_PATH))

dataset = pd.read_csv(TRAIN_PATH)

# Encode the colour labels as integers; the string column is overwritten
label_encoder = LabelEncoder()
dataset['repeated_colour'] = label_encoder.fit_transform(dataset['repeated_colour'])

# Features are every column except the grid identifier and the target
X = dataset.drop(columns=['grid', 'repeated_colour'])
y = dataset['repeated_colour']

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a random forest with fixed settings (no hyperparameter search is run)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Original class names, positioned at their encoded integer
unique_classes = label_encoder.classes_

# Optional per-class report (disabled)
#print(classification_report(y_test, y_pred, labels=range(len(unique_classes)), target_names=unique_classes, zero_division='warn'))

# Load the validation dataset
VALIDATION_PATH = os.path.join(
    PROJECT_ROOT,
    "dataset",
    "validation",
    "validation.csv"
)

print("VALIDATION PATH:", VALIDATION_PATH)
print("Exists?", os.path.exists(VALIDATION_PATH))

new_data = pd.read_csv(VALIDATION_PATH)

# Select exactly the columns the model was trained on, in training order.
# Dropping only 'grid' would feed any other non-feature column (for example a
# label column) to the model and make predict() fail.
feature_columns = list(X.columns)
new_data_features = new_data[feature_columns]

# Make predictions
new_predictions = rf_classifier.predict(new_data_features)

# Decode the numerical predictions back to colour labels
decoded_predictions = label_encoder.inverse_transform(new_predictions)

# Extract grid values
grid_values = new_data['grid']

# Map each colour label to the integer the encoder assigned to it.
# (Enumerating the per-row predictions would instead map a label to the index
# of the last row in which it was predicted.)
color_numerical_map = {label: code for code, label in enumerate(label_encoder.classes_)}


# Combine grid values with predicted colours
predictions_with_grid = pd.DataFrame({'grid': grid_values, 'predicted_color': decoded_predictions})

# Store the predictions next to the validation CSV
OUTPUT_PATH = os.path.join(
    PROJECT_ROOT,
    "dataset",
    "validation",
    "predictions_with_grid.csv"
)

print("Saving to:", OUTPUT_PATH)

predictions_with_grid.to_csv(OUTPUT_PATH, index=False)

# Lift the row limit so a full DataFrame can be displayed when needed
pd.set_option('display.max_rows', None)

# Print the DataFrame
#print(predictions_with_grid)


TRAIN PATH: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\dataset\train\train.csv
Exists? True


Accuracy: 0.9524986271279516
VALIDATION PATH: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\dataset\validation\validation.csv
Exists? True
Saving to: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\dataset\validation\predictions_with_grid.csv


In [4]:
# Folder that holds the validation CSV and the files derived from it
VALIDATION_DIR = os.path.join(PROJECT_ROOT, "dataset", "validation")

# Two inputs and one output, all located inside the validation folder
VALIDATION_CSV_PATH, PREDICTIONS_PATH, MERGED_OUTPUT_PATH = (
    os.path.join(VALIDATION_DIR, file_name)
    for file_name in ("validation.csv", "predictions_with_grid.csv", "merged_data.csv")
)

print("Validation path:", VALIDATION_CSV_PATH)
print("Predictions path:", PREDICTIONS_PATH)

# Read both tables back from disk
csv_data = pd.read_csv(VALIDATION_CSV_PATH)
predictions_with_grid = pd.read_csv(PREDICTIONS_PATH)

# Inner join on the shared 'grid' column, then persist the combined table
merged_data = csv_data.merge(predictions_with_grid, on='grid')
merged_data.to_csv(MERGED_OUTPUT_PATH, index=False)

print("merged_data.csv saved at:", MERGED_OUTPUT_PATH)

Validation path: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\dataset\validation\validation.csv
Predictions path: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\dataset\validation\predictions_with_grid.csv
merged_data.csv saved at: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\dataset\validation\merged_data.csv


In [5]:
import pandas as pd
import re
import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors

# Build input paths relative to PROJECT_ROOT
MERGED_DATA_PATH = os.path.join(
    PROJECT_ROOT,
    "dataset",
    "validation",
    "merged_data.csv"
)

IMAGE_PATH = os.path.join(
    PROJECT_ROOT,
    "dataset",
    "test_images",
    "image",
    "image.jpg"
)

print("Merged data path:", MERGED_DATA_PATH)
print("Image path:", IMAGE_PATH)

# Load files
merged_data = pd.read_csv(MERGED_DATA_PATH)
image = cv2.imread(IMAGE_PATH)
# cv2.imread reports failure by returning None rather than raising, which
# would otherwise only surface later as an AttributeError on `image.shape`.
if image is None:
    raise FileNotFoundError(f"Could not read image at '{IMAGE_PATH}'")

# Grid identifiers have the form "Grid(<row>, <col>)"
GRID_PATTERN = re.compile(r'Grid\((\d+), (\d+)\)')

# The last CSV row is taken to carry the largest grid indices
last_row = merged_data.iloc[-1]

# First column holds the grid identifier. Use .iloc for positional access:
# `last_row[0]` relies on a deprecated integer-key fallback and emits a warning.
grid_coordinates = last_row.iloc[0].strip('"')

match = GRID_PATTERN.match(grid_coordinates)
if match is None:
    raise ValueError(f"Could not extract grid coordinates from '{grid_coordinates}'")
grid_row, grid_col = map(int, match.groups())

# The bounds check below accepts indices 0..grid_row and 0..grid_col, so the
# number of rows/columns is the largest index plus one.
rw = grid_row + 1
cl = grid_col + 1
print(rw, cl)

# Side length of one grid cell in pixels. Grid lines use the same pitch so they
# stay on the cell boundaries instead of drifting by one pixel per cell.
grid_size = 32
grid_spacing = grid_size

# Canvas must hold all rw x cl cells. Sizing it from the largest index alone
# leaves the last row and column outside the array, where the slice assignment
# below silently writes nothing.
image_height = rw * grid_size
image_width = cl * grid_size

# RGB value used to paint each predicted colour label
color_map = {
    "black": (0, 0, 0),
    "white": (255, 255, 255),
    "Red": (255, 0, 0),
    "Orange": (255, 165, 0),
    "Inchworm": (143, 188, 143),
    "Lawn Green": (124, 252, 0),
    "Bright Green": (100, 221, 2),
    "Celadon": (172, 225, 238),
    "Pastel Green": (173, 255, 47),
    "Green": (0, 255, 0),
    "Pistachio": (179, 238, 225),
    "Dollar Bill": (255, 223, 0),
    "Asparagus": (154, 205, 50),
    "Dark Pastel Green": (154, 255, 154),
    "Camouflage Green": (100, 149, 237),
    "India Green": (34, 139, 34),
    "Blue": (0, 0, 255),
    "Indigo": (75, 0, 130),
    "Purple": (128, 0, 128),
    "Pink": (255, 192, 203),
    "Dark Olive Green": (85, 107, 47),
    "Rifle Green": (64, 128, 128),
    "RAINBOW": (255, 0, 255)
}

# Black canvas of shape (height, width, 3) that receives one colour per cell
image_array = np.zeros((image_height, image_width, 3), dtype=np.uint8)

# Paint every grid cell with the colour predicted for it
for index, row in merged_data.iterrows():
    grid_str = row['grid']
    match = GRID_PATTERN.match(grid_str)
    if not match:
        print(f"Could not extract grid coordinates from '{grid_str}'")
        continue

    grid_x, grid_y = map(int, match.groups())
    if not (0 <= grid_x < rw and 0 <= grid_y < cl):
        print(f"Grid coordinates ({grid_x}, {grid_y}) are outside the bounds of the grid")
        continue

    color_name = row['predicted_color']
    cv = color_map[color_name]
    top = grid_x * grid_size
    left = grid_y * grid_size
    image_array[top:top + grid_size, left:left + grid_size, :] = cv
        
# Overlay a white grid on an image
def draw_grid(image):
    """Draw white, 1-pixel-wide grid lines onto ``image``.

    Lines are placed at multiples of the module-level ``grid_spacing``,
    starting at 0. ``image.shape[0] // grid_spacing`` horizontal and
    ``image.shape[1] // grid_spacing`` vertical lines are drawn.

    Args:
        image: Array whose first two shape entries are height and width; it is
            passed straight to ``cv2.line`` as the drawing target.

    Returns:
        None.
    """
    height, width = image.shape[0], image.shape[1]
    white = (255, 255, 255)

    # Horizontal lines run across the full width at y = 0, s, 2s, ...
    for y in range(0, (height // grid_spacing) * grid_spacing, grid_spacing):
        cv2.line(image, (0, y), (width, y), white, 1)

    # Vertical lines run down the full height at x = 0, s, 2s, ...
    for x in range(0, (width // grid_spacing) * grid_spacing, grid_spacing):
        cv2.line(image, (x, 0), (x, height), white, 1)

        
# Draw the grid on both images
draw_grid(image)
draw_grid(image_array)

# Save the segmented frame under <PROJECT_ROOT>/outputs
OUTPUT_DIR = os.path.join(PROJECT_ROOT, "outputs")
os.makedirs(OUTPUT_DIR, exist_ok=True)

OUTPUT_PATH = os.path.join(OUTPUT_DIR, "segmented_frame.jpg")

plt.imsave(OUTPUT_PATH, image_array)

print("Segmented image saved at:", OUTPUT_PATH)

# Display the images side by side
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
# cv2.imread returns channels in BGR order while matplotlib interprets arrays
# as RGB; convert first so red and blue are not swapped in the displayed photo.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.title("original")
plt.subplot(1, 2, 2)
# image_array was painted with RGB tuples, so it is shown as-is
plt.imshow(image_array)
plt.title("segemented_frame")
plt.show()

Merged data path: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\dataset\validation\merged_data.csv
Image path: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\dataset\test_images\image\image.jpg
10 21
Segmented image saved at: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\outputs\segmented_frame.jpg


  grid_coordinates = last_row[0].strip('"')
  plt.show()


In [6]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
# Locate and read the labelled comparison table
CLASSIFY_PATH = os.path.join(PROJECT_ROOT, "dataset", "classification", "classify.csv")

print("Classify path:", CLASSIFY_PATH)

data = pd.read_csv(CLASSIFY_PATH)

# Ground truth comes from 'original_image', predictions from 'segmented_output'
true_labels = data['original_image']
predicted_labels = data['segmented_output']

# Per-class precision / recall / F1
report = classification_report(true_labels, predicted_labels)
print("Classification Report:")
print(report)

# Confusion matrix: rows are true classes, columns are predicted classes
cm = confusion_matrix(true_labels, predicted_labels)
print("\nConfusion Matrix:")
print(cm)

# Read the four cells, treating class index 1 as the positive class
TN, FP = cm[0, 0], cm[0, 1]
FN, TP = cm[1, 0], cm[1, 1]

# Rates are normalised by the number of actual positives / actual negatives
actual_positives = TP + FN
actual_negatives = FP + TN
TPR = TP / actual_positives
FPR = FP / actual_negatives
TNR = TN / actual_negatives
FNR = FN / actual_positives

# Report the four rates
for rate_label, rate_value in (
    ("\nTrue Positive Rate (TPR):", TPR),
    ("False Positive Rate (FPR):", FPR),
    ("True Negative Rate (TNR):", TNR),
    ("False Negative Rate (FNR):", FNR),
):
    print(rate_label, rate_value)

Classify path: C:\Users\Owner\OneDrive - University Of Houston\Desktop\projects\vegetation-corridor-analysis\dataset\classification\classify.csv
Classification Report:
                precision    recall  f1-score   support

non_vegetation       0.23      0.44      0.30        16
    vegetation       0.90      0.77      0.83       104

      accuracy                           0.72       120
     macro avg       0.56      0.60      0.56       120
  weighted avg       0.81      0.72      0.76       120


Confusion Matrix:
[[ 7  9]
 [24 80]]

True Positive Rate (TPR): 0.7692307692307693
False Positive Rate (FPR): 0.5625
True Negative Rate (TNR): 0.4375
False Negative Rate (FNR): 0.23076923076923078
