# << Part 1: Data Scraping >>

## Libraries

In [None]:
import os
import time
import urllib.request

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException ,  ElementClickInterceptedException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [16]:
# Start a Chrome session whose driver binary is downloaded and managed by
# webdriver_manager.
service = Service(ChromeDriverManager().install())
# `Options` and `Chrome` are not imported by name in the visible cells, so
# reach them through the already-imported `webdriver` module.
options = webdriver.ChromeOptions()
# Keyword arguments keep the call independent of the positional parameter
# order of webdriver.Chrome.
driver = webdriver.Chrome(service=service, options=options)

In [17]:
# Shutterstock search URL for "street traffic lights", restricted to photos.
urlPath = 'https://www.shutterstock.com/search/street-traffic-lights?image_type=photo'
# Open the search results in the browser session created above.
driver.get(urlPath)

In [18]:
 # Scroll to the bottom of the page
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") 
time.sleep(5)      # Pause so the page has time to finish loading its images

In [None]:
# Collect the full-size image URL behind every thumbnail on the first
# NUM_PAGES result pages. The collected URLs end up in the list `URL`.
THUMBNAIL_CLASS = "mui-t7xql4-a-inherit-link"
PREVIEW_LINK_CLASS = "mui-1jn9gxg-link-disabled"
FULL_IMAGE_CLASS = "mui-1fxui0y-image"
NEXT_BUTTON_XPATH = '//*[@id="__next"]/div[3]/div/div/div[1]/div/div[6]/div[3]/div/div[1]/a'
NUM_PAGES = 14  # 14 pages give approximately 1400 images

URL = []

# Outer loop: one iteration per results page.
for page in range(NUM_PAGES):
    # Wait until the thumbnails of the current results page are present.
    try:
        images = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, THUMBNAIL_CLASS))
        )
    except TimeoutException:
        break  # No thumbnails showed up, so there is nothing left to scrape

    # Inner loop: visit every thumbnail by position. An index is used instead
    # of iterating the elements because every navigation invalidates the
    # previously located elements (StaleElementReferenceException).
    for index in range(len(images)):
        left_results_page = False
        try:
            images = driver.find_elements(By.CLASS_NAME, THUMBNAIL_CLASS)
            if index >= len(images):
                break  # The page now lists fewer thumbnails than before

            images[index].click()   # Open the page of this image
            left_results_page = True

            # Wait for the preview on the image page, then click it
            preview = WebDriverWait(driver, 20).until(
                EC.element_to_be_clickable((By.CLASS_NAME, PREVIEW_LINK_CLASS))
            )
            preview.click()

            # Wait for the actual image and read its URL from the "src" attribute
            actual_image = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CLASS_NAME, FULL_IMAGE_CLASS))
            )
            src = actual_image.get_attribute('src')
            if src:
                URL.append(src)

        except (TimeoutException, NoSuchElementException,
                StaleElementReferenceException,
                ElementClickInterceptedException) as exc:
            # Best effort: report the problem and move on to the next image
            print("Skipping image {} on page {}: {}".format(
                index, page + 1, type(exc).__name__))

        finally:
            # Go back to the results page, but only if it was actually left;
            # otherwise back() would leave the results page itself.
            if left_results_page:
                driver.back()

    if page == NUM_PAGES - 1:
        break  # Last requested page is done; no need to look for "NEXT"

    # Move on to the next results page
    try:
        next_page = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, NEXT_BUTTON_XPATH))   # The "NEXT" button
        )
        driver.execute_script("arguments[0].scrollIntoView();", next_page)  # Reduces the chance of ElementClickInterceptedException
        next_page.click()
    except (TimeoutException, ElementClickInterceptedException):
        break  # No usable "NEXT" button: stop and keep what was collected



In [None]:
# Download the images from the collected URLs and store them as
# images/traffic_signals<index>.jpg.
os.makedirs("images", exist_ok=True)  # urlretrieve does not create the target folder
for i, image_url in enumerate(URL):
    if not image_url:
        continue  # The "src" attribute was missing for this entry
    try:
        urllib.request.urlretrieve(str(image_url), "images/traffic_signals{}.jpg".format(i))
    except (OSError, ValueError) as exc:
        # Network/HTTP failures are OSError subclasses; a malformed URL raises
        # ValueError. Keep downloading the remaining images.
        print("Could not download {}: {}".format(image_url, exc))

## Result of Selenium

In [None]:
# Show every image file found in image_folder.
# NOTE(review): image_folder and cv2 are only defined/imported in later cells,
# and plt is not imported anywhere in the visible notebook; confirm the
# intended execution order and the matplotlib import.
for filename in os.listdir(image_folder):
    if not filename.endswith((".jpeg", ".jpg", ".png")):
        continue

    # Read the original image
    image_path = os.path.join(image_folder, filename)
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread returns None (it does not raise) for unreadable files
        continue

    # The anchor-box computation that used to sit here was never displayed
    # or stored, so it has been dropped from this preview cell.

    # Display the image (OpenCV loads BGR, matplotlib expects RGB)
    plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
    plt.title("Original Image")

    plt.show()

# <<  Part 2  Data Wrangling >>

##         Libraries

In [2]:
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.cluster import KMeans

2023-11-25 22:29:51.878284: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


##  2.1 Data  Augmentation...

##  2.1.1 Calculate the image orientation

In [3]:
def determine_traffic_signal_orientation(image_path, aspect_ratio_threshold=1.5):
    """Classify the dominant shape in an image as horizontal or vertical.

    The image is converted to grayscale, blurred, run through Canny edge
    detection, and the external contours of the edge map are extracted.
    The contour whose bounding box has the largest area decides the result.

    Args:
        image_path: Path of the image file to analyse.
        aspect_ratio_threshold: Width/height ratio above which the bounding
            box counts as "Horizontal". Adjust it to the dataset.

    Returns:
        "Horizontal", "Vertical", or "Unable to determine orientation" when
        the image cannot be read or contains no contours.
    """
    unknown = "Unable to determine orientation"

    # cv2.imread returns None (it does not raise) for unreadable files
    image = cv2.imread(image_path)
    if image is None:
        return unknown

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Blur with a 5x5 Gaussian kernel to reduce noise before edge detection;
    # sigma 0 lets OpenCV derive the standard deviation from the kernel size
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Canny with lower threshold 50 and upper threshold 150
    edges = cv2.Canny(blurred, 50, 150)

    # Contours are the continuous curves that bound objects in the edge map
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        return unknown

    # Judge the largest bounding box rather than whichever contour happens
    # to come first, which could be a tiny speck of noise.
    _, _, w, h = max(
        (cv2.boundingRect(contour) for contour in contours),
        key=lambda box: box[2] * box[3],
    )
    aspect_ratio = float(w) / h

    if aspect_ratio > aspect_ratio_threshold:
        return "Horizontal"
    return "Vertical"

## 2.1.2 Image rotation based on orientation

In [4]:
def apply_orientation_aware_augmentation(image, orientation):
    """Return one randomly augmented copy of *image*.

    For 'Horizontal' the generator gets horizontal_flip enabled, for
    'Vertical' vertical_flip; both also get a rotation_range of 10.
    Any other orientation leaves the generator at its defaults.

    Args:
        image: Image array; a batch axis is added in front before it is
            handed to the generator.
        orientation: 'Horizontal', 'Vertical', or anything else.

    Returns:
        The first image of the generated batch, cast to uint8.
    """
    generator = ImageDataGenerator()

    # Pick the flip switch that belongs to the orientation, if any
    flip_switches = (
        ('Horizontal', 'horizontal_flip'),
        ('Vertical', 'vertical_flip'),
    )
    for label, flip_attribute in flip_switches:
        if orientation == label:
            setattr(generator, flip_attribute, True)
            generator.rotation_range = 10  # Rotation range can be adjusted
            break

    # flow() works on batches, so wrap the single image into a batch of one
    batch = np.expand_dims(image, 0)
    iterator = generator.flow(batch, batch_size=1)

    # Take the only image out of the first generated batch
    first_batch = iterator.next()
    return first_batch[0].astype('uint8')

## 2.1.3 Increase Brightness and contrast

In [5]:
def adjust_brightness_contrast(image, brightness_factor=1.0, contrast_factor=1.0):
    """Scale the brightness, then the contrast, of an image.

    Brightness multiplies every value by brightness_factor. Contrast then
    stretches the values around the mid-point 128 by contrast_factor.

    Args:
        image: Array of pixel values; it is not modified.
        brightness_factor: Multiplier applied to every value.
        contrast_factor: Stretch factor around 128.

    Returns:
        A new uint8 array with the values clipped to [0, 255].
    """
    midpoint = 128

    # Work in floating point so intermediate values may leave [0, 255]
    pixels = image.astype(float) * brightness_factor
    pixels = contrast_factor * (pixels - midpoint) + midpoint

    # Bring the values back into the valid range before the uint8 cast
    return np.clip(pixels, 0, 255).astype(np.uint8)

## 2.1.4 Apply zooming techniques

In [6]:
def apply_orientation_aware_zoom(image, orientation):
    """Resize *image* by a random factor along the axis given by *orientation*.

    The factor is drawn uniformly between 1.2 and 1.5. 'Vertical' scales
    only the y axis, 'Horizontal' only the x axis, and any other value
    scales both axes by the same factor.

    Args:
        image: Image array to resize.
        orientation: 'Vertical', 'Horizontal', or anything else.

    Returns:
        The resized image.
    """
    zoom_factor = np.random.uniform(1.2, 1.5)  # Adjust as needed

    # Decide the per-axis scale first, then resize once
    if orientation == 'Vertical':
        scale_x, scale_y = 1, zoom_factor
    elif orientation == 'Horizontal':
        scale_x, scale_y = zoom_factor, 1
    else:
        scale_x = scale_y = zoom_factor

    return cv2.resize(image, None, fx=scale_x, fy=scale_y)

## 2.1 Result of Augmented Images

In [None]:
image_folder = "Desktop/images"

# Show each image in the folder next to its augmented version.
# NOTE(review): plt is not imported in the visible cells; presumably
# `import matplotlib.pyplot as plt` was run elsewhere. Confirm.
for filename in os.listdir(image_folder):
    if not filename.endswith((".jpeg", ".jpg", ".png")):
        continue

    # Read the original image
    image_path = os.path.join(image_folder, filename)
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread returns None (it does not raise) for unreadable files
        continue

    # Determine orientation
    orientation = determine_traffic_signal_orientation(image_path)

    # Apply orientation-aware augmentation
    augmented_image = apply_orientation_aware_augmentation(original_image, orientation)

    # Adjust brightness and contrast
    brightness_factor = 1.5  # Adjust as needed
    contrast_factor = 1.2    # Adjust as needed
    adjusted_image = adjust_brightness_contrast(augmented_image, brightness_factor, contrast_factor)

    # Apply orientation-aware augmentation again
    # NOTE(review): the result is named Zoomed_image, yet
    # apply_orientation_aware_zoom is never called in the visible cells.
    # Confirm whether the zoom function was meant here.
    Zoomed_image = apply_orientation_aware_augmentation(adjusted_image, orientation)

    # Display the original and augmented images side by side
    plt.figure(figsize=(15, 7))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
    plt.title("Original Image")

    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(Zoomed_image, cv2.COLOR_BGR2RGB))
    plt.title("Augmented Image")

    plt.show()

## 2.2 Normalization

In [8]:
def pixel_normalization(image, color_space=cv2.COLOR_BGR2LAB):
    """Return *image* as 8-bit pixels converted with *color_space*.

    The earlier version divided by 255 and multiplied by 255 again in
    float32 before truncating to uint8. That round trip is a no-op at best
    and, because the cast truncates, may lower a value by one level when
    the float result lands just below the integer. It is therefore skipped.

    Args:
        image: Pixel array. uint8 input is used as is; other dtypes are
            rounded and clipped to [0, 255] first.
        color_space: OpenCV colour conversion code passed to cv2.cvtColor;
            the default converts BGR to LAB.

    Returns:
        The colour-converted uint8 image.
    """
    if image.dtype != np.uint8:
        # Round and clip instead of letting the uint8 cast truncate or wrap
        image = np.clip(np.rint(image), 0, 255).astype(np.uint8)

    return cv2.cvtColor(image, color_space)

## Result of Normalization

In [None]:

# Show each image in the folder next to its normalized version.
# NOTE(review): plt is not imported in the visible cells; presumably
# `import matplotlib.pyplot as plt` was run elsewhere. Confirm.
for filename in os.listdir(image_folder):
    if not filename.endswith((".jpeg", ".jpg", ".png")):
        continue

    # Read the original image
    image_path = os.path.join(image_folder, filename)
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread returns None (it does not raise) for unreadable files
        continue

    # Normalize the pixels (the default colour conversion is BGR to LAB)
    normalized_pixel_image = pixel_normalization(original_image)

    # Display the original and normalized images side by side
    plt.figure(figsize=(15, 7))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
    plt.title("Original Image")

    plt.subplot(1, 2, 2)
    # NOTE(review): normalized_pixel_image holds LAB channels but is converted
    # here as if it were BGR, so the colours shown are not the true colours.
    # Use cv2.COLOR_LAB2RGB if a faithful rendering is wanted. Confirm intent.
    plt.imshow(cv2.cvtColor(normalized_pixel_image, cv2.COLOR_BGR2RGB))
    plt.title("normalized_pixel_image")

    plt.show()

## 2.3 Anchor Boxes

In [16]:
def _scale_box_about_center(box, scale_factor, image_width, image_height):
    """Return (left, top, right, bottom) of *box* scaled about its own center.

    *box* is (x, y, w, h). The corners are rounded to integers and clipped
    so that they stay inside an image of the given width and height.
    """
    x, y, w, h = (float(value) for value in box)
    center_x = x + w / 2.0
    center_y = y + h / 2.0
    half_w = w * scale_factor / 2.0
    half_h = h * scale_factor / 2.0

    left = max(0, int(round(center_x - half_w)))
    top = max(0, int(round(center_y - half_h)))
    right = min(image_width - 1, int(round(center_x + half_w)))
    bottom = min(image_height - 1, int(round(center_y + half_h)))
    return left, top, right, bottom


def estimate_anchor_boxes(image_path, scale_factor, num_boxes=3):
    """Draw the largest contour bounding boxes, enlarged, onto an image.

    Edges are detected with Canny, external contours are extracted, and the
    bounding boxes with the largest area (width * height) are kept. Each box
    is scaled by *scale_factor* around its own center. Multiplying x and y
    by the factor as well would move the box away from the object.

    Args:
        image_path: Path of the image file to read.
        scale_factor: Factor applied to the width and height of each box.
        num_boxes: Maximum number of boxes to draw; fewer are drawn when
            fewer contours are found.

    Returns:
        A copy of the image with the boxes drawn in green.

    Raises:
        ValueError: If the image cannot be read.
    """
    # Read the color image; cv2.imread returns None for unreadable files
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Could not read image: {}".format(image_path))

    # Apply edge detection on the color image
    edges = cv2.Canny(image, 50, 150)

    # Contours are the boundaries of objects in the edge map
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Bounding boxes as (x, y, w, h), largest area first, limited to num_boxes
    bounding_boxes = [cv2.boundingRect(contour) for contour in contours]
    selected_boxes = sorted(
        bounding_boxes, key=lambda box: box[2] * box[3], reverse=True
    )[:num_boxes]

    # Draw each scaled box: top-left corner to bottom-right corner
    image_height, image_width = image.shape[:2]
    image_with_boxes = image.copy()
    for box in selected_boxes:
        left, top, right, bottom = _scale_box_about_center(
            box, scale_factor, image_width, image_height
        )
        cv2.rectangle(image_with_boxes, (left, top), (right, bottom), (0, 255, 0), 2)

    return image_with_boxes

## Result of Anchor Boxes

In [None]:
# Show each image in the folder next to the same image with anchor boxes.
# NOTE(review): plt is not imported in the visible cells; presumably
# `import matplotlib.pyplot as plt` was run elsewhere. Confirm.
for filename in os.listdir(image_folder):
    if not filename.endswith((".jpeg", ".jpg", ".png")):
        continue

    # Read the original image
    image_path = os.path.join(image_folder, filename)
    original_image = cv2.imread(image_path)
    if original_image is None:
        # cv2.imread returns None (it does not raise) for unreadable files
        continue

    # Draw the scaled anchor boxes onto a copy of the image
    scale_factor = 1.2
    result_image = estimate_anchor_boxes(image_path, scale_factor)

    # Display the original image and the image with anchor boxes
    plt.figure(figsize=(15, 7))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
    plt.title("Original Image")

    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))
    plt.title("Image with Anchor Boxes")  # was mislabelled "Augmented Image"

    plt.show()