# Dataset Creation

In [None]:
import pandas as pd
import numpy as np
import os
import urllib.request
import math

def get_tile_coordinates(lat1, lng1, lat2, lng2, zoom):
    """List every (x, y) tile index inside a lat/lng bounding rectangle.

    The two corners may be given in any order. Each corner is converted to a
    tile index with ``get_tile`` and every tile between the two indices
    (inclusive on both axes) is returned, ordered by x and then by y.
    """
    x1, y1 = get_tile(lat1, lng1, zoom)
    x2, y2 = get_tile(lat2, lng2, zoom)

    # Normalise the corners so the ranges always run low -> high.
    x_low, x_high = (x1, x2) if x1 <= x2 else (x2, x1)
    y_low, y_high = (y1, y2) if y1 <= y2 else (y2, y1)

    tiles = []
    for x in range(x_low, x_high + 1):
        for y in range(y_low, y_high + 1):
            tiles.append((x, y))
    return tiles
def get_tile(lat, lng, zoom):
    """Return the (x, y) index of the Web Mercator tile containing a point.

    Args:
        lat: Latitude in degrees. Values beyond the Web Mercator limit
            (about +/-85.0511 degrees) are clamped to that limit, because
            the projection is undefined at the poles.
        lng: Longitude in degrees.
        zoom: Zoom level; the map is ``2 ** zoom`` tiles wide and high.

    Returns:
        A tuple ``(x, y)`` of ints, each clamped to ``[0, 2 ** zoom - 1]``.
    """
    tile_size = 256
    num_tiles = 1 << zoom

    # At +/-90 degrees sin(lat) is +/-1, which makes the log term below divide
    # by zero or take log(0). Clamp to the latitude where the square Web
    # Mercator map ends so the projection stays finite.
    max_lat = math.degrees(math.atan(math.sinh(math.pi)))
    lat = max(-max_lat, min(max_lat, lat))

    point_x = (tile_size / 2 + lng * tile_size / 360.0) * num_tiles // tile_size
    sin_y = math.sin(lat * (math.pi / 180.0))
    point_y = ((tile_size / 2) + 0.5 * math.log((1 + sin_y) / (1 - sin_y)) * -(tile_size / (2 * math.pi))) * num_tiles // tile_size

    # lng == 180 or a latitude on the map edge lands exactly on (or a rounding
    # error past) the boundary; keep the result a valid tile index.
    last_index = num_tiles - 1
    tile_x = min(max(int(point_x), 0), last_index)
    tile_y = min(max(int(point_y), 0), last_index)
    return tile_x, tile_y

def download_tiles(coordinates, zoom, directory):
    """Download one satellite tile image per (x, y) index into ``directory``.

    The directory is created if it does not exist. Each file is named after
    the latitude/longitude that ``get_lat_lng_for_tile`` reports for the tile,
    followed by the zoom level.
    """
    os.makedirs(directory, exist_ok=True)

    for tile_x, tile_y in coordinates:
        lat, lng = get_lat_lng_for_tile(tile_x, tile_y, zoom)
        destination = os.path.join(directory, f'tile_{lat}_{lng}_{zoom}.png')
        tile_url = f'https://mt0.google.com/vt?lyrs=s&x={tile_x}&y={tile_y}&z={zoom}'

        # Fetch the image and write it straight to disk.
        urllib.request.urlretrieve(tile_url, destination)

def get_lat_lng_for_tile(x, y, zoom):
    """Convert a tile index to the (lat, lng) in degrees of its top-left corner.

    This is the inverse Web Mercator projection evaluated at the fraction
    ``x / 2**zoom`` across and ``y / 2**zoom`` down the map.
    """
    tiles_per_axis = 1 << zoom

    longitude = x / tiles_per_axis * 360.0 - 180

    # Mercator y in radians: pi at the top edge of the map, -pi at the bottom.
    mercator_y = math.pi - 2 * math.pi * y / tiles_per_axis
    # Hyperbolic sine written out explicitly; atan of it gives the latitude.
    sinh_y = 0.5 * (math.exp(mercator_y) - math.exp(-mercator_y))
    latitude = 180.0 / math.pi * math.atan(sinh_y)

    return latitude, longitude

def get_dataframe(tile_coordinates, zoom):
    """Build a table describing each tile's location and image filename.

    Args:
        tile_coordinates: Iterable of ``(x, y)`` tile indices.
        zoom: Zoom level the indices refer to.

    Returns:
        A ``pandas.DataFrame`` with columns ``lat``, ``lng`` and ``filename``,
        one row per tile in input order. The filename uses the same
        ``tile_{lat}_{lng}_{zoom}.png`` pattern as ``download_tiles``.
    """
    rows = []
    for x, y in tile_coordinates:
        lat, lng = get_lat_lng_for_tile(x, y, zoom)
        rows.append({
            'lat': lat,
            'lng': lng,
            'filename': f'tile_{lat}_{lng}_{zoom}.png',
        })

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call. Passing ``columns`` keeps the
    # header present even when there are no tiles.
    return pd.DataFrame(rows, columns=['lat', 'lng', 'filename'])

def main():
    """Index and download every tile covering a fixed rectangular area.

    Writes the tile table to ``test3.csv`` and then saves the tile images in
    the ``test3`` directory.
    """
    # Zoom level and output folder; adjust as needed.
    zoom_level = 19
    output_directory = "test3"

    # Opposite corners of the rectangle, as (lat, lng).
    first_corner = (32.013533269028436, 35.92912375369814)
    second_corner = (31.988425707953432, 35.95037724675171)

    tile_coordinates = get_tile_coordinates(*first_corner, *second_corner, zoom_level)

    # Save the index first, then fetch the images it refers to.
    tile_table = get_dataframe(tile_coordinates, zoom_level)
    tile_table.to_csv('test3.csv')
    download_tiles(tile_coordinates, zoom_level, output_directory)


if __name__ == "__main__":
    main()

# Labeling Images for PV Solar Panel Classification

In [None]:
# Install dependencies
!pip install pyqt5 lxml
%conda create --name=labelme python=3
%conda activate labelme
%pip install labelme

In [None]:
!labelme

# Splitting Dataset
Split the labeled images into train, validation, and test sets.

In [None]:
def splitter(ValTestRatio, dataset_root='/content/Dataset/Dataset',
             yaml_path='/content/yolov5/dataset.yaml'):
    """Split labeled images into train/val/test folders and write dataset.yaml.

    Images are read from ``<dataset_root>/Images`` (``.png`` files) and their
    labels from ``<dataset_root>/Labels`` (same base name, ``.txt``). Each
    image and its label file are copied together into
    ``<dataset_root>/Images/{train,val,test}``. Those three folders are
    emptied first, so the result never mixes files from an earlier run.

    Args:
        ValTestRatio: Share of the dataset for the validation set and, again,
            for the test set. ``ValTestRatio * 2`` is passed to
            ``train_test_split`` as ``test_size`` and the held-out part is
            then halved.
        dataset_root: Folder containing the ``Images`` and ``Labels`` folders.
        yaml_path: Where the dataset description file is written.

    Raises:
        ValueError: If no ``.png`` images are found in the images folder.
        FileNotFoundError: If an image has no matching label file.
    """
    import glob
    import os
    import shutil

    import yaml
    from sklearn.model_selection import train_test_split

    source_images_dir = os.path.join(dataset_root, 'Images')
    source_labels_dir = os.path.join(dataset_root, 'Labels')
    split_dirs = {
        name: os.path.join(source_images_dir, name)
        for name in ('train', 'val', 'test')
    }

    # Recreate the split folders from scratch. shutil is used instead of a
    # shell command so the function also runs outside a notebook, and a
    # folder that does not exist yet is not an error.
    for split_dir in split_dirs.values():
        shutil.rmtree(split_dir, ignore_errors=True)
        os.makedirs(split_dir, exist_ok=True)

    # os.listdir returns names in arbitrary order; sorting makes the seeded
    # split below reproducible across machines and runs.
    all_images = sorted(
        f for f in os.listdir(source_images_dir) if f.endswith('.png')
    )
    if not all_images:
        raise ValueError(f"No .png images found in {source_images_dir}")

    train_images, remaining_images = train_test_split(
        all_images, test_size=ValTestRatio * 2, random_state=42)
    val_images, test_images = train_test_split(
        remaining_images, test_size=0.5, random_state=42)
    split_images = {
        'train': train_images,
        'val': val_images,
        'test': test_images,
    }

    def copy_with_labels(images, destination):
        """Copy each image and its label file into ``destination``."""
        for image in images:
            shutil.copy(os.path.join(source_images_dir, image), destination)
            # Swap only the extension; tile names contain other dots.
            label_file = os.path.splitext(image)[0] + '.txt'
            shutil.copy(os.path.join(source_labels_dir, label_file), destination)

    for name, images in split_images.items():
        copy_with_labels(images, split_dirs[name])

    # Dataset description: one class, named '0'.
    data = {
        'train': split_dirs['train'],
        'val': split_dirs['val'],
        'test': split_dirs['test'],
        'nc': 1,
        'names': ['0'],  # 'names' should be a list of class names
    }
    with open(yaml_path, 'w') as file:
        yaml.dump(data, file)

    # Report how many images ended up in each split.
    total_images = sum(len(images) for images in split_images.values())
    for name, images in split_images.items():
        print(f"Number of images for {name}: {len(images)}, Ratio: {len(images) / total_images}")

    def count_objects(path):
        """Count label lines (one per annotated object) in a folder's .txt files."""
        count = 0
        for label_path in glob.glob(os.path.join(path, "*.txt")):
            with open(label_path) as f:
                count += sum(1 for _ in f)
        return count

    print(f"Number of objects in test set: {count_objects(split_dirs['test'])}")
    print(f"Number of objects in validation set: {count_objects(split_dirs['val'])}")
    print(f"Number of objects in training set: {count_objects(split_dirs['train'])}")
    print("dataset.yaml file contains:\n---------------")
    with open(yaml_path) as file:
        print(file.read(), end="")
    print("---------------")

