# Libraries

In [None]:
import matplotlib.pyplot as plt
import os
from PIL import Image
import PIL
import numpy as np
import time
import pandas as pd
from sklearn.cluster import KMeans
from sklearn import preprocessing
import seaborn as sns

In [None]:
# Mount Google Drive at /content/drive (only works inside a Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Functions

## Resize image

In [None]:
def resize_image(dir_image, dir_output, compare):
  """Shrink the image(s) of one band found in ``dir_image`` and save to ``dir_output``.

  A file in ``dir_image`` is processed when its extension is one of
  .jpg/.gif/.png/.tga (case-insensitive) and the part of its name before the
  first underscore equals ``compare``. Each match is shrunk with
  ``Image.thumbnail`` to fit inside a 360x771 box and written to
  ``dir_output``. Every match is written to the same path, so if several
  files match, the one listed last is what remains on disk.

  Side effect: the process working directory is changed to ``dir_image`` and
  is not restored.

  Args:
    dir_image: Directory holding the source images.
    dir_output: Full path (including file name) of the image to write.
    compare: Band prefix to select, e.g. ``"band01"``.
  """
  os.chdir(dir_image)
  size = 360, 771  # This size varies depending on your selected area
  valid_images = (".jpg", ".gif", ".png", ".tga")

  for file in os.listdir():
    name, ext = os.path.splitext(file)
    if ext.lower() not in valid_images:
      continue
    if name.split("_")[0] != compare:
      continue
    # The context manager closes the underlying file handle once saved.
    with Image.open(file, "r") as img:
      # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
      img.thumbnail(size, Image.LANCZOS)
      img.save(dir_output)


## Get data

In [None]:
def get_data(dir_image, dir_output, tipo):
  """Resize the band images of one surface and return their scaled pixel values.

  Step 1: for every image file in ``dir_image`` (extensions .jpg/.gif/.png/.tga)
  a channel number is handed out, starting at 1, and ``resize_image`` is asked
  to write the band ``band0<channel>`` to
  ``<dir_output>/GOES-16_Ch<channel>_<tipo>.png``. Because the prefix is built
  as ``band0<channel>``, channel 10 and above would look for ``band010`` etc.

  Step 2: the files ``GOES-16_Ch0_<tipo>.png`` .. ``GOES-16_Ch6_<tipo>.png``
  that exist in ``dir_output`` are read in channel order. For each pixel the
  first band (or the pixel value itself for single-band images) is divided by
  255 and rounded to 3 decimals.

  Side effects: writes the resized images, prints each file name that is read,
  and leaves the process working directory set to ``dir_output``.

  Args:
    dir_image: Directory with the original band images.
    dir_output: Directory that receives the resized images.
    tipo: Surface name used in the output file names (e.g. ``"cloud"``).

  Returns:
    A list with one list of floats per channel file found.
  """
  valid_images = (".jpg", ".gif", ".png", ".tga")

  os.chdir(dir_image)  # Change the directory

  # One channel per image file present when the function starts.
  n_images = sum(
    1 for file in os.listdir()
    if os.path.splitext(file)[1].lower() in valid_images
  )

  # The output name depends only on the channel and `tipo`, so a single call
  # per channel is enough (no per-surface branching, no second resize).
  for channel in range(1, n_images + 1):
    dir_output_aux = f'{dir_output}/GOES-16_Ch{channel}_{tipo}.png'
    resize_image(dir_image, dir_output_aux, f'band0{channel}')

  os.chdir(dir_output)  # Change the directory
  available = set(os.listdir())  # list the directory once, not per channel

  all_pixels = []
  for channel in range(7):
    file = f'GOES-16_Ch{channel}_{tipo}.png'
    if file not in available:
      continue
    print(file)
    with Image.open(file, "r") as image:
      pix_val = list(image.getdata())
    # Multi-band images yield tuples, single-band images yield plain numbers.
    pix_vals = [
      round((px[0] if isinstance(px, tuple) else px) / 255, 3)
      for px in pix_val
    ]
    all_pixels.append(pix_vals)

  return all_pixels


# Plotting surface spectral reflectance and creating datasets

In [None]:
# Index 0 is a placeholder; indices 1-6 are the central wavelengths of channels 1-6.
central_wavelength = [0, 0.47, 0.64, 0.86, 1.37, 1.6, 2.2]

# get_data() changes the working directory, so relative paths would stop
# resolving after the first surface; make them absolute up front.
dir_images = os.path.abspath('images_path')  # In this directory, there are folders for each surface
dir_datasets = os.path.abspath('datasets_path')
os.makedirs(dir_datasets, exist_ok=True)
final_df = pd.DataFrame()

# Only sub-directories are surfaces: the plots saved below land in dir_images
# as .png files and must not be picked up on a re-run. Sorting keeps the
# surface -> label mapping stable between runs.
surfaces = sorted(
  entry for entry in os.listdir(dir_images)
  if os.path.isdir(os.path.join(dir_images, entry))
)

for label, surface in enumerate(surfaces):
  dir_image = os.path.join(dir_images, surface, 'original')
  dir_resized = os.path.join(dir_images, surface, 'resized')
  os.makedirs(dir_resized, exist_ok=True)
  datas = get_data(dir_image, dir_resized, surface)
  if not datas:
    print(f'No channel images found for {surface}; skipping')
    continue

  # Creating dataset: one row per pixel, one column per channel
  df = pd.DataFrame(datas).T
  n_channels = df.shape[1]
  df.columns = [str(channel) for channel in range(1, n_channels + 1)]
  df.to_csv(f'{dir_datasets}/{surface}.csv')

  # Reflectance in percent, shaped (channels, pixels), taken before the label
  # column is added.
  spectra = df.to_numpy(dtype=float).T * 100

  # Appending all datasets; each surface gets its own numeric label
  df['label'] = float(label)
  final_df = pd.concat([final_df, df], axis=0)

  # Plotting surface spectral reflectance
  fig, ax = plt.subplots()
  plt.title(f"{surface} Spectral Reflectance")
  # A 2-D array is plotted column by column, i.e. one line per pixel.
  ax.plot(spectra)

  # Pin the ticks to the channel positions before labelling them.
  wavelengths = central_wavelength[1:n_channels + 1]
  ax.set_xticks(range(len(wavelengths)))
  ax.set_xticklabels(wavelengths)
  plt.xlabel(r"wavelength [$\mu$m]")
  plt.ylabel("Reflectance %")
  plt.ylim([0, 100])
  plt.savefig(f'{dir_images}/{surface}.png')
  plt.show()
  plt.close()

final_df.to_csv(f'{dir_datasets}/final_dataframe.csv')

# Classification

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

## Data

In [None]:
# Load the labelled pixel table written by the dataset cell and the table of
# the area to classify.
labeled_dataset, random_area_dataset = (
  pd.read_csv(csv_path)
  for csv_path in (f'{dir_datasets}/final_dataframe.csv', 'random_area_csv_path')
)

## SVM

In [None]:
# Split the labelled table into channel features and the surface label.
features = labeled_dataset.drop(columns=["Unnamed: 0", "label"])
target = labeled_dataset["label"]
X_train, X_test, y_train, y_test = train_test_split(
  features,
  target,
  test_size=0.2,
  random_state=10,
)

# Fit a linear-kernel Support Vector Machine on the training split
svc_model = SVC(C=.1, kernel='linear', gamma=1).fit(X_train, y_train)

prediction = svc_model.predict(X_test)

# Mean accuracy on the training split first, then on the held-out split
train_accuracy = svc_model.score(X_train, y_train)
print(train_accuracy)
test_accuracy = svc_model.score(X_test, y_test)
print(test_accuracy)

In [None]:
# Classify every row of the random-area table with the fitted SVM.
random_area_features = random_area_dataset.drop(columns=["Unnamed: 0"])
pred = svc_model.predict(random_area_features)
print(pred)

In [None]:
# (rows, columns) of the label image; rows * columns must equal len(pred)
image_size = (769, 360)  # Your random area image size

pred_image = pred.reshape(image_size)
plt.imshow(pred_image)

## KNN

In [None]:
# Same feature/label split and train/test partition as used for the SVM.
features = labeled_dataset.drop(columns=["Unnamed: 0", "label"])
target = labeled_dataset["label"]
X_train, X_test, y_train, y_test = train_test_split(
  features,
  target,
  test_size=0.2,
  random_state=10,
)

In [None]:
# 5-nearest-neighbours classifier fitted on the training split
# (fit() returns the estimator itself, so the call can be chained).
classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = classifier.predict(X_test)

In [None]:
# Classify every row of the random-area table with the fitted KNN model.
random_area_features = random_area_dataset.drop(columns=["Unnamed: 0"])
pred = classifier.predict(random_area_features)

In [None]:
# (rows, columns) of the label image; rows * columns must equal len(pred)
image_size = (769, 360)  # Your random area image size

pred_image = pred.reshape(image_size)
plt.axis('off')
plt.imshow(pred_image)