# Installs and imports

In [1]:
%%capture
!pip install rioxarray;
!pip install geopandas;
!pip install spectral;
!pip install netCDF4;

In [33]:
import numpy as np
import matplotlib.pyplot as plt
import spectral.io.envi as envi
from skimage import io
import h5py
import netCDF4 as nc
import pandas as pd
from PIL import Image
import xarray as xr
import rioxarray as rio
from pathlib import Path
import rasterio as rio
from rasterio.mask import mask
import geopandas as gpd
from shapely.geometry import mapping
import time

In [28]:
# Mount Google Drive under /content/drive; force_remount=True re-mounts even
# when the cell is executed again in the same session.
from google.colab import drive

drive.mount(
    '/content/drive',
    force_remount=True,
)

Mounted at /content/drive


# Orthophoto robot

In [None]:
# Read the plot outlines (GeoJSON) and the raster dataset (NetCDF) for the robot data.
plot_shapes = gpd.read_file('/content/drive/MyDrive/Master/ground_truth/robot-2022-with-grain-yield.geojson')
ds = xr.open_dataset(
    '/content/drive/MyDrive/Master/ground_truth/robot-2022-P4M-50m.nc',
    decode_coords='all',
)

# Keep only the plots whose id is non-zero and below 9000, then re-wrap the
# selection in a GeoDataFrame.
valid_plot_id = (plot_shapes["plot_id"] < 9000) & (plot_shapes["plot_id"] != 0)
plot_shapes = gpd.GeoDataFrame(plot_shapes.loc[valid_plot_id])

# Clip the dataset to every plot outline; each clip is kept as the DataArray
# returned by Dataset.to_array().
clipped_polygons = [
    ds.rio.clip([mapping(plot_geometry)]).to_array()
    for plot_geometry in plot_shapes["geometry"]
]

In [34]:
# Size passed to PIL's resize() for every 2-D slice
new_shape = (256,256)

# One 4-D array per clipped plot: (axis 0, axis 1, new_shape[1], new_shape[0])
resized_and_rotated_polygons = []

for polygon in clipped_polygons:
    n_outer, n_inner = polygon.shape[0], polygon.shape[1]
    resized_polygon = np.zeros((n_outer, n_inner, new_shape[1], new_shape[0]))
    # Visit every (axis 0, axis 1) pair; polygon[i, j] is handled as one 2-D image
    for i, j in np.ndindex(n_outer, n_inner):
        plane = Image.fromarray(polygon[i, j].values)
        plane = plane.resize(new_shape)
        # Rotate by 39.6 degrees with bicubic resampling
        # (presumably aligns the plot with the image axes; TODO confirm)
        plane = plane.rotate(39.6, resample=Image.BICUBIC)
        resized_polygon[i, j] = np.array(plane)
    resized_and_rotated_polygons.append(resized_polygon)

In [35]:
# Crop every stack to indices 102..153 of the last axis (52 of the 256 columns,
# leaving 102 columns off on either side).
final_plots = [stack[:, :, :, 102:154] for stack in resized_and_rotated_polygons]

# Orthophoto nobal nue

In [None]:
import geopandas as gpd
import xarray as xr
import rioxarray as rio
from shapely.geometry import mapping
from shapely.wkt import loads

# Read the plot outlines (GeoJSON) and the raster dataset (NetCDF) for the nobal nue data.
plot_shapes_nobal = gpd.read_file('/content/drive/MyDrive/Master/ground_truth/nobal_nue_gy.geojson')
ds_nobal = xr.open_dataset(
    '/content/drive/MyDrive/Master/ground_truth/nobalnue-2022-P4M-20m.nc',
    decode_coords='all',
)

# Keep only the plots whose id is non-zero and below 9000, then re-wrap the
# selection in a GeoDataFrame.
valid_plot_id_nobal = (plot_shapes_nobal["plot_id"] < 9000) & (plot_shapes_nobal["plot_id"] != 0)
plot_shapes_nobal = gpd.GeoDataFrame(plot_shapes_nobal.loc[valid_plot_id_nobal])

# Clip the dataset to every plot outline; each clip is kept as the DataArray
# returned by Dataset.to_array().
clipped_polygons_nobalnue = [
    ds_nobal.rio.clip([mapping(plot_geometry)]).to_array()
    for plot_geometry in plot_shapes_nobal["geometry"]
]

In [None]:
import numpy as np
from PIL import Image

# Size passed to PIL's resize() for every 2-D slice
new_shape = (256,256)

# One 4-D array per clipped plot: (axis 0, axis 1, new_shape[1], new_shape[0])
resized_and_rotated_polygons_nobal = []

for polygon in clipped_polygons_nobalnue:
    n_outer, n_inner = polygon.shape[0], polygon.shape[1]
    resized_polygon = np.zeros((n_outer, n_inner, new_shape[1], new_shape[0]))
    # Visit every (axis 0, axis 1) pair; polygon[i, j] is handled as one 2-D image
    for i, j in np.ndindex(n_outer, n_inner):
        plane = Image.fromarray(polygon[i, j].values)
        plane = plane.resize(new_shape)
        # Rotate by 39.6 degrees with bicubic resampling
        # (presumably aligns the plot with the image axes; TODO confirm)
        plane = plane.rotate(39.6, resample=Image.BICUBIC)
        resized_polygon[i, j] = np.array(plane)
    resized_and_rotated_polygons_nobal.append(resized_polygon)

In [None]:
# Crop every stack to indices 102..153 of the last axis (52 columns).
final_plots_nobal = [stack[:, :, :, 102:154] for stack in resized_and_rotated_polygons_nobal]

# Ground truth for both robot and nobal

In [None]:
# Load the semicolon-separated info tables for both data sets.
df_robot = pd.read_csv('/content/drive/MyDrive/Master/info_fil/robot_info.csv', sep=';')
df_nobal = pd.read_csv('/content/drive/MyDrive/Master/info_fil/nobal_info.csv', sep=';')

# The 'GY' column is read as text with a decimal comma: swap the comma for a
# period and convert the column to float.
for info_frame in (df_robot, df_nobal):
    info_frame['GY'] = info_frame['GY'].str.replace(',', '.').astype(float)

In [None]:
# Impute missing 'GY' values with the column mean, rounded to 2 decimals.
mean_value_gy = round(df_robot['GY'].mean(), 2)
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# the column selection: that chained in-place form is deprecated in pandas and
# leaves df_robot unchanged when Copy-on-Write is active.
df_robot['GY'] = df_robot['GY'].fillna(mean_value_gy)
# Number of missing values left (shown as the cell output)
df_robot['GY'].isna().sum()

In [None]:
# Impute missing 'GY' values with the column mean. The mean is rounded to
# 2 decimals so the nobal table is treated the same way as the robot table
# (it was previously rounded to a whole number).
mean_value_gy_nobal = round(df_nobal['GY'].mean(), 2)
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# the column selection: that chained in-place form is deprecated in pandas and
# leaves df_nobal unchanged when Copy-on-Write is active.
df_nobal['GY'] = df_nobal['GY'].fillna(mean_value_gy_nobal)
# Number of missing values left (shown as the cell output)
df_nobal['GY'].isna().sum()

In [None]:
# Regression targets: the 'GY' column of each table, copied into a NumPy array
target_robot = df_robot['GY'].to_numpy(copy=True)
target_nobal = df_nobal['GY'].to_numpy(copy=True)

# Data Augmentation

## Nobal nue dict

In [None]:
from collections import defaultdict

# Regroup the plot stacks by their index along axis 1:
# key 'nobal_<index>' -> list with one array per plot.
date_images_nobal = defaultdict(list)

for plot_stack in final_plots_nobal:
    n_dates = plot_stack.shape[1]
    for date_idx in range(n_dates):
        # Skip the first entry of axis 0 and take one slice of axis 1
        date_images_nobal[f'nobal_{date_idx}'].append(plot_stack[1:, date_idx])

In [None]:
# For every key, a list with each stored array reversed along axis 1
# (the row axis of the 2-D planes).
X_mirrored_v_nobal = {
    f'{date}': [plot_image[:, ::-1] for plot_image in plot_images]
    for date, plot_images in date_images_nobal.items()
}

In [None]:
# Horizontally mirrored copy of every image, grouped under the same keys.
# The stored arrays are 3-D (leading axis, rows, columns), so the mirror has to
# reverse the LAST axis. Reversing axis 0 (image[::-1, :]) would only reorder
# the leading axis and leave every 2-D plane unflipped.
X_mirrored_h_nobal = {
    f'{date}': [image[..., ::-1] for image in lst_of_images]
    for date, lst_of_images in date_images_nobal.items()
}

## Robot dict

In [None]:
from collections import defaultdict

# Regroup the plot stacks by their index along axis 1:
# key 'robot_<index>' -> list with one array per plot.
date_images = defaultdict(list)

for plot_stack in final_plots:
    n_dates = plot_stack.shape[1]
    for date_idx in range(n_dates):
        # Skip the first entry of axis 0 and take one slice of axis 1
        date_images[f'robot_{date_idx}'].append(plot_stack[1:, date_idx])

In [None]:
# For every key, a list with each stored array reversed along axis 1
# (the row axis of the 2-D planes).
X_mirrored_v = {
    f'{date}': [plot_image[:, ::-1] for plot_image in plot_images]
    for date, plot_images in date_images.items()
}

In [None]:
# Horizontally mirrored copy of every image, grouped under the same keys.
# The stored arrays are 3-D (leading axis, rows, columns), so the mirror has to
# reverse the LAST axis. Reversing axis 0 (image[::-1, :]) would only reorder
# the leading axis and leave every 2-D plane unflipped.
X_mirrored_h = {
    f'{date}': [image[..., ::-1] for image in lst_of_images]
    for date, lst_of_images in date_images.items()
}

## The final dict


In [None]:
#ROBOT
# Per key: the original images, followed by the vertically mirrored and then
# the horizontally mirrored copies.
# Each list is built fresh. Storing the list object from date_images directly
# and extending it would also grow date_images itself, so re-running the cell
# (or reusing date_images afterwards) would see already-augmented data.
date_images_final_robot = {}
for image_source in (date_images, X_mirrored_v, X_mirrored_h):
    for date, images in image_source.items():
        date_images_final_robot.setdefault(f'{date}', []).extend(images)



In [None]:
#NOBAL
# Per key: the original images, followed by the vertically mirrored and then
# the horizontally mirrored copies.
# Each list is built fresh. Storing the list object from date_images_nobal
# directly and extending it would also grow date_images_nobal itself, so
# re-running the cell (or reusing date_images_nobal afterwards) would see
# already-augmented data.
date_images_final_nobal = {}
for image_source in (date_images_nobal, X_mirrored_v_nobal, X_mirrored_h_nobal):
    for date, images in image_source.items():
        date_images_final_nobal.setdefault(f'{date}', []).extend(images)

# Write to HDF5 file

In [None]:
# Labels for the augmented image lists (originals + two mirrored copies): the
# target vector repeated three times, in the same order as the images.
# `+` on NumPy arrays adds element-wise (it would triple every value and keep
# the length), so the repetition is done with np.tile instead.
y_robot = np.tile(target_robot, 3)
y_nobal = np.tile(target_nobal, 3)

In [None]:
import h5py

# Write one HDF5 file per key of date_images_final_robot, holding the image
# stack "X" and the matching targets "y".
for date, images in date_images_final_robot.items():
    X = np.stack(images)

    # The image list holds the original plots followed by their mirrored copies,
    # so the per-plot targets are repeated once per copy to keep X and y aligned.
    n_labels = len(target_robot)
    if n_labels == 0 or len(X) % n_labels != 0:
        raise ValueError(
            f"{date}: {len(X)} images cannot be matched to {n_labels} targets"
        )
    y = np.tile(target_robot, len(X) // n_labels)

    # Define the HDF5 file name and location
    hdf5_file = f"/datasets/datetime_{date}.h5"
    # h5py does not create missing directories
    Path(hdf5_file).parent.mkdir(parents=True, exist_ok=True)

    # Create the HDF5 file with the datasets for the images and targets
    with h5py.File(hdf5_file, "w") as hf:
        hf.create_dataset("X", data=X)
        hf.create_dataset("y", data=y)

In [None]:
import h5py

# Write one HDF5 file per key of date_images_final_nobal, holding the image
# stack "X" and the matching targets "y".
for date, images in date_images_final_nobal.items():
    X = np.stack(images)

    # The image list holds the original plots followed by their mirrored copies,
    # so the per-plot targets are repeated once per copy to keep X and y aligned.
    n_labels = len(target_nobal)
    if n_labels == 0 or len(X) % n_labels != 0:
        raise ValueError(
            f"{date}: {len(X)} images cannot be matched to {n_labels} targets"
        )
    y = np.tile(target_nobal, len(X) // n_labels)

    # Define the HDF5 file name and location
    hdf5_file = f"/datasets/datetime_{date}.h5"
    # h5py does not create missing directories
    Path(hdf5_file).parent.mkdir(parents=True, exist_ok=True)

    # Create the HDF5 file with the datasets for the images and targets
    with h5py.File(hdf5_file, "w") as hf:
        hf.create_dataset("X", data=X)
        hf.create_dataset("y", data=y)