In [None]:
pip install rasterio

Collecting rasterio
  Downloading rasterio-1.3.10-cp310-cp310-manylinux2014_x86_64.whl.metadata (14 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting snuggs>=1.4.1 (from rasterio)
  Downloading snuggs-1.4.7-py3-none-any.whl.metadata (3.4 kB)
Downloading rasterio-1.3.10-cp310-cp310-manylinux2014_x86_64.whl (21.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.5/21.5 MB[0m [31m46.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Installing collected packages: snuggs, affine, rasterio
Successfully installed affine-2.4.0 rasterio-1.3.10 snuggs-1.4.7


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import cv2 as cv
import rasterio
import os
import glob
import warnings
import random
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from google.colab import files
from sklearn.preprocessing import OneHotEncoder
import joblib  # To save and load the encoder

# Suppress runtime warnings
warnings.filterwarnings("ignore", message="invalid value encountered in scalar divide")

In [None]:
def get_filepath_target(data, Date, image_path_location, image_roots=None):
  """Collect the image files and targets that belong to every plot in `data`.

  For each row of `data`, every image root is searched under
  `<root>/<location>/<timepoint>/` for files named like
  `<x>-<timepoint>-<experiment>_<range>_<row>.<ext>` whose range, row and
  experiment equal the row's values (compared as strings). Every match is
  printed together with the date looked up in `Date`.

  Args:
    data: DataFrame with the columns 'location', 'row', 'range', 'experiment',
      'yieldPerAcre' and 'poundsOfNitrogenPerAcre'. Rows are read with
      `data.loc[0] ... data.loc[len(data) - 1]`, so those index labels must exist.
    Date: DataFrame with the columns 'Location', 'Image', 'time' and 'Date';
      only used to print the date of each matched image.
    image_path_location: indexable whose first element is an iterable of the
      location names for which image folders exist.
    image_roots: optional iterable of root directories that each contain one
      folder per location. When omitted, the notebook-level variable
      `paths_to_image_files` is used.

  Returns:
    A tuple of four lists aligned with the rows of `data`:
    (list of matching file paths per plot, 'yieldPerAcre' values,
    'location' values, 'poundsOfNitrogenPerAcre' values).
    A plot whose location is not listed in `image_path_location[0]` gets an
    empty file list.

  Raises:
    IndexError: if a file name in a time-point folder does not follow the
      naming pattern described above.
  """
  All_filepath=[]
  All_target=[]
  All_location=[]
  All_Ninfo=[]
  for Rownumber in range(data.shape[0]):
    # Fetch the row once instead of re-indexing the frame for every field.
    plot=data.loc[Rownumber]
    location=plot['location']
    # File-name fields are text, so everything is compared as strings.
    wanted=(str(plot['range']), str(plot['row']), str(plot['experiment']))

    # Decide the folders to search for THIS row only. Without the explicit
    # empty fallback an unlisted location would reuse the folders of the
    # previous row (or hit an undefined name on the very first row).
    # An element-wise comparison is used on purpose: `in` on a pandas Series
    # would test the index instead of the values.
    if any(known==location for known in image_path_location[0]):
      roots=paths_to_image_files if image_roots is None else image_roots
      finalimagefolder=[os.path.join(x,location) for x in roots]
    else:
      finalimagefolder=[]

    Final_path=[]
    for locationfolder in finalimagefolder:
      timepointfolder=sorted(os.listdir(locationfolder))
      # The image type is the second '/'-separated component of the folder path.
      imagetype=locationfolder.split('/')[1]

      for timepointpath_ in (os.path.join(locationfolder,x) for x in timepointfolder):
        for images in os.listdir(timepointpath_):
          fields=images.split('_')
          range_=fields[1]
          row_=fields[2].split('.')[0]
          head=fields[0].split('-')
          experiment_=head[2]
          if (range_, row_, experiment_)!=wanted:
            continue

          timepoint=head[1]
          date_=Date.loc[(Date['Location']==location) & (Date['Image']==imagetype) & (Date['time']==timepoint)]['Date'].to_string(index=False)
          finalpath=os.path.join(timepointpath_,images)
          print(f'{finalpath}, Timepoint of image {timepoint} = {date_}')
          Final_path.append(finalpath)

    All_filepath.append(Final_path)
    All_target.append(plot['yieldPerAcre'])
    All_location.append(location)
    All_Ninfo.append(plot['poundsOfNitrogenPerAcre'])
  return All_filepath, All_target, All_location, All_Ninfo

In [None]:
# Spatial size (height, width) that load_image resizes every raster to.
img_height=11
img_width=21
# Channel count declared for the image tensor in create_dataset's output signature.
num_channels=6
# Number of samples per batch produced by create_dataset.
batch_size=32

def load_image(file_path):
    """Read all bands of a raster file and return them at the configured image size.

    Args:
        file_path: path of a raster file that `rasterio.open` can read.

    Returns:
        A numpy array with the bands stacked along the last axis. If its first
        two dimensions already equal (img_height, img_width) it is returned as
        read; otherwise it is passed through `tf.image.resize` and converted
        back to numpy.
    """
    with rasterio.open(file_path) as src:
        # rasterio band indexes start at 1.
        bands = [src.read(band) for band in range(1, src.count + 1)]
    image = np.stack(bands, axis=-1)

    # Nothing to do when the raster already has the target size.
    if image.shape[:2] == (img_height, img_width):
        return image

    resized = tf.image.resize(image, [img_height, img_width])
    return resized.numpy()

def average_images(file_paths):
    """Average several raster images pixel-wise and scale the result.

    Args:
        file_paths: non-empty sequence of raster file paths; each one is read
            with `load_image`, which already returns images of size
            (img_height, img_width).

    Returns:
        The mean image after `tf.image.resize` to (img_height, img_width),
        divided by 4096.0.

    Raises:
        IndexError: if `file_paths` is empty.
    """
    # Seed the float64 accumulator with the first image so that every file is
    # read from disk exactly once.
    sum_image = load_image(file_paths[0]).astype(np.float64)
    for file_path in file_paths[1:]:
        sum_image += load_image(file_path)

    avg_image = sum_image / len(file_paths)
    # load_image already delivers the target size; this call mainly turns the
    # array into the tensor type returned by tf.image.resize.
    avg_image = tf.image.resize(avg_image, [img_height, img_width])
    # Normalize pixel values. NOTE(review): 4096 presumably reflects 12-bit
    # sensor data - confirm against the imagery.
    avg_image = avg_image / 4096.0
    return avg_image

def image_and_target_generator(file_paths, locations, nitrogen, targets):
    """Yield (averaged image, location, nitrogen, target) for every plot that has images.

    The four arguments are iterated in lockstep; entries whose file-path list
    is empty are skipped.
    """
    samples = zip(file_paths, locations, nitrogen, targets)
    for plot_paths, plot_location, plot_nitrogen, plot_target in samples:
        if plot_paths:
            yield average_images(plot_paths), plot_location, plot_nitrogen, plot_target

def create_dataset(file_paths, locations, nitrogen, targets):
    """Create a shuffled, batched TensorFlow dataset from a generator with progress reporting.

    Args:
        file_paths: one list of image paths per plot; plots with an empty list
            are skipped by the generator.
        locations: 2-D array-like with one encoded location vector per plot;
            its second dimension sets the width of the location tensor.
        nitrogen: one scalar per plot.
        targets: one scalar per plot.

    Returns:
        A `tf.data.Dataset` yielding batches of
        (image, location, nitrogen, target) float32 tensors.
    """
    # Take the vector width from the argument itself rather than from a
    # notebook-level variable, so the function works for any encoded split.
    location_dim = np.shape(locations)[1]
    # The generator drops plots without images; count only what it will yield
    # so the progress bar can actually reach 100%.
    num_samples = sum(1 for paths in file_paths if paths)

    def progress_generator():
        return tqdm(image_and_target_generator(file_paths, locations, nitrogen, targets),
                    total=num_samples,
                    desc="Processing dataset")

    dataset = tf.data.Dataset.from_generator(
        generator=progress_generator,
        output_signature=(
            tf.TensorSpec(shape=(img_height, img_width, num_channels), dtype=tf.float32),   # images
            tf.TensorSpec(shape=(location_dim,), dtype=tf.float32),                         # Location
            tf.TensorSpec(shape=(), dtype=tf.float32),                                      # Nitrogen
            tf.TensorSpec(shape=(), dtype=tf.float32)                                       # Target
        )
    )

    # Shuffle individual samples BEFORE batching; shuffling after batching only
    # permutes whole batches and leaves their composition fixed. The buffer is
    # sized in samples (100 batches' worth).
    dataset = (dataset
               .shuffle(buffer_size=100 * batch_size)
               .batch(batch_size)
               .prefetch(tf.data.AUTOTUNE))
    return dataset

# Build the training dataset from the collected file paths, encoded locations,
# nitrogen values and yield targets.
# NOTE(review): All_filepath, All_Ninfo, All_target and encoded_training_locations
# are not defined in the cells shown here - presumably they come from
# get_filepath_target(...) and a fitted one-hot encoder; confirm an earlier cell
# creates them so the notebook survives Restart & Run All.
dataset = create_dataset(All_filepath, encoded_training_locations, All_Ninfo, All_target)