In [2]:
import os
import numpy as np
import pandas as pd
import cv2
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

In [3]:
# --- Locate the phenotype table and the stacked image arrays ---
npy_dir = "/scratch/pawsey1157/rtrivedi/dataset/Phenotypes/stacked_npy/"
csv_path = "/scratch/pawsey1157/rtrivedi/dataset/Phenotypes/Images_GIDs_GRYLD.csv"

# The CSV is read as header-less, so the three column labels are supplied here.
data_df = pd.read_csv(
    csv_path,
    names=['filename', 'GID', 'GRYLD'],
    header=None,
)

# Sanity check: display the type of one yield value (row label 1).
type(data_df.loc[1, 'GRYLD'])

numpy.float64

In [4]:
# --- Empty accumulators: X collects the image arrays, Y the matching yields ---
X, Y = [], []

In [5]:
# Common size every band is resampled to, ordered (width, height)
target_size = (61, 145)


def resize_multiband(img, target_size):
    """Resize each band of a multi-band image and restack the results.

    Args:
        img: Array indexed as ``img[:, :, band]``; the number of bands is
            read from ``img.shape[2]``.
        target_size: Size handed unchanged to ``cv2.resize`` for every band,
            given as (width, height).

    Returns:
        The resized bands joined along a new last axis with ``np.stack``.
    """
    resized_bands = [
        cv2.resize(img[:, :, band], target_size, interpolation=cv2.INTER_AREA)
        for band in range(img.shape[2])
    ]
    return np.stack(resized_bands, axis=-1)

In [6]:
# Start from empty lists on every run so this cell is idempotent: re-running it
# must not append a second copy of the dataset, and it must keep working after
# a later cell has rebound X and Y to NumPy arrays (which have no .append).
X = []
Y = []

# Iterate over the row labels only; values are looked up by label with .loc,
# so the per-row Series that iterrows() would build is not needed.
for idx in data_df.index:
    npy_path = os.path.join(npy_dir, str(data_df.loc[idx, 'filename']))
    if not os.path.exists(npy_path):
        # A row whose image file is missing contributes to neither X nor Y,
        # which keeps the two lists aligned.
        print(f"Warning: {npy_path} not found, skipping.")
        continue

    img = np.load(npy_path)
    # Bring every image to the common target_size so they can be stacked later.
    img_resized = resize_multiband(img, target_size)
    YLD = data_df.loc[idx, 'GRYLD']
    X.append(img_resized)
    Y.append(YLD)

In [26]:
# Debug check: show the type of the yield value left in YLD by the loading loop.
type(YLD)
# Optional debug aid (disabled): print the shape of every loaded image.
#for i, img in enumerate(X):
#    print(f"Image {i} shape: {img.shape}")

numpy.float64

In [7]:
# Turn the accumulated Python lists into NumPy arrays
X, Y = np.array(X), np.array(Y)

# Report the resulting shapes; X is (samples, height, width, bands)
print("Loaded data shape:", X.shape)
print("Yield shape:", Y.shape)


Loaded data shape: (85855, 145, 61, 8)
Yield shape: (85855,)


In [32]:
# Sanity-check the image array: NaNs, infinities and value range
print("Any NaNs in X?", np.any(np.isnan(X)))
print("Any infinities in X?", np.any(np.isinf(X)))
print("X min/max:", np.min(X), np.max(X))

# Same for the yield targets (NaNs and range only)
print("Any NaNs in Y?", np.any(np.isnan(Y)))
print("Y min/max:", np.min(Y), np.max(Y))

Any NaNs in X? False
Any infinities in X? False
X min/max: -0.8261975 3.6003518
Any NaNs in Y? False
Y min/max: 0.861 10.192


In [8]:
# --- Reproducibility ---
# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation, dropout masks and batch shuffling repeat across a
# Restart-and-Run-All. Previously only the train/test split was seeded, so the
# reported metrics could not be reproduced.
SEED = 42
keras.utils.set_random_seed(SEED)

# --- Train-test split ---
# Hold out 20% of the samples for the final evaluation.
# NOTE(review): the split is made per image. If several images share a GID,
# a random split can put closely related samples on both sides and inflate the
# test score -- consider a grouped split by GID; confirm against the dataset.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=SEED
)

# --- Build CNN model ---
input_shape = X_train.shape[1:]  # (height, width, bands)

# Three Conv -> MaxPool stages with doubling filter counts, then a small dense
# head that regresses a single value.
model = keras.Sequential([
    layers.Input(shape=input_shape),

    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1)  # Single regression output for grain yield
])

# MSE is the training loss; MAE is tracked as an additional metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

model.summary()

# --- Train model ---
# validation_split reserves 20% of the training arrays for per-epoch
# validation; the test set is not seen during fitting.
history = model.fit(X_train, Y_train, epochs=50, batch_size=16, validation_split=0.2)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 145, 61, 32)       2336      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 72, 30, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 30, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 36, 15, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 36, 15, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 18, 7, 128)       0

In [9]:
# --- Score the trained model on the held-out test set ---
# The result is unpacked as the loss followed by the tracked MAE metric.
test_loss, test_mae = model.evaluate(x=X_test, y=Y_test)
print(f"Test MAE: {test_mae:.4f}")



Test MAE: 0.3267


In [10]:
# --- Predict on the test set, then compute RMSE and R² ---
Y_pred = model.predict(X_test).flatten()  # collapse predictions to 1-D
rmse = np.sqrt(mean_squared_error(y_true=Y_test, y_pred=Y_pred))
r2 = r2_score(y_true=Y_test, y_pred=Y_pred)

print(f"R²: {r2:.3f}, RMSE: {rmse:.3f}")

R²: 0.645, RMSE: 0.437
