<a href="https://colab.research.google.com/github/ostroskianais/yield-prediction/blob/main/yield_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data

In [4]:
import tensorflow as tf
import numpy as np
import pandas as pd # For reading .csv
from datetime import datetime # For knowing how long does each read/write take

train_csv = pd.read_csv("/content/train.csv")
test_csv = pd.read_csv("/content/test.csv")

In [118]:
def df_to_dataset(dataframe, shuffle=True, batch_size=1):
  #Creates a tf.data dataset from pandas dataframe
  dataframe = dataframe.copy()
  labels = dataframe.pop('yield')
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  # Process a batch-size at a time
  ds = ds.batch(batch_size)
  # Prefetching allows us to optimise fethcing of data and training
  ds = ds.prefetch(batch_size)
  return ds

In [119]:
train_ds = df_to_dataset(train_csv)
test_ds = df_to_dataset(test_csv)
train_ds

<PrefetchDataset element_spec=({'NDVI': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'tmean': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'medianArea': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'vpdmax': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'ppt': TensorSpec(shape=(None,), dtype=tf.float64, name=None)}, TensorSpec(shape=(None,), dtype=tf.float64, name=None))>

In [120]:
[(train_features, label_batch)] = train_ds.take(1)
print('Every feature:', list(train_features.keys()))
print('A batch of NDVI: ', train_features['NDVI'])
print('A batch of labels: ', label_batch)

Every feature: ['NDVI', 'tmean', 'medianArea', 'vpdmax', 'ppt']
A batch of NDVI:  tf.Tensor([0.89607665], shape=(1,), dtype=float64)
A batch of labels:  tf.Tensor([42.8], shape=(1,), dtype=float64)


In [121]:
all_inputs = []

columns = ['NDVI', 'tmean', 'medianArea', 'vpdmax', 'ppt']
for column_name in columns:
  column = tf.keras.Input(shape=(1,), name=column_name)
  all_inputs.append(column)

all_inputs

[<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'NDVI')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'tmean')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'medianArea')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'vpdmax')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'ppt')>]

In [140]:
all_features = tf.keras.layers.concatenate(all_inputs)
# output_layer = tf.keras.layers.Dense(300)
x = tf.keras.layers.Dense(10, activation="relu")(all_features)
x = tf.keras.layers.Dense(20, activation="relu")(x)
x = tf.keras.layers.Dropout(0.1)(x)
output = tf.keras.layers.Dense(1)(x)

# Model

In [141]:

model = tf.keras.Model(all_inputs, output)

model.compile(optimizer="adam",
              loss=tf.keras.losses.MeanSquaredError(),
              metrics=["mean_squared_error"])

In [142]:
model.fit(train_ds, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fa5e279b1d0>

In [143]:
model.evaluate(test_ds)



[65.8049087524414, 65.8049087524414]

In [144]:
model.summary()

Model: "model_17"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 NDVI (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 tmean (InputLayer)             [(None, 1)]          0           []                               
                                                                                                  
 medianArea (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 vpdmax (InputLayer)            [(None, 1)]          0           []                               
                                                                                           