# Regression Agrarwissenschaften und Ernte

In [None]:
import os,sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from mpl_toolkits.mplot3d import Axes3D #gehört zu matplotlib.pyplot
import seaborn as sns
#import mpl_toolkits.mplot3d

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
print(tf.__version__)

## Helper Functions

In [None]:
def plot_loss(history):
    """Plot the per-epoch training loss and, if recorded, the validation loss.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch values (as returned by ``model.fit``). The key
            ``'loss'`` is required; ``'val_loss'`` is optional.
    """
    plt.figure()
    plt.plot(history.history['loss'], label='Training loss')

    # 'val_loss' is only recorded when fit() was given validation data. It is
    # measured on that validation data, not on the separately held-out test
    # set, so it is labelled as validation loss.
    if 'val_loss' in history.history:
        plt.plot(history.history['val_loss'], label='Validation loss')

    plt.xlabel('Epoch')
    plt.ylabel('Loss (Mean Squared Error)')
    plt.legend()
    plt.grid(True)
    plt.show()

def plot_loss_german(history):
    """Plot training loss (and validation loss, if present) with German labels.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch values (as returned by ``model.fit``). The key
            ``'loss'`` is required; ``'val_loss'`` is optional.
    """
    plt.figure()

    plt.plot(history.history['loss'], label='Trainings Loss')

    # 'val_loss' is missing when fit() ran without validation data; guard the
    # lookup (as plot_loss does) instead of raising KeyError. The curve is the
    # validation loss, not the loss on the held-out test set.
    has_validation = 'val_loss' in history.history
    if has_validation:
        plt.plot(history.history['val_loss'], label='Validierungs Loss')

    plt.title('Training und Validierungs Loss' if has_validation else 'Training Loss')
    plt.xlabel('Epoche')
    plt.ylabel('Verlust (Mean Squared Error)')

    plt.legend()
    plt.grid(True)
    plt.show()


In [None]:
# Build the path <parent of working directory>/data/harvest.csv.
notebooks_folder = os.getcwd()
projekt_folder = os.path.dirname(notebooks_folder)

csv_parts = ("data", "harvest.csv")
source_data = os.path.join(projekt_folder, *csv_parts)

# Load the CSV and print column names, dtypes and non-null counts.
df_source = pd.read_csv(source_data)
df_source.info()


In [None]:
# Show the first five rows of the loaded data as the cell output.
df_source.head(n=5)

In [None]:
# 3D scatter of the raw data: fertilizer (x), precipitation (y), yield (z).
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1, projection="3d")

ax.scatter(df_source["Dünger"], df_source["Niederschlag"], df_source["Ertrag"])

ax.set_title("3D-Plot der Ertragsdaten")
ax.set_xlabel("Dünger")
ax.set_ylabel("Niederschlag")
ax.set_zlabel("Ertrag")

# Shrink the 3D box slightly before laying out the figure.
ax.set_box_aspect(None, zoom=0.85)
plt.tight_layout()
plt.show()



In [None]:
# One scatter plot per input feature against the yield, side by side.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

for axis, feature in zip(axes, ("Dünger", "Niederschlag")):
    sns.scatterplot(data=df_source, x=feature, y="Ertrag", ax=axis)
    axis.set_title(f"{feature} vs. Ertrag")

plt.tight_layout()
plt.show()


# Clean the Data

In [None]:
# Count missing values per column (shown as the cell output).
# No dropna() step follows; the original note says the data contains no NaN.
df_source.isna().sum(axis=0)

# Splitting Data in Train & Test
Separate data sets for learning: 80% training, 20% test

In [None]:
# Draw a reproducible random 80% sample for training (fixed seed) ...
train_dataset = df_source.sample(frac=0.8, random_state=0)
# ... and keep every row whose index label was not sampled as the test set.
test_dataset = df_source.drop(index=train_dataset.index)



Set the Labels

In [None]:
# Inputs: the two feature columns, in the order the model will be trained on.
train_features = train_dataset.loc[:, ["Dünger", "Niederschlag"]]
test_features = test_dataset.loc[:, ["Dünger", "Niederschlag"]]

# Target: the yield column.
train_labels = train_dataset.loc[:, "Ertrag"]
test_labels = test_dataset.loc[:, "Ertrag"]

# Alternative noted by the original author: copy the full frames and
# pop('Ertrag') from the copies to obtain the labels.

# Normalization

In [None]:
# Per-column mean and standard deviation of the training data (cell output).
train_dataset.describe().T.loc[:, ['mean', 'std']]

In [None]:
# Fit a normalization layer on the training features only.
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(train_features.to_numpy())
print("Features Normalized!")
print(normalizer.mean.numpy())
print()

# Sanity check: show the first training row before and after normalization.
first = train_features.iloc[:1].to_numpy()
with np.printoptions(precision=2, suppress=True):
    print('First example:', first)
    print()
    print('Normalized:', normalizer(first).numpy())

# Collects evaluation results so that several models can be compared.
test_results = dict()

# Regression with deep neural network

## Build the model Function!
Erstelle in Keras ein Modell mit 2 Hidden Layers mit
jeweils 4 Knoten und Relu-Aktivierung. Nimm den Mean Squared Error als Loss-
Funktion, und den Adam-Optimizer.

In [None]:
def build_and_compile_model(norm):
    """Create and compile the regression network.

    The network consists of the given normalization layer, two hidden dense
    layers with 4 ReLU units each, and a single linear output unit. It is
    compiled with mean squared error as loss and the Adam optimizer
    (string alias, i.e. default optimizer settings).

    Args:
        norm: Layer placed first in the model.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    hidden_layers = [layers.Dense(4, activation='relu') for _ in range(2)]
    output_layer = layers.Dense(1)

    model = keras.Sequential([norm, *hidden_layers, output_layer])
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [None]:
# Instantiate the network with the normalization layer fitted above.
dnn_model = build_and_compile_model(norm=normalizer)

In [None]:
# Summary before training: the original note says the model has not been
# built yet at this point.
# NOTE(review): what summary() reports for a model that has not seen data
# depends on the Keras version - confirm.
dnn_model.summary()

## Training des Modells
Trainiere das Modell 100 Epochen lang, mit einer
Batch-Größe von 50

In [None]:
# Train for 100 epochs with batches of 50 samples; validation_split=0.2 has
# Keras reserve a fraction of the training data for the 'val_loss' curve.
# (Pass verbose=0 to silence the per-epoch log.)
history = dnn_model.fit(
    x=train_features,
    y=train_labels,
    batch_size=50,
    epochs=100,
    validation_split=0.2,
)

In [None]:
# Print the model summary again, now after fit() has been run on the data.
dnn_model.summary()

In [None]:
# Plot the loss curves recorded during training (German-labelled variant).
plot_loss_german(history)
# Alternative with English labels:
#plot_loss(history)

In [None]:
# Score the trained model on the held-out test set and store the result.
# The model is compiled with loss='mean_squared_error' and no extra metrics,
# so the stored value is the test MSE (not a mean absolute error).
test_results['dnn_model'] = dnn_model.evaluate(
    x=test_features,
    y=test_labels,
    verbose=0,
)

# Optional follow-ups left disabled by the original author:
#   dnn_model.predict(train_features) / dnn_model.predict(test_features)
#   pd.DataFrame(test_results, index=[...]).T for a results table


In [None]:
# Wertebereich aus den Daten
duenger_range = np.linspace(df_source["Dünger"].min(), df_source["Dünger"].max(), 30)
niederschlag_range = np.linspace(df_source["Niederschlag"].min(), df_source["Niederschlag"].max(), 30)

D, N = np.meshgrid(duenger_range, niederschlag_range)

# Grid in DataFrame-Form bringen
grid = np.column_stack([D.ravel(), N.ravel()])

Z = dnn_model.predict(grid).reshape(D.shape)


In [None]:
# 3D overview: training points, test points and the model's prediction
# surface over the (Dünger, Niederschlag) grid stored in D, N, Z.
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection="3d")

# Training samples (blue); the returned artist is reused as a legend handle.
train_scatter=ax.scatter(
    train_features["Dünger"],
    train_features["Niederschlag"],
    train_labels,
    color="blue",
    label="Training Daten",
    alpha=0.6
)

# Test samples (green); the returned artist is reused as a legend handle.
test_scatter=ax.scatter(
    test_features["Dünger"],
    test_features["Niederschlag"],
    test_labels,
    color="green",
    label="Test Daten",
    alpha=0.6
)

# Semi-transparent surface of the model predictions; its return value is
# not kept, so it cannot serve as a legend handle itself.
ax.plot_surface(
    D, N, Z,
    color="red",
    alpha=0.4
)


# Proxy artist that represents the surface in the legend.
surface_proxy= Patch(
    facecolor="red",
    edgecolor="red",
    alpha=0.4,
    label="Vorhersage"
)

ax.set_xlabel("Dünger")
ax.set_ylabel("Niederschlag")
ax.set_zlabel("Ertrag")
ax.set_title("3D Plot der Ertragsdaten und Modellvorhersagen")

# Explicit handles so that the proxy patch appears next to both scatters.
ax.legend(handles=[
    train_scatter,
    test_scatter,
    surface_proxy
])
# A plain ax.legend() call was left disabled by the original author:
#ax.legend()
ax.set_box_aspect(None, zoom=0.85)
plt.tight_layout()
plt.show()

## Predictions on the test Data Set

In [None]:
# Predict the yield for the test features and flatten the result to 1-D.
test_predictions = dnn_model.predict(test_features).flatten()

# Create axes with equal aspect ratio; the pyplot calls below draw on them
# as the current axes, so this line must come first.
a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values ')
plt.ylabel('Predictions ')
# Same limits on both axes.
# NOTE(review): [-1, 8] is hard-coded; confirm it covers the range of
# "Ertrag" in the data, otherwise points are cut off.
lims = [-1, 8]
plt.xlim(lims)
plt.ylim(lims)
# Diagonal reference line: points on it are predicted exactly.
_ = plt.plot(lims, lims)

## Prediction on new Data
Berechne zuletzt die Vorhersagewerte des Modells für folgende Wertepaare:
Niederschlag/Dünger: [0.25, 0.25], [0.85, 0.75]

In [None]:
# New observations to predict, each row given as [Niederschlag, Dünger].
# NOTE: this column order is the reverse of the training features, which are
# selected as (Dünger, Niederschlag); reorder before feeding the model.
daten = [[0.25, 0.25], [0.85, 0.75]]

prediction_data = pd.DataFrame(data=daten, columns=['Niederschlag', 'Dünger'])
prediction_data

In [None]:
# The model was trained on the columns in the order of train_features
# (Dünger, Niederschlag), while prediction_data is declared as
# (Niederschlag, Dünger). Keras takes the values by position and ignores
# column names, so select the columns in training order to avoid feeding
# swapped features.
predictions = dnn_model.predict(prediction_data[train_features.columns])
print(predictions)

In [None]:
#Speichern
dnn_model.save('dnn_model_ertrag.keras')

#Laden
#reloaded = tf.keras.models.load_model('dnn_model.keras')

#wenn weiter trainieren interessant!
#history = reloaded.fit(
#    train_features,
#    train_labels,
#    epochs=100,
#    batch_size=50,
#    validation_split=0.2)

#test_results['reloaded'] = reloaded.evaluate(
#    test_features, test_labels, verbose=0)