## This program predicts weather temperature based on the jena_climate_2009_2016 dataset

## The dataset contains 14 different features, but this program only considers the temperature for prediction.

### 0. Download the Jena dataset from https://www.kaggle.com/datasets/stytch16/jena-climate-2009-2016

### 0. import relevant libraries


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,callbacks
import matplotlib.pyplot as plt
import pandas as pd

### 0. This function plots the training history of a neural network model to analyze the result.

In [None]:
def plot(data):
    """Plot training vs. validation MAE per epoch on a log-scaled y axis.

    Args:
        data: Mapping with the keys ``'mae'`` and ``'val_mae'``, each holding
            one value per epoch (for example the ``history`` attribute of the
            object returned by ``model.fit``).
    """
    loss = data['mae']
    val = data['val_mae']
    epochs = range(1, len(loss) + 1)
    plt.figure()
    # Call yscale; assigning to it (`plt.yscale = 'log'`) would replace the
    # pyplot function with a string and leave the axis linear.
    plt.yscale('log')
    plt.plot(epochs, loss, "bo", label="Training")
    plt.plot(epochs, val, "b", label="Validation")
    plt.title("Training and validation")
    plt.legend()
    plt.show()

---
## 1. Preparing Data

### 1.1 load dataset

In [None]:
# Name of the CSV file; a relative path, so it is looked up from the
# current working directory.
fname = os.path.join("jena_climate_2009_2016.csv")
# Parse the whole file into a DataFrame.
df = pd.read_csv(filepath_or_buffer=fname)

### 1.2 remove all features but temperature

In [None]:
# Keep only the temperature column; from here on `df` is a single Series.
df = df["T (degC)"]

### 1.3 divide the dataset into 3 parts (Train / Validation / Test) and normalize them

In [None]:
# Total number of readings in the series.
l = len(df)
# The first 50% of the readings are used for training and the next 25% for
# validation; both sizes are truncated to whole numbers of rows.
num_train_sample, num_val_sample = int(l * 0.5), int(l * 0.25)

#### 1.4.1 normalizing data according to train data

In [None]:
# Statistics are taken from the training rows only, then applied in place to
# the whole series (training, validation and test rows alike).
mean = df.iloc[:num_train_sample].mean(axis=0)
std = df.iloc[:num_train_sample].std(axis=0)
df -= mean
df /= std

#### 1.4.2 divide the dataset into 3 parts (Train / Validation / Test)

In [None]:
# Windowing parameters.
sample_rate = 6
sequence_length = 120
# Targets are the same series shifted forward by `delay` rows relative to
# the inputs (see the df[:-delay] / df[delay:] slices below).
delay = sample_rate * (sequence_length + 24 - 1)
batchsize = 255

# Row offsets at which the validation and test splits begin.
_val_start = num_train_sample
_test_start = num_train_sample + num_val_sample

# Keyword arguments shared by the three splits.
_window_options = dict(
    sampling_rate=sample_rate,
    sequence_length=sequence_length,
    shuffle=True,
    batch_size=batchsize,
)

train_dataset = keras.utils.timeseries_dataset_from_array(
    df[:-delay],
    targets=df[delay:],
    start_index=0,
    end_index=_val_start,
    **_window_options,
)

val_dataset = keras.utils.timeseries_dataset_from_array(
    df[:-delay],
    targets=df[delay:],
    start_index=_val_start,
    end_index=_test_start,
    **_window_options,
)

# No end_index: the test split runs to the end of the input array.
test_dataset = keras.utils.timeseries_dataset_from_array(
    df[:-delay],
    targets=df[delay:],
    start_index=_test_start,
    **_window_options,
)


### 1.5 Calculate the base error

In [None]:
# Baseline: use the last value of each input window as the prediction and
# average the absolute difference to the target over all training windows.
error = 0
num_sample = 0
for window_batch, target_batch in train_dataset:
    last_values = window_batch[:, -1]
    abs_diff = np.abs(last_values - target_batch)
    error += np.sum(abs_diff)
    num_sample += window_batch.shape[0]

base_error = error / num_sample
print(base_error)

## Base Error (normalized data): 0.307

---
## 2. Train

### 2.1 create model

In [None]:
def create_dense(sequence_length):
    """Build and compile a small fully connected regression model.

    Args:
        sequence_length: Number of time steps in each input window; used as
            the size of the model's single (non-batch) input axis.

    Returns:
        A compiled ``keras.Model`` (RMSprop optimizer, MSE loss, MAE metric)
        with one 16-unit ReLU hidden layer and a single linear output unit.
    """
    # `(sequence_length)` is just an int; the trailing comma makes it the
    # 1-tuple shape that keras.Input expects.
    inputs = keras.Input(shape=(sequence_length,))
    x = layers.Flatten()(inputs)
    x = layers.Dense(16, activation='relu')(x)
    outputs = layers.Dense(1)(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model

### 2.2 define callback

In [None]:
# Checkpoint written to "jena-single-Dense-16"; with save_best_only=True an
# existing checkpoint is only replaced when the monitored quantity improves.
_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="jena-single-Dense-16",
    save_best_only=True,
)
# NOTE: this rebinds the name `callbacks`, which was also imported from
# tensorflow.keras at the top of the file; the module stays reachable as
# `keras.callbacks`.
callbacks = [_checkpoint]

### 2.3 train model based on train data

In [None]:
# Build a fresh model sized for the configured window length.
model = create_dense(sequence_length)

# Train for 10 epochs, validating after each one; `h.history` holds the
# per-epoch metrics afterwards.
h = model.fit(
    x=train_dataset,
    validation_data=val_dataset,
    epochs=10,
    callbacks=callbacks,
)

### 2.4 analyze the model history

In [None]:
# Draw the training and validation MAE curves recorded during fit().
plot(data=h.history)

---
## 3. Test

### 3.1 Load best model

In [None]:
# Restore the weights saved by the ModelCheckpoint callback.
model.load_weights(filepath="jena-single-Dense-16")

### 3.2 Evaluate

In [None]:
# Score the restored model on the held-out test split.
eval_result = model.evaluate(x=test_dataset)

### 3.3 calculate error 

In [None]:
# Bare expression: shows the value returned by model.evaluate() as the
# notebook cell output.
eval_result