In [6]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [7]:
zip_path = tf.keras.utils.get_file(
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    fname='jena_climate_2009_2016.csv.zip',
    extract=True
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip
[1m13568290/13568290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [8]:
csvpath,_ = os.path.splitext(zip_path)

In [9]:
df = pd.read_csv(csvpath)

In [10]:
df.head()

Unnamed: 0,Date Time,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
0,01.01.2009 00:10:00,996.52,-8.02,265.4,-8.9,93.3,3.33,3.11,0.22,1.94,3.12,1307.75,1.03,1.75,152.3
1,01.01.2009 00:20:00,996.57,-8.41,265.01,-9.28,93.4,3.23,3.02,0.21,1.89,3.03,1309.8,0.72,1.5,136.1
2,01.01.2009 00:30:00,996.53,-8.51,264.91,-9.31,93.9,3.21,3.01,0.2,1.88,3.02,1310.24,0.19,0.63,171.6
3,01.01.2009 00:40:00,996.51,-8.31,265.12,-9.07,94.2,3.26,3.07,0.19,1.92,3.08,1309.19,0.34,0.5,198.0
4,01.01.2009 00:50:00,996.51,-8.27,265.15,-9.04,94.1,3.27,3.08,0.19,1.92,3.09,1309.0,0.32,0.63,214.3


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 420551 entries, 0 to 420550
Data columns (total 15 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   Date Time        420551 non-null  object 
 1   p (mbar)         420551 non-null  float64
 2   T (degC)         420551 non-null  float64
 3   Tpot (K)         420551 non-null  float64
 4   Tdew (degC)      420551 non-null  float64
 5   rh (%)           420551 non-null  float64
 6   VPmax (mbar)     420551 non-null  float64
 7   VPact (mbar)     420551 non-null  float64
 8   VPdef (mbar)     420551 non-null  float64
 9   sh (g/kg)        420551 non-null  float64
 10  H2OC (mmol/mol)  420551 non-null  float64
 11  rho (g/m**3)     420551 non-null  float64
 12  wv (m/s)         420551 non-null  float64
 13  max. wv (m/s)    420551 non-null  float64
 14  wd (deg)         420551 non-null  float64
dtypes: float64(14), object(1)
memory usage: 48.1+ MB


In [12]:
df.describe()

Unnamed: 0,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
count,420551.0,420551.0,420551.0,420551.0,420551.0,420551.0,420551.0,420551.0,420551.0,420551.0,420551.0,420551.0,420551.0,420551.0
mean,989.212776,9.450147,283.492743,4.955854,76.008259,13.576251,9.533756,4.042412,6.022408,9.640223,1216.062748,1.702224,3.056555,174.743738
std,8.358481,8.423365,8.504471,6.730674,16.476175,7.73902,4.184164,4.896851,2.656139,4.235395,39.975208,65.446714,69.016932,86.681693
min,913.6,-23.01,250.6,-25.01,12.95,0.95,0.79,0.0,0.5,0.8,1059.45,-9999.0,-9999.0,0.0
25%,984.2,3.36,277.43,0.24,65.21,7.78,6.21,0.87,3.92,6.29,1187.49,0.99,1.76,124.9
50%,989.58,9.42,283.47,5.22,79.3,11.82,8.86,2.19,5.59,8.96,1213.79,1.76,2.96,198.1
75%,994.72,15.47,289.53,10.07,89.4,17.6,12.35,5.3,7.8,12.49,1242.77,2.86,4.74,234.1
max,1015.35,37.28,311.34,23.11,100.0,63.77,28.32,46.01,18.13,28.82,1393.54,28.49,23.5,360.0


In [13]:
df.shape

(420551, 15)

In [17]:
df.isna().sum()

Unnamed: 0,0
Date Time,0
p (mbar),0
T (degC),0
Tpot (K),0
Tdew (degC),0
rh (%),0
VPmax (mbar),0
VPact (mbar),0
VPdef (mbar),0
sh (g/kg),0


In [19]:
df["Date Time"].value_counts()

Unnamed: 0_level_0,count
Date Time,Unnamed: 1_level_1
21.03.2014 12:50:00,2
01.07.2010 03:50:00,2
01.07.2010 04:10:00,2
01.07.2010 04:20:00,2
01.07.2010 04:30:00,2
...,...
01.09.2011 09:50:00,1
01.09.2011 09:40:00,1
01.09.2011 09:30:00,1
01.09.2011 09:20:00,1


In [46]:
# consider only 2009 data, i.e. first 52557 rows

In [35]:
df=df.iloc[:52557]

In [47]:
# scale column to be in same range

In [40]:
df1 = df[['T (degC)']].dropna()

In [42]:
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df1), columns=df1.columns)

In [45]:
# dot shuffle, since time series order is important

In [44]:
train,test = train_test_split(df_scaled, test_size=0.2,shuffle=False)

In [48]:
model = Sequential()

In [49]:
model.add(LSTM(50, activation='relu', input_shape=(None,1)))
model.add(Dense(1))

  super().__init__(**kwargs)


In [50]:
model.compile(optimizer='adam', loss='mean_squared_error')

In [51]:
windowsize = 10
x_train = np.array([train.iloc[i-windowsize:i].values for i in range(windowsize, len(train))])

In [53]:
y_train = train.iloc[windowsize:, 0].values

In [54]:
y_train

array([0.25450973, 0.25236649, 0.2529023 , ..., 0.39364172, 0.39006966,
       0.38846223])

In [55]:
x_test = np.array([test.iloc[i-windowsize:i].values for i in range(windowsize, len(test))])

In [56]:
y_test = test.iloc[windowsize:, 0].values

In [57]:
x_train = x_train.reshape(x_train.shape[0], windowsize,1)

In [58]:
x_test = x_test.reshape(x_test.shape[0], windowsize,1)

In [59]:
model.fit(x_train,y_train,epochs=10,validation_data=(x_test,y_test))

Epoch 1/10
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - loss: 0.0319 - val_loss: 4.2709e-05
Epoch 2/10
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 8.2806e-05 - val_loss: 3.9389e-05
Epoch 3/10
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - loss: 7.1838e-05 - val_loss: 3.1596e-05
Epoch 4/10
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 5.9734e-05 - val_loss: 2.4150e-05
Epoch 5/10
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - loss: 5.6059e-05 - val_loss: 2.5182e-05
Epoch 6/10
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - loss: 5.1129e-05 - val_loss: 2.1419e-05
Epoch 7/10
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - loss: 4.5554e-05 - val_loss: 2.1126e-05
Epoch 8/10
[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - l

<keras.src.callbacks.history.History at 0x7ec31ca64940>

In [60]:
trainloss = model.evaluate(x_train, y_train)
testloss = model.evaluate(x_test, y_test)

[1m1314/1314[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 2.3974e-05
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1.3878e-05


In [61]:
recent_data = np.array(df_scaled['T (degC)'].tail(windowsize)).reshape(1, windowsize,1)

In [62]:
predict = model.predict(recent_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step


In [63]:
predicted_value = scaler.inverse_transform(predict)

In [64]:
actual = np.array(df['T (degC)'].iloc[-1]).reshape(-1,1)

In [66]:
print(predicted_value)
print(actual)

[[-2.8045347]]
[[-2.82]]


In [69]:
recent_data

array([[[0.36881586],
        [0.36774424],
        [0.36756564],
        [0.36631541],
        [0.36595821],
        [0.36470798],
        [0.36345776],
        [0.36274335],
        [0.36202893],
        [0.36060011]]])