In [1]:
!pip install ucimlrepo pandas scikit-learn tensorflow

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 23.1.2 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# ------- Import Dataset -------------------

from ucimlrepo import fetch_ucirepo

# Fetch dataset id 360 from the UCI ML repository.
air_quality = fetch_ucirepo(id=360)

# Data (as pandas DataFrames). Work on a copy of the feature frame: the next
# cells add and drop columns, and doing that on the object still held by
# `air_quality` can raise SettingWithCopyWarning and silently alters the
# fetched data.
X = air_quality.data.features.copy()
y = air_quality.data.targets  # unused? (rebound later by the sequence-building cells)

In [None]:
# Combine Date and Time into a single DateTime column -> only 14 features
import pandas as pd
import numpy as np

# Combine the 'Date' and 'Time' string columns into a single timestamp
# (e.g. 2004-03-10 18:00:00), use it as the index and drop the two source
# columns. The frame is rebuilt through a method chain instead of in-place
# edits, so the object `X` pointed at before this cell is left untouched.
# NOTE: the cell still consumes 'Date'/'Time', so it can only run once per load.
X = (
    X.assign(DateTime=pd.to_datetime(X['Date'] + ' ' + X['Time']))
    .set_index('DateTime')
    .drop(columns=['Date', 'Time'])
    # -200 is treated as "missing"; one replace matches both -200 and -200.0,
    # so the second call in the original was redundant.
    .replace(-200, np.nan)
)

In [None]:
# Widen the console display so all columns of the preview fit on one line,
# then print the first five rows.
pd.options.display.width = 1000
print(X.head(5))

In [None]:
# Invert PT08.S3(NOx) by flipping its sign.
# NOTE: this overwrites the column, so running the cell a second time flips
# the sign back; run it exactly once per data load.
X['PT08.S3(NOx)'] = X['PT08.S3(NOx)'].mul(-1)

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Min-max normalisation of every column of X.
scaler = MinMaxScaler()

# 'DateTime' is the index, so X.columns holds only the measured features.
features_to_scale = X.columns

# Fit the scaler and transform in one step, then wrap the resulting array in
# a DataFrame that keeps the original column labels and the DateTime index.
scaled_values = scaler.fit_transform(X[features_to_scale])
X_scaled = pd.DataFrame(
    scaled_values,
    columns=features_to_scale,
    index=X.index,
)

# X_scaled now holds the normalised data, indexed by 'DateTime'.

In [None]:
# Forward-fill missing values (original note: this changes nothing for linear
# regression). `fillna(method='ffill')` is deprecated in recent pandas, so the
# dedicated `ffill()` method is used and the result is rebound instead of
# mutating in place.
# NOTE: forward fill cannot fill gaps at the very start of a column.
X_scaled = X_scaled.ffill()

In [None]:
# Define LSTM model
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

def create_sequences(data, seq_length):
    """Build sliding-window samples for next-step prediction.

    Args:
        data: Time series ordered oldest-first, with time on axis 0. Any
            array-like is accepted (ndarray, list, pandas Series/DataFrame);
            it is converted with ``np.asarray`` so indexing is positional.
        seq_length: Number of consecutive time steps in each input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays where ``xs[k]`` is
        ``data[k:k + seq_length]`` and ``ys[k]`` is ``data[k + seq_length]``.
        There are ``len(data) - seq_length`` samples; both arrays are empty
        when the series is not longer than ``seq_length``.
    """
    values = np.asarray(data)
    # Every start index whose target (start + seq_length) still lies inside
    # the series is valid; the former extra "- 1" dropped the last sample.
    n_samples = max(len(values) - seq_length, 0)
    xs = [values[start:start + seq_length] for start in range(n_samples)]
    ys = [values[start + seq_length] for start in range(n_samples)]
    return np.array(xs), np.array(ys)

# Prepare our data. This model is univariate (one feature per time step), so
# feed it a single column as a 1-D array. The scaled, forward-filled frame is
# used because the raw frame `X` still contains NaNs, and passing the whole
# DataFrame breaks both the positional indexing in create_sequences and the
# single-feature reshape below.
data = X_scaled['CO(GT)'].to_numpy()

look_back = 15
xs, ys = create_sequences(data, look_back)

# Reshape the input to [samples, time steps, features]
xs = np.reshape(xs, (xs.shape[0], xs.shape[1], 1))

# Define LSTM model
model = Sequential()
model.add(LSTM(4, activation='relu', input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Fit the model
# NOTE(review): batch_size=1 for 100 epochs means one weight update per
# sample and is very slow on a long series; consider a larger batch.
model.fit(xs, ys, epochs=100, batch_size=1, verbose=2)

In [None]:
# LSTM prepare Data
import numpy as np
def create_sequences(data, seq_length):
    """Build sliding-window samples for next-step prediction.

    Args:
        data: Time series ordered oldest-first, one row per time step
            (pandas DataFrame/Series or any array-like).
        seq_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays: ``xs[k]`` holds rows
        ``k .. k + seq_length - 1`` and ``ys[k]`` is the row right after that
        window. There are ``len(data) - seq_length`` samples; both arrays are
        empty when the series is not longer than ``seq_length``.
    """
    values = np.asarray(data)

    # The last valid window ends one row before the final row, so there are
    # len - seq_length samples; the former extra "- 1" dropped the last one.
    n_samples = max(len(values) - seq_length, 0)
    xs = [values[start:start + seq_length] for start in range(n_samples)]
    ys = [values[start + seq_length] for start in range(n_samples)]

    return np.array(xs), np.array(ys)

# Window length: number of past time steps fed to the model (example value).
seq_length = 5

# Build the (window, next-row) training pairs from the scaled frame.
# NOTE: this rebinds `X` and `y` to the arrays returned by create_sequences,
# so `X` is no longer the DataFrame from the earlier cells.
X, y = create_sequences(data=X_scaled, seq_length=seq_length)

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


# Function to create sequences
def create_sequences(data, seq_length):
    """Build sliding-window samples for next-step prediction.

    Args:
        data: Time series ordered oldest-first, with time on axis 0. Accepts
            NumPy arrays as well as pandas Series/DataFrames; the input is
            converted with ``np.asarray`` so that integer indexing is always
            positional (on a DataFrame, ``data[i]`` would be a column lookup
            and raise ``KeyError``).
        seq_length: Number of consecutive time steps in each input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays where ``xs[k]`` is
        ``data[k:k + seq_length]`` and ``ys[k]`` is ``data[k + seq_length]``.
    """
    values = np.asarray(data)
    xs, ys = [], []
    for start in range(len(values) - seq_length):
        stop = start + seq_length
        xs.append(values[start:stop])
        ys.append(values[stop])
    return np.array(xs), np.array(ys)

# Define sequence length
seq_length = 5  # Example: 5 time steps

# Prepare the sequences. The plain array is passed because create_sequences
# indexes positionally, which a DataFrame does not support for single rows.
X_s, y_s = create_sequences(X_scaled.to_numpy(), seq_length)

# Chronological split into training and testing (customize the ratio as
# needed). The size is derived from X_s itself rather than from `X`, which
# belongs to a different cell and may have a different length.
train_size = int(len(X_s) * 0.8)
X_train, X_test = X_s[:train_size], X_s[train_size:]
y_train, y_test = y_s[:train_size], y_s[train_size:]

# Define the LSTM model; input and output widths come from the arrays that
# are actually used for training.
model = Sequential([
    LSTM(50, activation='relu', input_shape=(seq_length, X_s.shape[2])),
    Dense(y_s.shape[1])
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

In [None]:
# LSTM prepare Model
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# One input and one output per column of the scaled frame.
n_features = X_scaled.shape[1]

# Two stacked LSTM layers; the first returns the full sequence so the second
# can consume it. The dense layer emits one value per feature.
model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(seq_length, n_features)),
    LSTM(50, activation='relu'),
    Dense(n_features),
])

model.compile(optimizer='adam', loss='mse')

In [None]:
# Train the LSTM on the window/target arrays `X` and `y`.
model.fit(x=X, y=y, batch_size=32, epochs=30)

In [None]:
# Predict the step that follows the data with the trained LSTM.
# Take the most recent `seq_length` rows and add a leading batch axis so the
# input has shape (1, seq_length, n_features).
last_sequence = X_scaled.iloc[-seq_length:]
last_sequence = last_sequence.values[np.newaxis, ...]

predicted = model.predict(last_sequence)

# Map the prediction from the scaled range back to the original units.
predicted = scaler.inverse_transform(predicted)

In [None]:
from datetime import datetime

# Predict the row at a given timestamp from the `seq_length` rows before it,
# using the trained LSTM `model`.
#
# The lookup runs on X_scaled: it still carries the DateTime index and is
# already normalised and forward-filled. `X` cannot be used here because the
# sequence-preparation cell rebinds it to a NumPy array, and 'DateTime' is the
# index rather than a column, so dropping it would raise KeyError.

# Convert your input DateTime to a format that matches the DataFrame index
input_datetime = datetime.strptime('2005-04-11 18:00:00', '%Y-%m-%d %H:%M:%S')

# Find the positional index of the input DateTime, with a readable error when
# the timestamp is outside the recorded range.
if input_datetime not in X_scaled.index:
    raise KeyError(
        f"{input_datetime} is not in the data "
        f"(available range: {X_scaled.index.min()} .. {X_scaled.index.max()})"
    )
index = X_scaled.index.get_loc(input_datetime)
if not isinstance(index, (int, np.integer)):
    # get_loc returns a slice/mask when the label is not unique
    raise ValueError(f"{input_datetime} matches more than one row")
if index < seq_length:
    raise ValueError(
        f"need {seq_length} rows before {input_datetime}, only {index} available"
    )

# Extract the (already scaled) sequence leading up to the input DateTime
sequence_scaled = X_scaled.iloc[index - seq_length:index].to_numpy()

# Reshape the sequence for the LSTM (adding sample dimension)
sequence_scaled = np.expand_dims(sequence_scaled, axis=0)

# Make the prediction
predicted_value_scaled = model.predict(sequence_scaled)

# Inverse scaling back to the original units
predicted_value = scaler.inverse_transform(predicted_value_scaled)

# The predicted value
print(predicted_value)

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from ucimlrepo import fetch_ucirepo

# Fetch dataset
air_quality = fetch_ucirepo(id=360)

# y = air_quality.data.targets # This can be used if you have specific target variables

# Build the working frame without mutating the fetched one:
#   * combine 'Date' + 'Time' into a DateTime index and drop both columns;
#   * treat the -200 sentinel as missing (as the first pipeline in this
#     notebook does) so it does not distort the min/max used for scaling;
#   * forward-fill the gaps, then back-fill any gap at the very start, so the
#     LSTM never sees NaN.
X = (
    air_quality.data.features
    .assign(DateTime=lambda df: pd.to_datetime(df['Date'] + ' ' + df['Time']))
    .set_index('DateTime')
    .drop(columns=['Date', 'Time'])
    .replace(-200, float('nan'))
    .ffill()
    .bfill()
)

# Normalize the data
scaler = MinMaxScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

In [None]:
# Function to create sequences
def create_sequences(data, seq_length):
    """Turn a time-indexed DataFrame into (window, next-row) training pairs.

    Args:
        data: pandas DataFrame with one row per time step, oldest first.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays: ``xs[k]`` contains the values of
        rows ``k .. k + seq_length - 1`` and ``ys[k]`` the values of the row
        that follows that window.
    """
    n_windows = len(data) - seq_length
    windows = [
        data.iloc[start:start + seq_length].values
        for start in range(n_windows)
    ]
    targets = [
        data.iloc[start + seq_length].values
        for start in range(n_windows)
    ]
    return np.array(windows), np.array(targets)

# Window length: number of past time steps per sample (example value).
seq_length = 5

# Build the input windows and their next-step targets from the scaled frame.
X_sequences, y_sequences = create_sequences(data=X_scaled, seq_length=seq_length)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split

# Split data into training and testing sets.
# shuffle=False keeps the split chronological (first 80% train, last 20%
# test). Consecutive windows overlap in all but one row, so a shuffled split
# would put near-duplicates of the test windows into the training set and
# make the test loss look better than it is.
X_train, X_test, y_train, y_test = train_test_split(
    X_sequences, y_sequences, test_size=0.2, shuffle=False
)

# Define LSTM model: input shape is (time steps, features) taken from the
# training windows; one output per target column.
model = Sequential([
    LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(y_train.shape[1])
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=40, batch_size=32, validation_split=0.1)

# Evaluate the model on the held-out (most recent) windows
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

In [None]:
# Values of the most recent `seq_length` rows of X as a NumPy array.
tail_values = X.iloc[-seq_length:].to_numpy()

# Add the sample axis expected by the LSTM:
# (1, seq_length, number_of_features)
last_sequence = np.reshape(tail_values, (1, seq_length, tail_values.shape[1]))

In [None]:
# `last_sequence` is the input sequence leading up to the time of prediction
# and `scaler` is the MinMaxScaler instance used for normalizing the data.

# Normalize the input. The scaler only accepts 2-D (rows, features) data, so
# any leading sample axis is collapsed first; this accepts both a
# (seq_length, features) and a (1, seq_length, features) array.
last_sequence_2d = np.asarray(last_sequence)
last_sequence_2d = last_sequence_2d.reshape(-1, last_sequence_2d.shape[-1])
last_sequence_scaled = scaler.transform(last_sequence_2d)

# Reshape the input for LSTM
# LSTM expects input shape [samples, time steps, features]
last_sequence_scaled = last_sequence_scaled.reshape((1, last_sequence_scaled.shape[0], last_sequence_scaled.shape[1]))

# Make the prediction
predicted_values_scaled = model.predict(last_sequence_scaled)

# Inverse transform the prediction to original scale
predicted_values = scaler.inverse_transform(predicted_values_scaled)

# predicted_values now contains the prediction for the step after the sequence

In [None]:


# Show the timestamp of the final row of the dataset.
last_timestamp = X.index[-1]
print("Last date in the dataset:", last_timestamp)

In [None]:
# CO(GT) reading stored at the final timestamp of the dataset.
final_timestamp = X.index[-1]
print(X.loc[final_timestamp, 'CO(GT)'])

In [None]:
import pandas as pd
from datetime import timedelta

# Iterative multi-step forecast with the trained LSTM `model`: predict one
# step, append it to the window, drop the oldest step, repeat.

# Seed window: the last `seq_length` rows in the SCALED space the model was
# trained on. Feeding raw values from `X` to a model fitted on min-max scaled
# data would produce meaningless predictions.
last_known_sequence = X_scaled.iloc[-seq_length:].to_numpy()

# Reshape to the format expected by the LSTM model:
# (1, seq_length, number_of_features). The feature count comes from the data
# rather than a hard-coded 13.
current_sequence = last_known_sequence.reshape((1, seq_length, X_scaled.shape[1]))

# Initialize the current prediction date
# NOTE(review): this should equal the timestamp of the last row used above
# (X_scaled.index[-1]); otherwise the number of steps taken does not
# correspond to `desired_date`. Confirm against the dataset.
current_prediction_date = pd.to_datetime("2005-04-02 14:00:00")  # Adjust to your dataset's last date

# Desired prediction date
desired_date = pd.to_datetime("2005-04-04 14:00:00")

# Iterate predictions
while current_prediction_date < desired_date:
    # Predict the next step (verbose=0 avoids one progress bar per step)
    next_step_prediction = model.predict(current_sequence, verbose=0)

    # Update the sequence: remove the oldest step and add the predicted step.
    # np.roll returns a new array, so X_scaled itself is never modified.
    current_sequence = np.roll(current_sequence, -1, axis=1)
    current_sequence[0, -1, :] = next_step_prediction

    # Update the prediction date (assuming hourly data, adjust as needed)
    current_prediction_date += timedelta(hours=1)

# The last row of the window is the prediction for the final step; convert it
# from the scaled range back to the original units before reporting it.
final_prediction = scaler.inverse_transform(current_sequence[0, -1:, :])[0]
print(final_prediction)

In [None]:
# LSTM
# Define LSTM model
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

def create_sequences(data, feature, seq_length):
    """Build univariate sliding-window samples from one column of a frame.

    Args:
        data: pandas DataFrame with one row per time step, oldest first.
        feature: Label of the column to build the sequences from.
        seq_length: Number of consecutive values in each input window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays: ``xs`` has shape
        ``(n_samples, seq_length)`` and ``ys[k]`` is the value that follows
        window ``xs[k]``. ``n_samples`` is ``len(data) - seq_length``; both
        arrays are empty when the column is not longer than ``seq_length``.
    """
    series = data[feature].to_numpy()
    # Every window whose following value still exists is a valid sample; the
    # former extra "- 1" dropped the last one.
    n_samples = max(len(series) - seq_length, 0)
    xs = [series[start:start + seq_length] for start in range(n_samples)]
    ys = [series[start + seq_length] for start in range(n_samples)]
    return np.array(xs), np.array(ys)

# Choose a feature to predict
feature = 'CO(GT)'

# Create sequences from your data
xs, ys = create_sequences(X, feature, seq_length)

# Reshape the input to be [samples, time steps, features]
xs = np.reshape(xs, (xs.shape[0], xs.shape[1], 1))

# Define LSTM model. The number of time steps must match the windows built
# above (seq_length); `look_back` belongs to an earlier cell and has a
# different value, which made fit() fail with a shape mismatch.
model = Sequential()
model.add(LSTM(4, activation='relu', input_shape=(seq_length, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Fit the model
# NOTE(review): batch_size=1 performs one weight update per sample and is
# very slow on a long series; consider a larger batch.
model.fit(xs, ys, epochs=10, batch_size=1, verbose=2)