In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense


# Uncomment this and replace 'your_dataset.csv' with your actual dataset
data = pd.read_csv('hu.csv')

# Convert 'time' column to datetime format
data['time'] = pd.to_datetime(data['time'], format='%d %m %Y')
data.set_index('time', inplace=True)

# Ensure 'time' column is in datetime format
# data['time'] = pd.to_datetime(data['time'], format='%d %m %Y')
# data.set_index('time', inplace=True)
# data = data[['tavg', 'humid']]  # Assuming the dataset has these columns

# Preprocess the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Create sequences
def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i+seq_length])
    return np.array(sequences)

seq_length = 30  # Using 30 days of historical data to predict the next day
sequences = create_sequences(scaled_data, seq_length)

# Split the data into training and testing sets
train_size = int(len(sequences) * 0.8)
train_sequences = sequences[:train_size]
test_sequences = sequences[train_size:]

# Split sequences into input (X) and output (y)
X_train = train_sequences[:, :-1, :]
y_train = train_sequences[:, -1, :]
X_test = test_sequences[:, :-1, :]
y_test = test_sequences[:, -1, :]

# Build the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(seq_length-1, 2)))
model.add(LSTM(50))
model.add(Dense(2))

model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Predict the next 5 days
def predict_next_days(model, data, days=5):
    predictions = []
    current_sequence = data[-(seq_length-1):]  # Start with the last available sequence
    for _ in range(days):
        current_sequence_scaled = scaler.transform(current_sequence)
        pred_scaled = model.predict(np.expand_dims(current_sequence_scaled, axis=0))
        pred = scaler.inverse_transform(pred_scaled)
        predictions.append(pred[0])
        current_sequence = np.vstack((current_sequence[1:], pred))
    return np.array(predictions)

predictions = predict_next_days(model, data.values, days=5)

# Plot the predictions
plt.plot(range(len(data)), data['tavg'], label='tavg')
plt.plot(range(len(data)), data['humid'], label='humid')
plt.plot(range(len(data), len(data) + 5), predictions[:, 0], label='Predicted tavg', linestyle='--')
plt.plot(range(len(data), len(data) + 5), predictions[:, 1], label='Predicted humid', linestyle='--')
plt.legend()
plt.show()

# Print predicted values
predicted_dates = pd.date_range(data.index[-1] + pd.Timedelta(days=1), periods=5)
predicted_df = pd.DataFrame(predictions, index=predicted_dates, columns=['Predicted tavg', 'Predicted humid'])
print(predicted_df)


ValueError: time data "01-01-1990" doesn't match format "%d %m %Y", at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.