In [5]:
# Parameters
location = "Tokyo"
date = "2025-05-07"  # This will be overridden by papermill

# Ensure the date is in YYYY-MM-DD format
from datetime import datetime
import sys

# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import requests


date = pd.to_datetime(datetime.today().strftime('%Y-%m-%d'))
sys.stdout.write(f"Starting forecast process for Location: {location}, Date: {date}\n")


with open("D:\\Documents\\OE\\Szakdolgozat\\szakdolgozat\\weather-warehouse\\backend\\src\\notebooks\\lstm_forecast.ipynb", "rb") as f:
    content = f.read()
    if b"\x00" in content:
        print("Null bytes found in the file!")
    else:
        print("No null bytes found.")
        

# Step 1: Query historical data
def useHistoricalDataQuery(location_name, country, date):
    print(f"Fetching historical data for location: {location_name}, country: {country}, date: {date}")
    api_url = "http://127.0.0.1:4000/historical/historicalData"
    payload = {
        "location": {"name": location_name, "country": country},
        "date": date
    }
    try:
        response = requests.post(api_url, json=payload)
        response.raise_for_status()
        historical_data = response.json()
        print("Historical data fetched successfully.")
        return pd.DataFrame(historical_data)
    except requests.exceptions.RequestException as e:
        print(f"Error calling historicalData API: {e}")
        return pd.DataFrame()  # Return an empty DataFrame on error




# Example usage
location_name = "Tokyo"
country = "JP"
date = "2016-01-01"
data = useHistoricalDataQuery(location_name, country, date)
print(data.head())
print("-------------------------------------------------")
print(data)




# Step 2: Preprocess data
print("Starting data preprocessing...")
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data[['Temperature']])
print(f"Scaled data (first 5 rows):\n{scaled_data[:5]}")

# Prepare sequences
def create_sequences(data, seq_length):
    print(f"Creating sequences with sequence length: {seq_length}")
    x, y = [], []
    for i in range(len(data) - seq_length):
        x.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    print(f"Created {len(x)} sequences.")
    return np.array(x), np.array(y)

seq_length = 30
x, y = create_sequences(scaled_data, seq_length)
print(f"Input shape: {x.shape}, Output shape: {y.shape}")

# Reshape for LSTM
x = x.reshape((x.shape[0], x.shape[1], 1))
print(f"Reshaped input for LSTM: {x.shape}")

# Step 3: Build LSTM model
print("Building LSTM model...")
model = Sequential()
model.add(Input(shape=(seq_length, 1)))  # Explicitly define the input shape
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
print("LSTM model built successfully.")

# Step 4: Train model
print("Training LSTM model...")
model.fit(x, y, epochs=10, batch_size=16, verbose=1)
print("Model training completed.")

# Step 5: Predict next 7 days
print("Generating forecast for the next 7 days...")
last_seq = scaled_data[-seq_length:]
forecast = []
input_seq = last_seq.copy()

for i in range(7):
    pred = model.predict(input_seq.reshape(1, seq_length, 1))
    forecast.append(pred[0, 0])
    input_seq = np.append(input_seq[1:], pred).reshape(seq_length, 1)
    print(f"Day {i+1} forecast: {forecast[-1]}")

# Inverse scale
forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1)).flatten()
print(f"Forecast after inverse scaling: {forecast}")

# Output forecast values
forecast_dates = pd.date_range(start=date + pd.Timedelta(days=1), periods=7)
forecast_data = [{"date": str(d.date()), "Temperature": float(f)} for d, f in zip(forecast_dates, forecast)]
print("Final forecast data:")
for entry in forecast_data:
    print(entry)

# Explicitly output the forecast data
forecast_data

Starting forecast process for Location: Tokyo, Date: 2025-05-08 00:00:00
No null bytes found.
Fetching historical data for location: Tokyo, country: JP, date: 2016-01-01
Historical data fetched successfully.
   WeatherID  LocationKey   DateKey                  FullDate CityName  \
0  194828537         5472  20160101  2016-01-01T00:00:00.000Z    Tokyo   

  CountryCode  Temperature  MinTemperature  MaxTemperature Humidity  \
0          JP          7.3             3.9            12.1     None   

   WindSpeed  Precipitation  Pressure CloudCover     Source  \
0        9.6              0    1025.2       None  Meteostat   

            ObservationTime  
0  2016-01-01T00:00:00.000Z  
-------------------------------------------------
   WeatherID  LocationKey   DateKey                  FullDate CityName  \
0  194828537         5472  20160101  2016-01-01T00:00:00.000Z    Tokyo   

  CountryCode  Temperature  MinTemperature  MaxTemperature Humidity  \
0          JP          7.3             3.9 

IndexError: tuple index out of range