In [1]:
# IMPORTING NECESSARY LIBRARIES


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras_tuner import RandomSearch, Hyperband, Objective
import datetime
import os


In [2]:
# DATA LOADING


# Folder that holds the five per-variable CSV exports. Set the
# WEATHER_DATA_DIR environment variable to read from another location; the
# fallback is the folder this notebook was originally written against.
DATA_DIR = os.environ.get(
    'WEATHER_DATA_DIR',
    r'C:\Users\HP\OneDrive\Desktop\WEATHER PREDICTION PROJECT',
)


def _read_weather_csv(name):
    """Read `<DATA_DIR>/<name>.csv` and return it as a DataFrame."""
    return pd.read_csv(os.path.join(DATA_DIR, f'{name}.csv'))


temperature_df = _read_weather_csv('temperature')
humidity_df = _read_weather_csv('humidity')
pressure_df = _read_weather_csv('pressure')
wind_speed_df = _read_weather_csv('wind_speed')
wind_direction_df = _read_weather_csv('wind_direction')

# Combine dataframes
# NOTE: this is a positional, side-by-side concat of the raw frames; the
# preprocessing cell rebuilds combined_df on the datetime index afterwards.
combined_df = pd.concat(
    [temperature_df, humidity_df, pressure_df, wind_speed_df, wind_direction_df],
    axis=1,
)


In [5]:
# Convert 'datetime' to datetime type and set as index for all dataframes.
# The frames are modified in place, so a frame whose index is already named
# 'datetime' is left alone; this keeps the cell re-runnable instead of raising
# KeyError: 'datetime' on the second execution.
dfs = [temperature_df, humidity_df, pressure_df, wind_speed_df, wind_direction_df]
for df in dfs:
    if df.index.name == 'datetime':
        continue
    df['datetime'] = pd.to_datetime(df['datetime'])
    df.set_index('datetime', inplace=True)

# Combine dataframes on datetime index
# NOTE(review): if the five CSVs share column names, this concat produces
# duplicate column labels - confirm against the actual files.
combined_df = pd.concat(dfs, axis=1)

# Handle missing values - fill with median or drop, depending on your analysis needs
combined_df = combined_df.fillna(combined_df.median())

# Remove any infinite values if they exist (rows containing them are dropped)
combined_df = combined_df.replace([np.inf, -np.inf], np.nan).dropna()

# Normalization with MinMaxScaler
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed later in the notebook.
scaler = MinMaxScaler()
combined_df_scaled = pd.DataFrame(
    scaler.fit_transform(combined_df),
    columns=combined_df.columns,
    index=combined_df.index,  # keep the timestamps attached to the scaled rows
)

# Checking for any remaining missing values
print("Remaining NaN values in dataset:", combined_df_scaled.isna().sum().sum())

Remaining NaN values in dataset: 0


In [7]:
# Select only temperature columns for simplicity and prediction
temperature_columns = [col for col in combined_df_scaled.columns if 'temp' in col.lower()]
if not temperature_columns:
    # Fail here with a clear message rather than later with a zero-feature model.
    raise ValueError("No column in combined_df_scaled has 'temp' in its name; cannot build features.")
features = combined_df_scaled[temperature_columns]

# Assuming the data is in hourly intervals and we predict the next hour based on the current hour
# Shifting the target data by -1 to predict the next hour's temperature
target = features.shift(-1)

# Drop the last row which now contains NaN values in the target
features = features.iloc[:-1]
target = target.iloc[:-1]

# Reshape features for LSTM [samples, time steps, features]
# Each sample will contain only one time step and all selected temperature columns
X = features.to_numpy()[:, np.newaxis, :]

# Split data into training and testing.
# shuffle=False keeps the rows in their original order, so the test set is the
# final 20% of the series and no later rows end up in the training set.
X_train, X_test, y_train, y_test = train_test_split(
    X, target.values, test_size=0.2, shuffle=False
)


In [8]:


# Define the model building function for hyperparameter tuning
def build_model(hp):
    """Assemble and compile a two-layer stacked LSTM for hyperparameter search.

    Args:
        hp: hyperparameter container exposing ``Int``, ``Float`` and ``Choice``;
            it supplies the LSTM width, the two dropout rates and the learning
            rate.

    Returns:
        The compiled ``Sequential`` model (MSE loss, MAE and MSE metrics).

    The input width and the number of output units are both read from the
    module-level ``X_train`` (its last axis).

    NOTE(review): both LSTM layers request the hyperparameter named 'units',
    so they presumably share a single value per trial - confirm this is intended.
    """
    n_features = X_train.shape[2]

    net = Sequential()
    # First recurrent layer returns the full sequence so a second LSTM can follow.
    net.add(LSTM(
        units=hp.Int('units', min_value=50, max_value=200, step=50),
        return_sequences=True,
        input_shape=(1, n_features),
    ))
    net.add(Dropout(hp.Float('dropout_1', min_value=0.0, max_value=0.3, step=0.1)))
    net.add(LSTM(units=hp.Int('units', min_value=50, max_value=200, step=50)))
    net.add(Dropout(hp.Float('dropout_2', min_value=0.0, max_value=0.3, step=0.1)))
    # Output layer with a unit for each city
    net.add(Dense(units=n_features))

    optimizer = Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4]))
    net.compile(
        optimizer=optimizer,
        loss='mean_squared_error',
        metrics=['mae', 'mse'],
    )
    return net


In [2]:

# Keras Tuner imports used by this cell
from keras_tuner import RandomSearch, Hyperband, Objective

# Random-search configuration: 10 trials, each built and trained twice,
# ranked by validation loss; results go under model_tuning/WeatherPrediction.
search_settings = dict(
    objective='val_loss',
    max_trials=10,
    executions_per_trial=2,
    directory='model_tuning',
    project_name='WeatherPrediction',
)
tuner = RandomSearch(build_model, **search_settings)

# Stop a run once val_loss has not improved for 5 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=5)

# Run the search; validation_split=0.2 is passed through with the fit arguments.
tuner.search(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stop],
)

# Keep the single best model found by the search
best_models = tuner.get_best_models(num_models=1)
best_model = best_models[0]


NameError: name 'build_model' is not defined