In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Read the Washington weather observations from disk.
data = pd.read_csv('../Data/csv/WashingtonWeather.csv')

# Keep only the columns used downstream and order the rows chronologically
# (by year, then by month within each year).
columns_to_use = ['YEAR', 'MONTH', 'MIN', 'LAT', 'LON', 'ELEV(M)', 'TEMP', 'DEWP', 'PRCP', 'USAF']
data = data[columns_to_use].sort_values(by=['YEAR', 'MONTH'], ascending=[True, True])

# Rescale the continuous predictors with a StandardScaler fitted on the whole
# frame. YEAR, MONTH, PRCP and USAF are deliberately not in this list, so they
# keep their raw values.
scaled_columns = ['MIN', 'LAT', 'LON', 'ELEV(M)', 'TEMP', 'DEWP']
scaler = StandardScaler()
data[scaled_columns] = scaler.fit_transform(data[scaled_columns])

import numpy as np

def create_sequences(data, n_steps):
    """Slice a 2-D array into sliding input windows and next-step targets.

    The last column of ``data`` is treated as the target; every other column
    is an input feature. Window ``i`` holds the feature rows
    ``i .. i + n_steps - 1`` and its target is the last-column value of row
    ``i + n_steps``.

    Parameters
    ----------
    data : array-like, shape (n_rows, n_columns)
        Rows in chronological order, target in the final column.
    n_steps : int
        Number of consecutive rows in each input window; must be >= 1.

    Returns
    -------
    X : numpy.ndarray, shape (n_windows, n_steps, n_columns - 1)
    y : numpy.ndarray, shape (n_windows,)
        ``n_windows`` is ``max(n_rows - n_steps, 0)``. When there are too few
        rows for even one window, both arrays are empty but keep the shapes
        above, so callers can still read ``X.shape[1]`` and ``X.shape[2]``.

    Raises
    ------
    ValueError
        If ``n_steps`` is smaller than 1 or ``data`` is not 2-D.
    """
    if n_steps < 1:
        raise ValueError(f"n_steps must be >= 1, got {n_steps}")

    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(f"data must be 2-D, got {data.ndim} dimension(s)")

    features, targets = data[:, :-1], data[:, -1]

    # Each window needs one extra row after it to supply the target.
    n_windows = max(len(data) - n_steps, 0)
    if n_windows == 0:
        # Too short for a single window: return empty arrays that still carry
        # the window length and feature count, instead of a shapeless (0,) array.
        empty_X = np.empty((0, n_steps, features.shape[1]), dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([features[start:start + n_steps] for start in range(n_windows)])
    # Copy so the returned targets do not alias the caller's array.
    y = targets[n_steps:].copy()
    return X, y

# Length of each input window, in rows of a station's history
n_steps = 6

# Build one (X, y) pair per station, in order of first appearance in `data`
final_data = []
station_ids = data['USAF'].unique()
for station in station_ids:
    # Rows of this station only; the identifier column is not a model feature
    belongs_to_station = data['USAF'] == station
    station_data = data.loc[belongs_to_station].drop(columns='USAF')
    X, y = create_sequences(station_data.to_numpy(), n_steps)
    final_data.append((X, y))

In [4]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd

# LSTM regressor factory
def build_model(input_shape):
    """Create and compile a single-layer LSTM network with one output unit.

    Parameters
    ----------
    input_shape : tuple
        Shape passed to the LSTM layer as ``input_shape``; the caller in this
        notebook supplies ``(timesteps, features)``.

    Returns
    -------
    A compiled ``Sequential`` model: LSTM(50, relu) followed by Dense(1),
    using the Adam optimizer and mean-squared-error loss.
    """
    layers = [
        LSTM(50, activation='relu', input_shape=input_shape),
        Dense(1),
    ]
    model = Sequential(layers)
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Test-window predictions, one array per station, in the same order as final_data
predictions_list = []

# Fit a fresh model for every station and score it on that station's held-out tail
for X, y in final_data:
    # Hold out the final 4 sequences for testing
    # (assumed to correspond to Jan-Apr 2019)
    train_X, train_y = X[:-4], y[:-4]
    test_X, test_y = X[-4:], y[-4:]

    # X is already 3D [samples, timesteps, features]; read the per-sample
    # shape the LSTM expects straight from it
    n_timesteps, n_features = train_X.shape[1], train_X.shape[2]

    # Build and train the LSTM for this station
    model = build_model((n_timesteps, n_features))
    model.fit(train_X, train_y, epochs=50, verbose=0)

    # Predict PRCP for the held-out sequences and keep the result
    predicted = model.predict(test_X)
    predictions_list.append(predicted)

    # Report the held-out error for this station
    mse = mean_squared_error(test_y, predicted)
    print(f'Mean Squared Error: {mse}')

Mean Squared Error: 0.05457141866352819
Mean Squared Error: 0.5213190337358411
Mean Squared Error: 18.034833811680684
Mean Squared Error: 0.0018891680182893849
Mean Squared Error: 1.1231826943009018
Mean Squared Error: 0.2532891930585597
Mean Squared Error: 0.02160919620388816
Mean Squared Error: 0.09725485549027102
Mean Squared Error: 21.63060471757729
Mean Squared Error: 0.9876566484660424
Mean Squared Error: 0.1318844244050976
Mean Squared Error: 0.28615904276346843
Mean Squared Error: 1.1436667822544435
Mean Squared Error: 0.08447298087156124
Mean Squared Error: 0.019593740184141183
Mean Squared Error: 0.978475255637788
Mean Squared Error: 0.25136562755370995
Mean Squared Error: 0.372177185926379
Mean Squared Error: 0.6977916958102333
Mean Squared Error: 0.05716459828482673
Mean Squared Error: 0.6083601038017782
Mean Squared Error: 92.21250644629214


In [None]:
# Reload the raw (unscaled) observations so the real 2019 PRCP values can be
# compared with, and exported next to, the model predictions.
weather = pd.read_csv('../Data/csv/WashingtonWeather.csv')

# Same column selection and chronological ordering as the preprocessing cell
columns_to_use = ['YEAR', 'MONTH', 'MIN', 'LAT', 'LON', 'ELEV(M)', 'TEMP', 'DEWP', 'PRCP', 'USAF']
weather = weather[columns_to_use].sort_values(by=['YEAR', 'MONTH'], ascending=[True, True])

# predictions_list holds one entry per station in the order the stations first
# appear in the full, sorted frame. Take the order from that frame, before any
# year filtering: the 2019 subset may list stations in a different order or
# omit some, which would pair predictions with the wrong station.
station_order = weather['USAF'].unique()
if len(station_order) != len(predictions_list):
    raise ValueError(
        f"Expected one prediction array per station: {len(station_order)} stations "
        f"but {len(predictions_list)} prediction arrays"
    )

# Kept for parity with the previous version of this cell: `data` ends up as
# the 2019-only frame.
data = weather[weather['YEAR'] == 2019]

output_data = []
for station, station_pred in zip(station_order, predictions_list):
    # model.predict returns shape (n, 1); flatten it to a plain column
    station_pred = np.asarray(station_pred).ravel()
    # The model was evaluated on the station's final sequences, whose targets
    # are the station's last len(station_pred) rows. Copy the slice so adding
    # a column does not write into a view of `weather`.
    station_data = weather[weather['USAF'] == station].tail(len(station_pred)).copy()
    station_data['Predicted_PRCP'] = station_pred
    output_data.append(station_data)
output_df = pd.concat(output_data, ignore_index=True)

# Only 2019 rows are reported and exported
output_df = output_df[output_df['YEAR'] == 2019].reset_index(drop=True)

# Precipitation cannot be negative. Clip with a direct column assignment;
# chained indexing may modify a temporary copy and leave the frame unchanged.
output_df['Predicted_PRCP'] = output_df['Predicted_PRCP'].clip(lower=0.0)

mse = mean_squared_error(output_df['PRCP'], output_df['Predicted_PRCP'])
print(mse)

# Export two files with identical columns: observed PRCP and predicted PRCP
original_df = output_df.drop('Predicted_PRCP', axis = 1)
original_df.to_csv("../Data/csv/Original2019.csv")
predicted_df = output_df.drop('PRCP', axis = 1)
predicted_df = predicted_df.rename(columns={'Predicted_PRCP': 'PRCP'})
predicted_df.to_csv("../Data/csv/Predicted2019.csv")