In [23]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

# Load data into pandas
data = pd.read_csv('/Users/parkercarrus/Desktop/Weather/LSTM/Data/lstm_dataset.csv')
X = data[['time_sin', 'time_cos', 'year_day_sin', 'year_day_cos', 'temp', 'pressure', 'humidity']]
y = data[['temp', 'pressure', 'humidity']]

# Normalize all features and targets
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# Load the trained model
model = tf.keras.models.load_model('/Users/parkercarrus/Desktop/Weather/Working App/allvalues.keras')

# Function to predict the next 'n' hours of temperature, pressure, and humidity
def predict_future_sequence(model, current_sequence, n_hours, scaler_X, scaler_y):
    predictions = []
    current_sequence = np.array(current_sequence)  # Ensure current sequence is a numpy array

    for _ in range(n_hours):
        # Convert to DataFrame to include feature names
        current_sequence_df = pd.DataFrame(current_sequence, columns=['time_sin', 'time_cos', 'year_day_sin', 'year_day_cos', 'temp', 'pressure', 'humidity'])
        
        # Reshape the current sequence to fit the model input
        current_sequence_scaled = scaler_X.transform(current_sequence_df)
        current_sequence_scaled = np.expand_dims(current_sequence_scaled, axis=0)

        # Print the current input sequence
        print("Current input sequence:")
        print(current_sequence_df)

        # Predict the next hour's values
        next_values_scaled = model.predict(current_sequence_scaled)
        next_values = scaler_y.inverse_transform(next_values_scaled)[0]

        # Append the prediction to the list
        predictions.append(next_values)

        # Create the next input sequence by preserving the original features
        next_input = np.concatenate([current_sequence[-1, :4], next_values])  # Combine original features with predictions
        next_input_df = pd.DataFrame([next_input], columns=['time_sin', 'time_cos', 'year_day_sin', 'year_day_cos', 'temp', 'pressure', 'humidity'])
        next_input_scaled = scaler_X.transform(next_input_df)
        current_sequence = np.append(current_sequence[1:], [next_input_scaled[0]], axis=0)

    return np.array(predictions)

# Example usage:
# Assuming 'current_sequence' is the last 24 hours of data available (same shape as the sequences used for training)
# current_sequence should be a 2D array with shape (sequence_length, number_of_features)
sequence_length = 24
current_sequence = X_scaled[-sequence_length:]  # Last sequence from the scaled data
n_hours = 48  # Number of hours to predict

future_predictions = predict_future_sequence(model, current_sequence, n_hours, scaler_X, scaler_y)

# Convert predictions to a DataFrame for better readability
future_predictions_df = pd.DataFrame(future_predictions, columns=['Temp', 'Pressure', 'Humidity'])

print(future_predictions_df)


Current input sequence:
    time_sin  time_cos  year_day_sin  year_day_cos      temp  pressure  \
0   0.933013  0.250000           0.5           1.0  0.420691  0.507937   
1   0.853553  0.146447           0.5           1.0  0.411314  0.507937   
2   0.853553  0.146447           0.5           1.0  0.411314  0.507937   
3   0.750000  0.066987           0.5           1.0  0.418503  0.523810   
4   0.750000  0.066987           0.5           1.0  0.418503  0.523810   
5   0.629410  0.017037           0.5           1.0  0.417096  0.539683   
6   0.500000  0.000000           0.5           1.0  0.419284  0.555556   
7   0.370590  0.017037           0.5           1.0  0.417565  0.555556   
8   0.370590  0.017037           0.5           1.0  0.417565  0.555556   
9   0.250000  0.066987           0.5           1.0  0.409752  0.571429   
10  0.250000  0.066987           0.5           1.0  0.409752  0.571429   
11  0.146447  0.146447           0.5           1.0  0.406782  0.587302   
12  0.146447  

In [27]:
import numpy as np
import pandas as pd
import joblib
from processing import get_cyclical_datetime

class RandomForest:
    def __init__(self, data, model_path, past_weather_path):
        self.data = data
        self.model_path = model_path
        self.past_weather_path = past_weather_path
        # load random forest model
        try:
            self.rf_model = joblib.load(model_path)
        except FileNotFoundError:
            raise FileNotFoundError(f'Random Forest Classifier could not be accessed from {model_path}')
        except Exception as e:
            raise Exception(f'An error occured while loading the Random Forest Classifier: {e}')

    def process_data(self, data):
        try:
            temp = data.get('temp')
            humidity = data.get('humidity')
            pressure = data.get('pressure')

            if temp is None or humidity is None or pressure is None:
                raise ValueError('Missing required weather data')

            trig_dt = get_cyclical_datetime()
            time_sin = trig_dt.get('time_sin')
            time_cos = trig_dt.get('time_cos')
            year_day_sin = trig_dt.get('year_day_sin')
            year_day_cos = trig_dt.get('year_day_cos')

            features_list = [time_sin, time_cos, year_day_sin, year_day_cos, temp, humidity, pressure]

            past_weather_csv = pd.read_csv(self.past_weather_path).tail(3)
            if past_weather_csv.shape[0] < 3:
                raise ValueError(f'Insufficient past weather data stored in {self.past_weather_path}')

            for i in range(3):
                for j in range(3):
                    features_list.append(past_weather_csv.values[2-i][j])

            arr = np.array(features_list)
            arr = arr.reshape(1,16)     

            return arr

        except Exception as e:
            raise RuntimeError(f'Error processing data: {e}')
        
    def is_Raining(self) -> bool:
        try:
            inputs = self.process_data(self.data)
            eval = self.rf_model.predict(inputs)
            return bool(eval[0])
        except Exception as e:
            raise Exception(f'An error occured while evaluating the Random Forest Classifier: {e}')

# Function to predict precipitation for the next 48 hours
def predict_precipitation(next_48_hours_data, past_data_path, model_path):
    results = []
    for i in range(len(next_48_hours_data)):
        # Ensure future data is in the correct format
        future_data = {
            'temp': next_48_hours_data.iloc[i]['Temp'],
            'humidity': next_48_hours_data.iloc[i]['Humidity'],
            'pressure': next_48_hours_data.iloc[i]['Pressure']
        }
        rf = RandomForest(future_data, model_path, past_data_path)
        prediction = rf.is_Raining()
        results.append(prediction)
    return results

In [28]:


predict_precipitation(future_predictions_df, '/Users/parkercarrus/Desktop/Weather/Working App/data/past_weather.csv', '/Users/parkercarrus/Desktop/Weather/Working App/models/rf1_temp.pkl')




[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False]