In [8]:
from utils import *
from models.utils import *
from models.training_utils import *
import torch
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)

# I found that loading the data initially is not the most consistent, so I did not include it in the run.py file.
# I have util functions that can be used to load the bulk of the data, but I will not include them here.
# For downloading the bulk data initially, I would recommend using the load_Source_data() function in the utils.py file.
# And using it in a python notebook for a more controlled/manual initial download.

In [9]:
load_model('./models/weights/Austin_attn_lstm.pt')

ModuleNotFoundError: No module named 'training_utils'

In [4]:
# Step 1: Update the data
# This will update the data from the sources and include entries up until yesterday's date.
# This will provide us with a data entry that can be used for prediction of the current day.
num_retries = 10
for i in range(num_retries):
    try:
        #update_NOAA_data()
        break
    except Exception as e:
        print(f"Error: {e}")
        print(f"Retrying... {i+1}/{num_retries}")
        time.sleep(10)
        
for i in range(num_retries):
    try:
        #update_OM_data()
        break
    except Exception as e:
        print(f"Error: {e}")
        print(f"Retrying... {i+1}/{num_retries}")
        time.sleep(10)
        
for i in range(num_retries):
    try:
        #update_WRH_data()
        break
    except Exception as e:
        print(f"Error: {e}")
        print(f"Retrying... {i+1}/{num_retries}")
        time.sleep(10)
        
for i in range(num_retries):
    try:
        #update_Solar_Soil_data()
        break
    except Exception as e:
        print(f"Error: {e}")
        print(f"Retrying... {i+1}/{num_retries}")
        time.sleep(10)
        
for i in range(num_retries):
    try:
        #update_Air_Quality_data()
        break
    except Exception as e:
        print(f"Error: {e}")
        print(f"Retrying... {i+1}/{num_retries}")
        time.sleep(10)
        
print("Data updated successfully!")

Data updated successfully!


In [4]:
# Step 2: Get city info and iterate through the cities
# The workflow will be as follows:
# 1. Load the data for the city - done
# 2. Load the models for the city - done
# 3. Look at the new data entry
# 4. Predict the new data entry, update model weights, and save the model weights
# 5. Predict the next day's data entry
# 6. Place a bet on Kalshi
city_info = get_city_info()

for city in city_info.keys():
    if city != 'Austin':
        continue
    # Get paths for the city data
    noaa_path = city_info[city]['noaa']
    om_path = city_info[city]['om']
    wrh_path = city_info[city]['wrh']
    aq_path = city_info[city]['aq']
    solar_path = city_info[city]['ss']
    attn_lstm_path = city_info[city]['attn_lstm']
    lstm_path = city_info[city]['lstm']
    scaler_features_path = city_info[city]['scaler']
    
    # Load the data for the city
    # all_df is the main dataframe that contains all the data combined. I included subsets of the dataframes as well,
    # in case I need them in the future. Predictor is simply the last row. I extracted it so that it does not get
    # deleted by dropna.   
    daily_df, daily_df_2, daily_df_3, all_df, predictor_final = load_all_dfs(noaa_path, om_path, solar_path, wrh_path, aq_path)
    
    # Load the models for the city
    attn_lstm_model = load_model(attn_lstm_path)
    lstm_model = load_model(lstm_path)
    scaler_features = load_scaler(scaler_features_path)
    
    # Prepare the data for prediction (using prep_data for ease sequence creation)
    columns_to_ignore = ['date', 'next_day_max_temp']
    target_column = 'next_day_max_temp'
    data = all_df
    features = data.drop(columns=columns_to_ignore).columns
    target = target_column
    # Split the data into training, validation, and testing sets
    train_size = int(len(data) * 0.7)
    val_size = int(len(data) * 0.15)
    train_data = data[:train_size]
    val_data = data[train_size:train_size+val_size]
    test_data = data[train_size+val_size:]
    # Create sequences
    def create_sequences(features, target, seq_length):
        X = []
        y = []
        for i in range(len(features) - seq_length):
            X.append(features[i:i+seq_length])
            y.append(target[i+seq_length])
        return np.array(X), np.array(y)
    # Create sequences for the training, validation, and testing sets
    seq_length = 20  # Number of previous days to use as input
    test_features = scaler_features.transform(test_data[features])
    X_test, y_test = create_sequences(test_features, test_data[target].values, seq_length)
    # Get the last sequence for prediction
    last_sequence = X_test[-1]
    last_result = y_test[-1]
    # turn the last sequence into a tensor
    last_sequence = torch.FloatTensor(last_sequence).unsqueeze(0)
    
    # Predict the new data entry
    # Predict using the attention LSTM model, then update its weights
    attn_lstm_model.eval()
    with torch.no_grad():
        attn_lstm_output = attn_lstm_model(last_sequence)
    attn_lstm_output = attn_lstm_output.item()

    # Predict using the LSTM model, then update its weights
    lstm_model.eval()
    with torch.no_grad():
        lstm_output = lstm_model(last_sequence)
    lstm_output = lstm_output.item()
    
    # Update the last sequence, by popping the first element and appending the new prediction
    last_sequence = last_sequence.squeeze(0).tolist()
    last_sequence.pop(0)
    predictor_final = predictor_final.to_numpy().tolist()
    last_sequence.append(predictor_final)
    last_sequence = torch.FloatTensor([last_sequence])
    
    # predict using the updated models
    attn_lstm_model.eval()
    with torch.no_grad():
        attn_lstm_output = attn_lstm_model(last_sequence)
    attn_lstm_output = attn_lstm_output.item()
    lstm_model.eval()
    with torch.no_grad():
        lstm_output = lstm_model(last_sequence)
    lstm_output = lstm_output.item()
    
    # mean of the two predictions
    prediction = (attn_lstm_output + lstm_output) / 2



ModuleNotFoundError: No module named 'training_utils'