In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
import logging
import openai

logging.basicConfig(level=logging.INFO)

def build_exogenous_variable(capacity_data, library_hours):
    """Index capacity readings by timestamp and flag whether the library is open.

    Args:
        capacity_data: DataFrame with a ``timestamp`` column that
            ``pd.to_datetime`` can parse. The caller's frame is not modified,
            so the cell can be re-run on the same raw frame.
        library_hours: DataFrame with one row per calendar day and the columns
            ``date``, ``open_time`` and ``close_time``. The two time values are
            appended to the date and parsed with ``pd.to_datetime``. A close
            time earlier than the open time is read as closing on the
            following day. Rows with a missing date or time are skipped, so
            that day counts as closed.

    Returns:
        A copy of ``capacity_data`` indexed by the parsed ``timestamp`` with an
        added integer ``is_open`` column: 1 when the reading falls inside an
        opening window (both bounds inclusive), otherwise 0.
    """
    # Copy first: set_index would otherwise consume the caller's 'timestamp'
    # column and a second run of this cell would fail with a KeyError.
    capacity_data = capacity_data.copy()
    capacity_data['timestamp'] = pd.to_datetime(capacity_data['timestamp'])
    capacity_data = capacity_data.set_index('timestamp')

    hours_dict = {}
    for _, row in library_hours.iterrows():
        if pd.isna(row['date']) or pd.isna(row['open_time']) or pd.isna(row['close_time']):
            continue  # no usable window for this day -> treated as closed
        date_str = pd.Timestamp(row['date']).strftime('%Y-%m-%d')
        open_dt = pd.to_datetime(date_str + ' ' + str(row['open_time']))
        close_dt = pd.to_datetime(date_str + ' ' + str(row['close_time']))
        if close_dt < open_dt:
            # Overnight opening: the window ends on the next calendar day.
            close_dt += pd.Timedelta(days=1)
        hours_dict[date_str] = (open_dt, close_dt)

    one_day = pd.Timedelta(days=1)

    def _is_open(ts):
        # A reading shortly after midnight can belong to the previous day's
        # overnight window, so check both that day's window and the one before.
        for day in (ts, ts - one_day):
            window = hours_dict.get(day.strftime('%Y-%m-%d'))
            if window is not None and window[0] <= ts <= window[1]:
                return 1
        return 0

    capacity_data['is_open'] = [_is_open(ts) for ts in capacity_data.index]
    return capacity_data


In [None]:
def add_academic_features(capacity_data, academic_calendar):
    """Add one 0/1 indicator column per academic period type.

    Both arguments are modified in place: the calendar's ``start_date`` and
    ``end_date`` columns are converted with ``pd.to_datetime``, and
    ``capacity_data`` gains an ``is_<period_type>`` column for every distinct
    ``period_type`` value. A row is set to 1 when its index value lies between
    a period's start and end (both bounds inclusive).

    Args:
        capacity_data: DataFrame whose index is compared against the calendar
            dates.
        academic_calendar: DataFrame with ``period_type``, ``start_date`` and
            ``end_date`` columns.

    Returns:
        The same ``capacity_data`` object, with the indicator columns added.
    """
    for bound in ('start_date', 'end_date'):
        academic_calendar[bound] = pd.to_datetime(academic_calendar[bound])

    # Create every indicator column first so each period type has a column
    # initialised to 0 before any interval is applied.
    for kind in academic_calendar['period_type'].unique():
        capacity_data[f"is_{kind}"] = 0

    stamps = capacity_data.index
    intervals = zip(
        academic_calendar['period_type'],
        academic_calendar['start_date'],
        academic_calendar['end_date'],
    )
    for kind, begins, ends in intervals:
        inside = (stamps >= begins) & (stamps <= ends)
        capacity_data.loc[inside, f"is_{kind}"] = 1

    return capacity_data

In [None]:
def apply_gpt_adjustment(prediction_df, context):
    """Ask a GPT completion model to adjust the forecasts and store the result.

    The values of ``prediction_df['forecast']`` and ``context`` are embedded in
    a prompt. The reply is parsed as a comma-separated list of numbers;
    surrounding square brackets are tolerated because the prompt shows the
    forecasts as a Python list. If the reply does not contain exactly one
    number per forecast, the original forecasts are used and a warning is
    logged.

    Args:
        prediction_df: DataFrame with a ``forecast`` column. Modified in place.
        context: Free-text description inserted into the prompt.

    Returns:
        The same ``prediction_df`` with a ``forecast_adjusted`` column added.
    """
    import os  # local import keeps this cell independent of the import cell

    # Never ship a credential in the notebook: take the key from the
    # environment. When the variable is unset, leave whatever key the openai
    # module already has instead of overwriting it with a placeholder.
    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key:
        openai.api_key = api_key

    predictions = prediction_df['forecast'].tolist()
    prompt = (
        f"Given the following capacity forecasts: {predictions}, and the context: "
        f"'{context}', provide adjusted capacity predictions considering surges or drops."
    )
    # NOTE(review): `openai.Completion` with `engine="text-davinci-003"` looks
    # like the legacy (pre-1.0 `openai` package) interface; confirm the
    # installed client version and that the model is still served.
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=1000,
        temperature=0.2
    )
    raw_text = response['choices'][0]['text'].strip()

    # Strip list brackets along with whitespace so "[1.0, 2.0]" parses too.
    tokens = [tok.strip(" \t\r\n[]") for tok in raw_text.split(',')]
    tokens = [tok for tok in tokens if tok]

    adjusted_vals = None  # None means "fall back to the original forecasts"
    if len(tokens) != len(predictions):
        logging.warning("GPT returned unexpected format; using original predictions.")
    else:
        try:
            adjusted_vals = [float(tok) for tok in tokens]
        except ValueError:
            logging.warning("GPT returned non-numeric data; using original predictions.")

    prediction_df['forecast_adjusted'] = (
        predictions if adjusted_vals is None else adjusted_vals
    )
    return prediction_df

In [None]:


def train_and_forecast_arima(capacity_data, exog_columns):
    """Fit a SARIMAX(1, 1, 1) model on the first 80% of rows and forecast the rest.

    Args:
        capacity_data: DataFrame in chronological row order with a ``capacity``
            column and every column named in ``exog_columns``.
        exog_columns: Names of the exogenous regressor columns.

    Returns:
        DataFrame indexed by the hold-out rows' index (named ``timestamp``)
        with a single ``forecast`` column, one value per hold-out row.

    Raises:
        ValueError: If the 80/20 split leaves no training or no hold-out rows.
    """
    train_size = int(len(capacity_data) * 0.8)
    train_df = capacity_data.iloc[:train_size]
    test_df = capacity_data.iloc[train_size:]
    if train_df.empty or test_df.empty:
        raise ValueError(
            "capacity_data needs at least 2 rows for an 80/20 train/test split; "
            f"got {len(capacity_data)}."
        )

    model = SARIMAX(
        endog=train_df['capacity'],
        exog=train_df[exog_columns],
        order=(1, 1, 1),
        enforce_stationarity=False,
        enforce_invertibility=False
    )
    results = model.fit(disp=False)

    # Forecast by step count rather than by date labels: label-based start/end
    # only resolve out of sample when the index carries a usable frequency.
    # `typ='levels'` is dropped because it is not an argument of the
    # state-space results API; the forecast is already on the scale of
    # `capacity`.
    forecast = results.forecast(steps=len(test_df), exog=test_df[exog_columns])

    # Align positionally with the hold-out rows; the forecast's own index may
    # be integer-based when the training index has no frequency.
    forecast_df = pd.DataFrame(
        {"forecast": np.asarray(forecast)},
        index=test_df.index.rename("timestamp"),
    )
    return forecast_df


In [None]:
def main():
    """Run the pipeline end to end.

    Reads ``capacity_data.csv``, ``library_hours.csv`` and
    ``academic_calendar.csv`` from the working directory, builds the exogenous
    indicator columns, fits the model and forecasts, applies the GPT
    adjustment, writes ``forecast_results.csv`` and prints the last 10 rows.
    """
    raw_capacity = pd.read_csv('capacity_data.csv')
    hours = pd.read_csv('library_hours.csv')
    hours['date'] = pd.to_datetime(hours['date'])
    calendar = pd.read_csv('academic_calendar.csv')

    features = add_academic_features(
        build_exogenous_variable(raw_capacity, hours),
        calendar,
    )

    # 'is_open' goes first, followed by every other "is_" indicator column in
    # the order it appears in the frame.
    calendar_flags = [
        name for name in features.columns
        if name.startswith("is_") and name != "is_open"
    ]
    forecasts = train_and_forecast_arima(features, ['is_open', *calendar_flags])

    forecasts = apply_gpt_adjustment(
        forecasts,
        "Clemons Library at UVA, Finals Week, historically higher capacity.",
    )
    forecasts.to_csv('forecast_results.csv')
    print(forecasts.tail(10))

# Entry point: run the whole pipeline when this code executes as the top-level
# module (i.e. when __name__ is "__main__").
if __name__ == "__main__":
    main()
