In [1]:
# Mount Google Drive at /gdrive so the '/gdrive/My Drive/...' paths used by
# later cells resolve. The google.colab module is only importable inside a
# Colab runtime.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [10]:
import json
import numpy as np
import pickle
import pandas as pd
from datetime import datetime
import pytz
import time

In [24]:
# Pickled model loaded by runPrediction() via load_model().
model_file_path = '/gdrive/My Drive/trip_model.pkl'
# Pickled feature scaler loaded by runPrediction() via load_scaler().
scaler_file_path = '/gdrive/My Drive/scaler_model.pkl'
# JSON station records read as the prediction input.
data_file_path = '/gdrive/My Drive/station_data.json'
# NOTE(review): identical to data_file_path, so runPrediction() overwrites its
# own input file with the prediction-augmented records -- confirm intended.
output_file_path_pred = '/gdrive/My Drive/station_data.json'
# Destination of the priority-sorted records written by process_batch().
output_file_path_pandas = "/gdrive/My Drive/output_priority.json"

In [4]:
def get_current_datetime():
    """Return the current US/Eastern time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    now_eastern = datetime.now(pytz.timezone('US/Eastern'))
    return now_eastern.strftime("%Y-%m-%d %H:%M:%S")
# Module-level US/Eastern timezone object; later cells pass it to
# datetime.now() to read the current hour.
eastern = pytz.timezone('US/Eastern')

In [5]:
def load_model(file_path):
    """Unpickle and return the object stored at ``file_path``.

    The file is opened in binary mode and closed before returning.
    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    with open(file_path, 'rb') as handle:
        model = pickle.load(handle)
    return model

def load_scaler(file_path):
    """Unpickle and return the scaler object stored at ``file_path``.

    The file is opened in binary mode and closed before returning.
    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    with open(file_path, 'rb') as handle:
        scaler = pickle.load(handle)
    return scaler

def load_station_data(file_path):
    """Parse the JSON document at ``file_path`` and return the decoded object.

    The file is read as UTF-8 explicitly rather than with the platform's
    locale-dependent default encoding, so non-ASCII text in the records is
    decoded the same way on every machine.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as data_file:
        return json.load(data_file)

def scale_features(data_point, scaler):
    """Scale one sample with ``scaler`` and return its transformed row.

    The sample is wrapped in a one-element list before being passed to
    ``scaler.transform``; the first row of the result is returned.
    """
    single_row_batch = [data_point]
    scaled_batch = scaler.transform(single_row_batch)
    return scaled_batch[0]

def predict_traffic(model, scaled_data_point):
    """Predict for one already-scaled sample and return the single result.

    The sample is wrapped in a one-element list before being passed to
    ``model.predict``; the first element of the result is returned.
    """
    single_row_batch = [scaled_data_point]
    predictions = model.predict(single_row_batch)
    return predictions[0]


def add_predictions_to_data(station_data, model, scaler):
    """Return the station records with a ``predicted_traffic`` field added.

    Args:
        station_data: list of dicts, each containing at least the keys
            ``day_of_week``, ``hour`` and ``local_id``.
        model: object exposing ``predict(features)``.
        scaler: object exposing ``transform(features)``.

    Returns:
        A new list of dicts (one per input record) with every original key
        plus ``predicted_traffic``. The input list is not modified. An empty
        input yields an empty list instead of failing on column selection.

    Raises:
        KeyError: if a required feature key is missing from the records.
    """
    # Nothing to predict; an empty DataFrame has no feature columns to select.
    if not station_data:
        return []

    df = pd.DataFrame(station_data)

    # Select the feature columns and rename them to the names passed to the
    # scaler ('hour' already matches, so it needs no rename entry).
    features = df[['day_of_week', 'hour', 'local_id']].rename(
        columns={'day_of_week': 'day', 'local_id': 'station_id'}
    )

    scaled_features = scaler.transform(features)
    df['predicted_traffic'] = model.predict(scaled_features)

    return df.to_dict('records')


def save_data_with_predictions(station_data, output_file_path):
    """Serialize ``station_data`` as JSON into ``output_file_path``.

    The destination is opened in text write mode, so existing content is
    replaced.
    """
    with open(output_file_path, 'w') as sink:
        json.dump(station_data, fp=sink)

In [6]:
def runPrediction():
    """Load the model, scaler and station records, predict, and save the result.

    Reads from ``model_file_path``, ``scaler_file_path`` and ``data_file_path``
    and writes the prediction-augmented records to ``output_file_path_pred``.
    """
    model = load_model(model_file_path)
    feature_scaler = load_scaler(scaler_file_path)
    records = load_station_data(data_file_path)

    save_data_with_predictions(
        add_predictions_to_data(records, model, feature_scaler),
        output_file_path_pred,
    )

In [7]:
# Run one prediction pass now so output_file_path_pred contains predictions
# before process_batch() reads it.
runPrediction()

In [20]:
def process_batch(input_path=None, output_path=None):
    """Rank stations by maintenance priority and write them as JSON lines.

    Args:
        input_path: JSON file of station records to read. Defaults to the
            module-level ``output_file_path_pred``.
        output_path: file to write, one JSON record per line, highest
            priority first. Defaults to the module-level
            ``output_file_path_pandas``.

    The priority is
    ``(num_docks_disabled + num_vehicles_disabled) /
    (num_docks_available + num_vehicles_available)``.

    NOTE(review): when both "available" counts are zero the ratio is inf
    (or NaN for 0/0); NaN rows sort last and such values are presumably
    written as null by ``to_json`` -- confirm how those stations should rank.
    """
    # Resolve defaults at call time so later changes to the globals are seen.
    if input_path is None:
        input_path = output_file_path_pred
    if output_path is None:
        output_path = output_file_path_pandas

    df = pd.read_json(input_path)

    disabled_total = df["num_docks_disabled"] + df["num_vehicles_disabled"]
    available_total = df["num_docks_available"] + df["num_vehicles_available"]
    df["maintenance_priority"] = disabled_total / available_total

    # Highest priority first.
    df = df.sort_values(by="maintenance_priority", ascending=False)

    df.to_json(output_path, orient="records", lines=True)

In [27]:
# Load the prediction output into a module-level DataFrame.
# NOTE(review): `df` is not referenced by any later cell visible here; this
# looks like a leftover inspection cell -- confirm before removing.
df = pd.read_json(output_file_path_pred)

In [9]:
# Remember the US/Eastern hour at which predictions were last considered
# current; the polling loop compares against this value to decide when to
# call runPrediction() again.
current_hour_global = datetime.now(eastern).hour
print(current_hour_global)

1


In [30]:
# Polling loop: re-run predictions whenever the US/Eastern hour changes and
# rebuild the priority file on every iteration.
# Requires `current_hour_global`, `eastern`, runPrediction() and
# process_batch() to have been defined by earlier cells.

# Set the interval in seconds
interval_seconds = 15

try:
    # Run indefinitely
    while True:
        # Record the start time for each iteration
        iteration_start_time = time.time()

        # Refresh predictions only when the hour differs from the last one
        # recorded in current_hour_global.
        current_datetime = datetime.now(eastern)
        if current_datetime.hour != current_hour_global:
          runPrediction()
          current_hour_global = current_datetime.hour

        # Process the batch
        process_batch()

        # Calculate and print the time taken for the iteration
        iteration_end_time = time.time()
        iteration_elapsed_time = iteration_end_time - iteration_start_time
        print(f"Time taken for iteration: {iteration_elapsed_time:.2f} seconds")

        # Wait for the specified interval
        # (the sleep is added on top of the work above, so the effective
        # period is interval_seconds plus the iteration's elapsed time)
        time.sleep(interval_seconds)

except KeyboardInterrupt:
    # Handle keyboard interrupt (e.g., press Ctrl+C to stop the loop)
    print("Stopping the application")


Time taken for iteration: 12.23 seconds
Time taken for iteration: 0.06 seconds
Time taken for iteration: 0.06 seconds
Time taken for iteration: 0.05 seconds
Time taken for iteration: 0.09 seconds
Time taken for iteration: 0.05 seconds
Time taken for iteration: 0.05 seconds
Time taken for iteration: 0.05 seconds
Time taken for iteration: 0.08 seconds
Time taken for iteration: 0.08 seconds
Time taken for iteration: 0.08 seconds
Time taken for iteration: 0.08 seconds
Time taken for iteration: 0.06 seconds
Time taken for iteration: 0.05 seconds
Time taken for iteration: 0.05 seconds
Time taken for iteration: 0.05 seconds
Time taken for iteration: 0.06 seconds
Time taken for iteration: 0.06 seconds
Stopping the application
