###Import libraries

In [8]:
import os
import pandas as pd
import numpy as np
import ollama

###Helper functions

In [9]:
# Parameters
PREDICTION_MODE = "end_volume"
TEMPERATURE = 0.8
TOP_P = 0.9
OUTPUT_FOLDER = "predictions"

# Create output folder if it doesn't exist
if not os.path.exists(OUTPUT_FOLDER):
    os.makedirs(OUTPUT_FOLDER)

# Normalization function
def normalize_data(data):
    return (data - np.min(data)) / (np.max(data) - np.min(data))

# Tokenization function
def tokenize_data(data):
    # Replace NaN values with 0
    data = np.nan_to_num(data, nan=0.0)
    # Tokenize by scaling to 0-99 range and converting to formatted strings
    return ", ".join(f"{int(value * 100):02d}" for value in data)

# Parse predicted values
def parse_predicted_values(predicted_values):
    return [int(value) / 100 for value in predicted_values.split(", ") if value]

# Save predictions to CSV
def save_predictions(region_id, predictions, ground_truth, mode):
    file_name = f"{OUTPUT_FOLDER}/predictions_{mode}_region_{region_id}.csv"
    df = pd.DataFrame({
        'Prediction': predictions,
        'Ground_Truth': ground_truth
    })
    df.to_csv(file_name, index=False)
    # print(f"Saved predictions for region {region_id} to {file_name}")

###Ollama prediction

In [10]:
def make_predictions(input_text, n_predictions=20):
    predictions = []

    for i in range(n_predictions):
        # print(f"Generating prediction {i + 1}/{n_predictions}...")
        response = ollama.chat(model='llama2:13b-text', messages=[
            {
                'role': 'user',
                'content': input_text,
            },
        ], options={
            "temperature": TEMPERATURE,
            "top_p": TOP_P,
            "num_predict": 4  #  single tokenized value
        })

        # Extract the predicted values
        predicted_values = response['message']['content']
        # print(f"Prediction {i + 1}: {predicted_values}")

        # Parse the prediction and add the first value to the list
        parsed = parse_predicted_values(predicted_values)
        if parsed:
            predictions.append(parsed[0])  # Only take the first value (484th)

    return predictions

###Main Processing Function

In [13]:
def process_regions(data, prediction_mode, test_region=None, regions=None):
    if test_region is not None:
        regions = [test_region]
    elif regions is None:
        regions = data['region'].unique()
    
    # Uncomment this to start from a specific region
    # regions = [region for region in regions if region >= 53]
    # print(f"Processing regions: {regions}")

    for region_id in regions:
        print(f"Processing region {region_id}...")
        # get region
        # region_data = data[data['region'] == region_id][prediction_mode]
        region_data = data[data['region'] == region_id][prediction_mode].values 

        # print("--- Region data ---")
        # print(region_data)
        
        # Ensure sufficient data
        if len(region_data) < 483 + 1: # Only need 484 values for this task
            print(f"Region {region_id} does not have enough data. Skipping.")
            continue

        # Normalize data
        normalized_data = normalize_data(region_data)

        # print("--- Normalized data ---")
        # print(normalized_data)

        # Select first 483 timeslots to use as input for the model
        input_data = normalized_data[:483]
        tokenized_input = tokenize_data(input_data)

        # print("--- Tokenized input ---")
        # print(tokenized_input)

        # Save ground truth (484th value)
        ground_truth = [normalized_data[483]]

        # Skip if ground_truth is 0 or nan
        if (ground_truth == [0.0] or np.isnan(ground_truth[0])):
            print(f"Ground truth  in region {region_id} is 0. Skipping prediction.")
            predictions = [0.0] * 20
            ground_truth = [0.0] * 20
            save_predictions(region_id, predictions, ground_truth, prediction_mode)
            continue

        # Make predictions (20 predictions for the 484th value)
        predictions = make_predictions(tokenized_input, n_predictions=20)

        # Save predictions
        save_predictions(region_id, predictions, ground_truth * len(predictions), prediction_mode)

###Load and Execute

In [14]:
# Load the traffic volume dataset
file_path = 'llmtime_data/bike_volume_test.csv'
data = pd.read_csv(file_path)

# Note: to process a single region, set test_region_id  otherwise set it to None
test_region_id = None

# print(data.head())

process_regions(data, PREDICTION_MODE, test_region=test_region_id)

Processing region 0...
Processing region 1...
Processing region 2...
Processing region 3...
Ground truth  in region 3 is 0. Skipping prediction.
Processing region 4...
Ground truth  in region 4 is 0. Skipping prediction.
Processing region 5...
Ground truth  in region 5 is 0. Skipping prediction.
Processing region 6...
Ground truth  in region 6 is 0. Skipping prediction.
Processing region 7...
Ground truth  in region 7 is 0. Skipping prediction.
Processing region 8...
Ground truth  in region 8 is 0. Skipping prediction.
Processing region 9...
Ground truth  in region 9 is 0. Skipping prediction.
Processing region 10...
Ground truth  in region 10 is 0. Skipping prediction.
Processing region 11...
Ground truth  in region 11 is 0. Skipping prediction.
Processing region 12...
Ground truth  in region 12 is 0. Skipping prediction.
Processing region 13...
Ground truth  in region 13 is 0. Skipping prediction.
Processing region 14...
Ground truth  in region 14 is 0. Skipping prediction.
Processin

  return (data - np.min(data)) / (np.max(data) - np.min(data))


Processing region 21...
Processing region 22...
Processing region 23...
Processing region 24...
Processing region 25...
Processing region 26...
Processing region 27...
Ground truth  in region 27 is 0. Skipping prediction.
Processing region 28...
Ground truth  in region 28 is 0. Skipping prediction.
Processing region 29...
Ground truth  in region 29 is 0. Skipping prediction.
Processing region 30...
Ground truth  in region 30 is 0. Skipping prediction.
Processing region 31...
Ground truth  in region 31 is 0. Skipping prediction.
Processing region 32...
Ground truth  in region 32 is 0. Skipping prediction.
Processing region 33...
Ground truth  in region 33 is 0. Skipping prediction.
Processing region 34...
Ground truth  in region 34 is 0. Skipping prediction.
Processing region 35...
Ground truth  in region 35 is 0. Skipping prediction.
Processing region 36...
Ground truth  in region 36 is 0. Skipping prediction.
Processing region 37...
Ground truth  in region 37 is 0. Skipping prediction