In [1]:
import json
import random
import openai # Install the OpenAI Python package with `pip install openai`
import time
import os
import socketio
import pandas as pd

# Socket.IO client shared by the helper functions defined below
sio = socketio.Client()


# Handler registered on the client for its "connect" event
@sio.event
def connect():
    """Print a confirmation line once the client is connected."""
    print("Connected to server")

# Helper: push a status message to the server
def send_message_to_server(message):
    """Emit ``message`` to the server as a ``message_from_notebook`` event.

    Args:
        message: Payload handed unchanged to ``sio.emit``.
    """
    event_name = 'message_from_notebook'
    sio.emit(event_name, message)
# Helper: push CSV data to the server
def send_csv_data(csv_data):
    """Emit ``csv_data`` to the server as a ``csv_data`` event.

    Args:
        csv_data: Payload handed unchanged to ``sio.emit``.
    """
    event_name = 'csv_data'
    sio.emit(event_name, csv_data)
# Open the Socket.IO connection, then announce that the evaluation run has begun
server_url = 'http://localhost:5000'
sio.connect(server_url)
send_message_to_server("Evaluation started")

Connected to server


In [2]:
# Add this near the beginning of your notebook, after the imports
import requests

# Fetch the model name
def get_model_name(base_url='http://localhost:5000', timeout=10):
    """Ask the server which model it is currently serving.

    Args:
        base_url: Root URL of the server; ``/model-name`` is appended to it.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` waits indefinitely, so a server that accepts the
            connection but never answers would hang the notebook.

    Returns:
        The ``modelName`` field of the JSON response, or ``"Unknown"`` when
        the request, the JSON decoding or the key lookup fails.
    """
    try:
        response = requests.get(f'{base_url}/model-name', timeout=timeout)
        return response.json()['modelName']
    except Exception as e:
        # Deliberately best-effort: the name is only used for reporting, so any
        # failure (including a timeout) falls back to a placeholder.
        print(f"Error fetching model name: {e}")
        return "Unknown"

# Resolve the model name once, then report it locally and to the server
model_name = get_model_name()
model_banner = f"Using model: {model_name}"
print(model_banner)
send_message_to_server(model_banner)

Using model: ft:gpt-3.5-turbo-0125:personal::9mnYuuez


In [3]:
import pandas as pd
import numpy as np
import requests
import json
from sklearn.metrics import accuracy_score

# Evaluation CSV location: the EVAL_DATA_PATH environment variable, else the default file
eval_data_path = os.getenv('EVAL_DATA_PATH', 'evaluation_data.csv')
data_source_message = f"Using evaluation data from {eval_data_path}"
print(data_source_message)
send_message_to_server(data_source_message)

def query_model(user_query, base_url='http://localhost:5000', timeout=120):
    """Send one query to the server's ``/query`` endpoint and return the reply text.

    Args:
        user_query: Query string, posted as the ``user_query`` form field.
        base_url: Root URL of the server; ``/query`` is appended to it.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` waits indefinitely, so one stalled request would
            block the whole evaluation loop.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    response = requests.post(
        f'{base_url}/query',
        data={'user_query': user_query},
        timeout=timeout,
    )
    # NOTE(review): the HTTP status is not checked, so an error page is returned
    # (and later scored) like any other prediction — confirm this is intended.
    return response.text

# Read the evaluation set from the configured path
eval_data = pd.read_csv(eval_data_path)

# Columns whose values are sent to the model as the query (adjust as needed)
query_columns = ['Method', 'Temp.']

actual_results = []
predicted_results = []
row_count = len(eval_data)

for index, row in eval_data.iterrows():
    # Build "column: value" fragments, skipping columns that are empty in this row
    query = ", ".join(
        f"{col}: {row[col]}" for col in query_columns if pd.notna(row[col])
    )

    # The full row is kept as ground truth; the model's raw text is the prediction
    prediction = query_model(query)
    actual_results.append(row.to_dict())
    predicted_results.append(prediction)

    progress = f"Processed row {index + 1}/{row_count}"
    print(progress)
    send_message_to_server(progress)



Using evaluation data from evaluation_data\evaluation_data.csv


Processed row 1/4


Processed row 2/4


Processed row 3/4


Processed row 4/4


In [4]:
from collections import Counter
from time import sleep
def parse_result(result):
    """Parse a ``"key: value, key: value"`` string into a dict.

    The text is split on commas, and each piece is split on its FIRST colon
    only, so a value that itself contains a colon (for example ``"10:30"``) is
    kept instead of being silently dropped. Pieces without a colon are
    ignored. Keys and values are stripped of surrounding whitespace; when a
    key repeats, the last occurrence wins.

    Args:
        result: Text to parse.

    Returns:
        Dict mapping each stripped key to its stripped value (both strings).
    """
    parsed = {}
    for item in result.split(','):
        key, separator, value = item.partition(':')
        if separator:
            parsed[key.strip()] = value.strip()
    return parsed

def compare_results(actual, predicted):
    """Score one prediction string against the expected field values.

    Args:
        actual: Dict of expected field name -> value.
        predicted: Raw model output, parsed with ``parse_result``.

    Returns:
        Dict with ``accuracy`` (share of actual items reproduced exactly,
        ``0.0`` when ``actual`` is empty), ``correct_items``, ``total_items``,
        ``missing_items`` and ``extra_items`` (predicted items that are not
        correct).
    """
    # parse_result only ever yields stripped strings, so compare against the
    # stripped string form of each actual value; otherwise numeric values
    # (e.g. 25) could never equal their predicted text ("25").
    actual_items = {(key, str(value).strip()) for key, value in actual.items()}
    predicted_items = set(parse_result(predicted).items())

    correct_count = len(actual_items & predicted_items)
    total_count = len(actual_items)
    return {
        # Guard against an empty `actual`, which would otherwise divide by zero
        'accuracy': correct_count / total_count if total_count else 0.0,
        'correct_items': correct_count,
        'total_items': total_count,
        'missing_items': total_count - correct_count,
        'extra_items': len(predicted_items) - correct_count
    }

# Calculate metrics: one comparison summary per evaluated row
comparisons = [compare_results(actual, predicted) for actual, predicted in zip(actual_results, predicted_results)]

# Mean of the per-row accuracies; an empty evaluation set yields 0.0 rather
# than a ZeroDivisionError
overall_accuracy = (
    sum(comp['accuracy'] for comp in comparisons) / len(comparisons)
    if comparisons else 0.0
)
total_correct = sum(comp['correct_items'] for comp in comparisons)
total_items = sum(comp['total_items'] for comp in comparisons)
total_missing = sum(comp['missing_items'] for comp in comparisons)
total_extra = sum(comp['extra_items'] for comp in comparisons)

# Calculate per-field accuracy
all_fields = set(field for result in actual_results for field in result.keys())
field_correct = Counter()
field_total = Counter()

for actual, predicted in zip(actual_results, predicted_results):
    predicted_dict = parse_result(predicted)
    for field in all_fields:
        if field in actual:
            field_total[field] += 1
            # Predicted values are stripped strings, so compare against the
            # stripped string form of the actual value; a raw numeric value
            # (e.g. 25) would otherwise never equal its predicted text ("25").
            if field in predicted_dict and str(actual[field]).strip() == predicted_dict[field]:
                field_correct[field] += 1

# Every field in all_fields occurs in at least one row, so field_total is never 0 here
field_accuracy = {field: field_correct[field] / field_total[field] for field in all_fields}

# Bundle every metric into one JSON-serialisable summary
results = dict(
    model_name=model_name,
    overall_accuracy=overall_accuracy,
    total_correct_items=total_correct,
    total_items=total_items,
    total_missing_items=total_missing,
    total_extra_items=total_extra,
    per_field_accuracy=field_accuracy,
    individual_comparisons=comparisons,
)

# Write the same summary to both result files
for results_path in ('evaluation_results.json', 'last_evaluation_result.json'):
    with open(results_path, 'w') as results_file:
        json.dump(results, results_file)

completion_message = "Evaluation completed. Results saved to evaluation_results.json"
print(completion_message)
# NOTE(review): "Completeed" looks like a typo, but the text is kept as-is in
# case the receiving side matches on it — confirm before correcting.
send_message_to_server("Evaluation Completeed, showing results...")
# Pause between the two status messages
sleep(3)
send_message_to_server(completion_message)

Evaluation completed. Results saved to evaluation_results.json


In [5]:
def restart_server(base_url='http://localhost:5000', poll_interval=5,
                   request_timeout=10, max_wait=None):
    """Ask the server to restart and block until its healthcheck answers 200.

    Args:
        base_url: Root URL of the server; ``/restart`` and ``/healthcheck``
            are appended to it.
        poll_interval: Seconds to sleep between healthcheck attempts. The
            pause applies after ANY unsuccessful attempt, including a non-200
            response, so the server is never polled in a tight loop.
        request_timeout: Seconds to wait for each individual HTTP response.
        max_wait: Upper bound in seconds on the total polling time, or
            ``None`` (default) to keep polling indefinitely.

    Raises:
        TimeoutError: If ``max_wait`` is set and the server has not reported
            healthy before it elapses.
    """
    # A dropped connection or a timed-out request both mean "not reachable now"
    unreachable = (
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
    )

    # Trigger server restart
    try:
        requests.post(f'{base_url}/restart', timeout=request_timeout)
        print("Restart signal sent to server")
    except unreachable:
        # The server may go down before it manages to answer the request
        print("Server is restarting...")

    deadline = None if max_wait is None else time.monotonic() + max_wait

    # Wait for server to come back online
    while True:
        try:
            response = requests.get(f'{base_url}/healthcheck', timeout=request_timeout)
            if response.status_code == 200:
                print("Server is back online")
                return
        except unreachable:
            pass

        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Server did not report healthy within {max_wait} seconds"
            )

        print("Waiting for server to restart...")
        time.sleep(poll_interval)
# Run the restart now; the call blocks until the healthcheck answers with HTTP 200
restart_server()

Server is restarting...


Waiting for server to restart...


Server is back online


Connected to server
