In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from openai import OpenAI
import json

In [13]:
# Load the evaluation data
eval_file = os.environ.get('EVAL_FILE')
eval_data = pd.read_csv(eval_file)

# Load the fine-tuned model ID
with open('model_config.json', 'r') as f:
    config = json.load(f)
    model_id = config.get('model_id')

if not model_id:
    raise ValueError("No fine-tuned model ID found in model_config.json")

print(f"Using model: {model_id}")

ValueError: Invalid file path or buffer object type: <class 'NoneType'>

In [3]:
client = OpenAI()

def query_model(input_text):
    completion = client.chat.completions.create(
        model=model_id,
        messages=[
            {"role": "system", "content": "Marv is a factual chatbot that provides complete specifications based on user requirements."},
            {"role": "user", "content": f"Please provide the complete specification for the following requirements: {input_text}"}
        ]
    )
    return completion.choices[0].message.content

# Function to extract numerical values from the model's output
def extract_values(output):
    # This function should be adapted based on the expected format of your model's output
    # For example, if the output is "Temperature: 120, Pressure: 2.5", you might do:
    values = {}
    for pair in output.split(','):
        key, value = pair.split(':')
        try:
            values[key.strip()] = float(value.strip())
        except ValueError:
            pass  # Ignore non-numeric values
    return values

In [4]:
# Evaluate the model
predictions = []
actuals = []

for _, row in eval_data.iterrows():
    # Construct input text from a subset of columns (adjust as needed)
    input_columns = ['Method', 'Temp.']  # Add or remove columns as appropriate
    input_text = ", ".join([f"{col}: {row[col]}" for col in input_columns if pd.notna(row[col])])
    
    # Query the model
    model_output = query_model(input_text)
    
    # Extract numerical values from the model's output
    predicted_values = extract_values(model_output)
    
    # Extract actual values from the row
    actual_values = {col: row[col] for col in row.index if pd.notna(row[col]) and isinstance(row[col], (int, float))}
    
    # Append to lists
    predictions.append(predicted_values)
    actuals.append(actual_values)

# Calculate metrics
mse = mean_squared_error(actuals, predictions)
mae = mean_absolute_error(actuals, predictions)
r2 = r2_score(actuals, predictions)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"R-squared Score: {r2}")

# Print a single evaluation score (you can adjust this based on your preference)
evaluation_score = r2  # Using R-squared as the overall evaluation score
print(f"Evaluation Score: {evaluation_score}")

KeyError: 'Method'