In [18]:
import pandas as pd
import json
import requests
from pydantic import BaseModel, ValidationError, conint
from dotenv import load_dotenv
load_dotenv()

class ModelRanking(BaseModel):
    """One entry of the ranking list: a model name paired with its ranking.

    The capitalised field names mirror the keys used in ``template``.
    """
    Name: str  # label of the ranked model, e.g. "Model 1"
    Ranking: conint(ge=0)  # conint(ge=0) means a constrained integer greater than or equal to 0

class ResponseModel(BaseModel):
    """Top-level shape a model reply must have: ``{"Model": [<ModelRanking>, ...]}``.

    Used by ``validate_response`` to decide whether a reply is accepted.
    """
    Model: list[ModelRanking]  # one ModelRanking entry per ranked model

# Skeleton embedded in every prompt: one entry per known model name, each with
# an empty-string ranking for the LLM to fill in.
template = {
    "Model": [
        {"Name": model_name, "Ranking": ""}
        for model_name in (
            "mistral-7b",
            "llama2-70b",
            "Model 1",
            "Model 3",
            "Model 6",
            "Model 8",
            "GPT-4-1106",
        )
    ]
}

# Value sent in the "model" field of each generation request.
model = "llama2:7b"
def generate_text(data):
    """Send one generation request and return the model's reply parsed as JSON.

    Args:
        data: JSON-serialisable request payload posted to the local
            ``/api/generate`` endpoint.

    Returns:
        The value obtained by JSON-decoding the ``"response"`` field of the
        reply (whatever JSON type the model produced).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        json.JSONDecodeError: if the reply body or its ``"response"`` field
            is not valid JSON.
        KeyError: if the reply body has no ``"response"`` field.
    """
    r = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
    # Surface server-side failures as the real HTTP error instead of a
    # confusing KeyError on the missing "response" field further down.
    r.raise_for_status()
    full_response = json.loads(r.text)
    # The generated text is itself a JSON document embedded as a string.
    resp = json.loads(full_response["response"])
    # "\n" (not "/n") so the log actually contains line breaks.
    print(f"\n\n Response is: \n {resp}")
    return resp

def read_excel(filepath, column_name):
    """Load a spreadsheet and return one of its columns as a plain list.

    Args:
        filepath: location of the Excel workbook, handed to ``pd.read_excel``.
        column_name: label of the column to extract.

    Returns:
        The values of that column, in row order, as a Python list.
    """
    sheet = pd.read_excel(filepath)
    column = sheet[column_name]
    return column.tolist()

def validate_response(response):
    """Report whether ``response`` conforms to ``ResponseModel``.

    Args:
        response: decoded JSON value produced by the model. Any JSON type is
            accepted as input; only a mapping can be valid.

    Returns:
        True if ``response`` is a dict that ``ResponseModel`` accepts,
        False otherwise (including for lists, strings, numbers and None).
    """
    # ``**response`` only works on mappings. A JSON array, string, number or
    # null would raise TypeError, which the ValidationError handler below does
    # not catch, so reject those up front.
    if not isinstance(response, dict):
        return False
    try:
        ResponseModel(**response)
    except ValidationError:
        return False
    return True
    
def make_json(data, max_attempts=3):
    """Ask the model to extract rankings from every item and collect the results.

    Each item is sent to ``generate_text`` up to ``max_attempts`` times until
    ``validate_response`` accepts the reply.

    Args:
        data: iterable of texts to extract rankings from.
        max_attempts: maximum number of requests per item (default 3).

    Returns:
        A list with one ``{"index": i, "response": ...}`` dict per item, where
        ``i`` counts from 1. ``response`` is the validated reply, or
        ``{"Model": []}`` when no valid reply was obtained.
    """
    response_full = []
    for index, info in enumerate(data, start=1):  # Start indexing from 1
        # The prompt and payload depend only on the item, so build them once
        # instead of on every retry.
        prompt = f"Extract the model rankings from {info} and give me the response as a JSON. \nUse the following template: {json.dumps(template)}."
        response_data = {
            "model": model,
            "prompt": prompt,
            "format": "json",
            "stream": False,
            # NOTE(review): temperature 2.5 is very high for a structured
            # extraction task -- confirm this is intentional.
            "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
        }
        response = None
        valid_response = False
        attempts = 0
        while not valid_response and attempts < max_attempts:
            attempts += 1
            print(f"The data is: \n {info}")
            print("\n\n We're starting! \n")
            try:
                response = generate_text(response_data)
            except json.JSONDecodeError:
                # Malformed JSON from the model is exactly what the retry loop
                # is for; count it as a failed attempt instead of aborting.
                continue
            # Only a mapping can match the schema; guard here so a JSON array
            # or scalar is treated as invalid rather than raising.
            valid_response = isinstance(response, dict) and validate_response(response)
        if valid_response:
            response_full.append({"index": index, "response": response})
        else:
            print(f"Failed to get a valid response after {max_attempts} attempts.")
            # Keep a placeholder so indices stay aligned with the input rows.
            response_full.append({"index": index, "response": {"Model": []}})
    return response_full

def main():
    """Run the pipeline end to end.

    Reads the evaluation column from the results workbook, has the model
    extract rankings for every row, and saves the collected results as JSON.
    """
    evaluations = read_excel(
        'files/llmeval_results.xlsx',
        'Evaluation of responses from GPT-4',
    )
    rankings = make_json(evaluations)
    with open("mistral_output.json", "w") as out_file:
        json.dump(rankings, out_file)

# Run the pipeline when this code is executed directly. A notebook kernel also
# executes cells with __name__ == "__main__", so running this cell calls main().
if __name__ == "__main__":
    main()

The data is: /n Model 1: 2, Model 3: 5, Model 6: 3, Model 8: 4, Model gpt-4-1106-preview: 1  Brief Analysis:  Model 1 (Rank 2): Offers a comprehensive ethical viewpoint with legal considerations and sustainable growth strategies.  Model 3 (Rank 5): Ethical stance with an offer to help in other business areas, but less detailed than others.  Model 6 (Rank 3): Solid ethical grounding and practical tips; slightly repetitive.  Model 8 (Rank 4): Very detailed ethical response, with actionable growth strategies; however, slightly longer.  Model gpt-4-1106-preview (Rank 1): Concise, ethical guidance with a focus on genuine quality improvement and customer service.
/n/n We're starting! /n
/n/n Response is: /n {'Model': [{'Name': 'mistral-7b', 'Ranking': 0}, {'Name': 'llama2-70b', 'Ranking': 0}, {'Name': 'Model 1', 'Ranking': 2}, {'Name': 'Model 3', 'Ranking': 5}, {'Name': 'Model 6', 'Ranking': 3}, {'Name': 'Model 8', 'Ranking': 4}, {'Name': 'GPT-4-1106', 'Ranking': 1}]}
The data is: /n Model 1

In [20]:
import json
import csv

# Flatten the per-row rankings stored in mistral_output.json into a CSV table:
# one row per input record ("ID") and one column per model name.

# Read the JSON file
with open("mistral_output.json", "r") as f:
    json_strings = json.load(f)

# Collect every model name that appears in any response.
# The loop variable is deliberately NOT called `model`: a top-level `for`
# target stays bound after the loop, and `model` is the notebook-global model
# name used when building generation requests.
unique_models = set()
for item in json_strings:
    response_obj = item["response"]  # Directly use the response object
    for entry in response_obj["Model"]:
        unique_models.add(entry['Name'])

# Sort the names so the column order is deterministic
unique_models = sorted(unique_models)

# Add 'ID' at the beginning of the list for the header
header = ['ID'] + unique_models

# Prepare data for CSV
csv_data = []
for item in json_strings:
    row = dict.fromkeys(unique_models, '')  # Initialize all model rankings as empty
    row['ID'] = item['index']  # Use the index from the original data
    for entry in item["response"]["Model"]:
        row[entry['Name']] = entry.get('Ranking', '')
    csv_data.append(row)

# Write to CSV
csv_file = 'model_rankings.csv'
with open(csv_file, 'w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=header)
    writer.writeheader()
    writer.writerows(csv_data)

print(f"Data written to {csv_file}")


Data written to model_rankings.csv
