In [1]:
from fastapi import FastAPI
from bs4 import BeautifulSoup
import requests
import uvicorn
import asyncio
import json
import pandas as pd
import numpy as np
from io import StringIO  
from typing import List, Dict
from fastapi.middleware.cors import CORSMiddleware
from functools import lru_cache#

In [2]:
# Application instance; the middleware and route decorators in the cells below
# all register themselves on this object.
app = FastAPI()

In [3]:
# Origins permitted to make cross-origin (CORS) requests to this API.
# Passed to CORSMiddleware as `allow_origins`.
origins = [
    "http://localhost:5173",  # Frontend running locally
]

In [4]:
# Enable CORS for the origins listed in `origins`; every HTTP method and
# request header is accepted from those origins, and credentials are allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,  # Allow these origins
    allow_credentials=True,  # Allow cookies and credentials
    allow_methods=["*"],  # Allow all HTTP methods (GET, POST, etc.)
    allow_headers=["*"],  # Allow all headers
)

In [5]:
@app.get("/")
async def test():
    """Root endpoint: reply with a fixed greeting payload."""
    greeting = {"message": "Hello World"}
    return greeting

In [6]:
def _json_safe(value):
    """Return ``None`` for NaN/NA/+-inf scalars and ``value`` unchanged otherwise."""
    if pd.isna(value):
        return None
    if isinstance(value, float) and not np.isfinite(value):
        return None
    return value


@lru_cache(maxsize=1000)
def parse_trustbit_open_leaderboard_data():
    """Scrape the Trustbit LLM leaderboard tables and return them as plain dicts.

    The benchmarks index page is fetched first; every distinct link on it whose
    href starts with ``/en/llm-leaderboard`` is then fetched, and the first
    ``<table class="custom-model-table">`` on each page is parsed with pandas.

    Returns:
        dict: ``{"data": [rows, ...]}`` with one ``rows`` list per leaderboard
        page that contained a parseable table. Each element of ``rows`` is a
        dict keyed by the table's column names plus a ``'leaderboard'`` key
        built from the last two dash-separated segments of the page URL.
        NaN and infinite values are replaced by ``None`` so the payload can be
        serialised as strict JSON.

    Raises:
        requests.RequestException: on network failure, timeout, or when the
            index page answers with an HTTP error status.

    Notes:
        The result is memoised by ``lru_cache`` for the life of the process and
        the *same* dict object is handed to every caller, so treat it as
        read-only. Call ``parse_trustbit_open_leaderboard_data.cache_clear()``
        to force a re-scrape.
    """
    base_url = 'https://www.trustbit.tech'
    request_timeout = 30  # seconds; without a timeout `requests` can wait forever

    index_response = requests.get(f'{base_url}/en/llm-benchmarks', timeout=request_timeout)
    # Fail loudly instead of parsing an error page: an empty result would
    # otherwise be cached permanently by lru_cache.
    index_response.raise_for_status()
    soup = BeautifulSoup(index_response.text, 'html.parser')

    # dict.fromkeys drops repeated hrefs while keeping page order, so each
    # leaderboard is fetched and reported exactly once.
    links = list(dict.fromkeys(
        a['href']
        for a in soup.find_all('a', href=True)
        if a['href'].startswith('/en/llm-leaderboard')
    ))

    data = []
    for link in links:
        url = base_url + link
        benchmark_html = requests.get(url, timeout=request_timeout).text

        page_soup = BeautifulSoup(benchmark_html, 'html.parser')
        html_table = page_soup.find('table', class_='custom-model-table')
        if html_table is None:
            continue

        try:
            # read_html returns a list of DataFrames, one per table found.
            dfs = pd.read_html(StringIO(str(html_table)))
        except ValueError:
            # Raised by pandas when the markup holds no parseable table.
            continue
        if not dfs:
            # Never fall through with a DataFrame left over from a previous page.
            continue
        df = dfs[0]

        leaderboard = "-".join(url.split("-")[-2:])

        # Sanitise cell by cell in plain Python; this does not rely on the
        # pandas-version-dependent meaning of DataFrame.replace(..., None).
        records = []
        for row in df.to_dict(orient='records'):
            clean_row = {column: _json_safe(value) for column, value in row.items()}
            clean_row['leaderboard'] = leaderboard
            records.append(clean_row)

        data.append(records)

    return {"data": data}

In [7]:
@app.get("/llm_benchmarks/trustbit")
async def get_llm_benchmark_data_from_trustbit():
    """Return the scraped Trustbit leaderboard data as the JSON response body.

    The scraper performs blocking HTTP requests, so it is executed in the
    event loop's default thread-pool executor; calling it directly from this
    coroutine would stall every other request until the scrape finished.
    Once the scraper's cache is populated the call returns immediately.
    """
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, parse_trustbit_open_leaderboard_data)

In [8]:
# Smoke-test the scraper from the notebook: the first call performs the HTTP
# requests, later calls reuse the lru_cache'd result.
# Each element of result["data"] is one leaderboard's list of row dicts.
result = parse_trustbit_open_leaderboard_data()
for d in result["data"]:
    print(d)

[{'model': None, 'code': None, 'crm': None, 'docs': None, 'integrate': None, 'marketing': None, 'reason': None, 'final 🏆': None, 'Cost': None, 'Speed': None, 'leaderboard': 'juli-2024'}, {'model': 'GPT-4o ☁️', 'code': 90.0, 'crm': 95.0, 'docs': 100.0, 'integrate': 90.0, 'marketing': 82.0, 'reason': 75.0, 'final 🏆': 89.0, 'Cost': '1.21 €', 'Speed': '1.50 rps', 'leaderboard': 'juli-2024'}, {'model': 'GPT-4 Turbo v5/2024-04-09 ☁️', 'code': 86.0, 'crm': 99.0, 'docs': 98.0, 'integrate': 93.0, 'marketing': 88.0, 'reason': 45.0, 'final 🏆': 85.0, 'Cost': '2.45 €', 'Speed': '0.84 rps', 'leaderboard': 'juli-2024'}, {'model': 'Google Gemini Pro 1.5 0801 ☁️', 'code': 84.0, 'crm': 92.0, 'docs': 90.0, 'integrate': 100.0, 'marketing': 70.0, 'reason': 72.0, 'final 🏆': 85.0, 'Cost': '1.48 €', 'Speed': '0.83 rps', 'leaderboard': 'juli-2024'}, {'model': 'GPT-4 v1/0314 ☁️', 'code': 90.0, 'crm': 88.0, 'docs': 98.0, 'integrate': 52.0, 'marketing': 88.0, 'reason': 50.0, 'final 🏆': 78.0, 'Cost': '7.04 €', 'Sp

In [9]:
if __name__ == "__main__":
    # Start uvicorn with its default configuration for `app`.
    config = uvicorn.Config(app)
    server = uvicorn.Server(config)
    try:
        # Inside Jupyter an event loop is already running, so the server has
        # to be scheduled on it rather than started with a blocking call.
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop (e.g. executed as a plain script): let uvicorn
        # create and drive its own loop.
        server.run()
    else:
        # Keep a strong reference: the event loop only holds weak references
        # to tasks, so an unreferenced task may be garbage-collected mid-run.
        server_task = loop.create_task(server.serve())

INFO:     Started server process [67804]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:62937 - "GET /llm_benchmarks/trustbit HTTP/1.1" 200 OK
