In [1]:
from dotenv import load_dotenv
import os
from src.agents.api_agent import LocalApiAgent, OpenAIApiAgent

# Load variables from a .env file into the process environment
# (OPENAI_API_KEY is read from the environment below).
load_dotenv()

# The four locally served models share one endpoint and differ only in the model name.
mixtral, llama3_70, llama2_70, llama3_8 = (
    LocalApiAgent("http://localhost:8881/v1/", "ollama", local_model)
    for local_model in ("mixtral", "llama3:70b", "llama2:70b", "llama3:8b")
)

# The two OpenAI-hosted models; the key is looked up in the environment for each agent.
gpt3, gpt4 = (
    OpenAIApiAgent("https://api.openai.com/v1", os.getenv("OPENAI_API_KEY"), hosted_model)
    for hosted_model in ("gpt-3.5-turbo-0125", "gpt-4-turbo")
)

In [2]:
import pandas as pd

# Evaluation set; later cells read its 'url', 'chunk' and 'type' columns.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
df_path = '/home/rinaen/PycharmProjects/communication_agent/testing/eval_chunks.csv'
df = pd.read_csv(filepath_or_buffer=df_path)

In [3]:
from src.data_acquisition.content_processing.content_parsing import get_parsed_content_by_function_call
from src.agents.api_agent import ApiAgent


def get_classified(model: ApiAgent, data_df):
    """Classify and parse every row of ``data_df`` with ``model``.

    For each row, the ``url`` and ``chunk`` values are passed together with
    ``model`` to ``get_parsed_content_by_function_call``; its result is
    unpacked into a ``(type, parsed)`` pair.

    Args:
        model: Agent forwarded to ``get_parsed_content_by_function_call``.
        data_df: DataFrame providing ``url`` and ``chunk`` columns.

    Returns:
        A tuple ``(classified_types, parsed_chunks)`` of two lists with one
        entry per row, in row order. When the call (or unpacking its result)
        raises any ``Exception``, both lists receive ``None`` for that row.
    """
    import logging

    logger = logging.getLogger(__name__)
    classified_types = []
    parsed_chunks = []
    # Only the two needed columns are walked; the row index is not used.
    for url, chunk in zip(data_df['url'], data_df['chunk']):
        try:
            typ, parsed = get_parsed_content_by_function_call(model, url, chunk)
        except Exception as exc:
            # Keep the best-effort behaviour (None placeholders), but report the
            # failure so that e.g. connection errors are not mistaken for
            # badly formatted model output.
            logger.warning("get_parsed_content_by_function_call failed for %s: %r", url, exc)
            typ, parsed = None, None
        classified_types.append(typ)
        parsed_chunks.append(parsed)
    return classified_types, parsed_chunks

In [4]:
# Run get_classified over every row of df with the mixtral agent; failed rows come back as None.
mixtral_t, mixtral_p = get_classified(model=mixtral, data_df=df)

In [5]:
# Run get_classified over every row of df with the llama3:70b agent; failed rows come back as None.
llama3_70_t, llama3_70_p = get_classified(model=llama3_70, data_df=df)

In [6]:
# Run get_classified over every row of df with the llama2:70b agent; failed rows come back as None.
llama2_70_t, llama2_70_p = get_classified(model=llama2_70, data_df=df)

In [7]:
# Run get_classified over every row of df with the llama3:8b agent; failed rows come back as None.
llama3_8_t, llama3_8_p = get_classified(model=llama3_8, data_df=df)

In [8]:
# Run get_classified over every row of df with the gpt-3.5 agent; failed rows come back as None.
gpt3_t, gpt3_p = get_classified(model=gpt3, data_df=df)

In [9]:
# Run get_classified over every row of df with the gpt-4 agent; failed rows come back as None.
gpt4_t, gpt4_p = get_classified(model=gpt4, data_df=df)

In [10]:
# Predicted type per model, keyed by the column it is stored under.
type_columns = {
    'mixtral': mixtral_t,
    'llama3_70': llama3_70_t,
    'llama2_70': llama2_70_t,
    'llama3_8': llama3_8_t,
    'gpt3': gpt3_t,
    'gpt4': gpt4_t,
}
# Parsed result per model, keyed by the column it is stored under.
parsed_columns = {
    'mixtral_parsed': mixtral_p,
    'llama3_70_parsed': llama3_70_p,
    'llama2_70_parsed': llama2_70_p,
    'llama3_8_parsed': llama3_8_p,
    'gpt3_parsed': gpt3_p,
    'gpt4_parsed': gpt4_p,
}

# Attach all type columns first, then all parsed columns (same column order as before).
for column_name, column_values in {**type_columns, **parsed_columns}.items():
    df[column_name] = column_values

# Persist the raw results. The index is written as well, so reading the file
# back without index_col yields an extra unnamed first column.
df.to_csv(path_or_buf='divided-function-call.csv')

In [11]:
# Reload the stored model outputs so the evaluation below can run without
# repeating the model calls.
# NOTE(review): the results are written to a relative path but read from this
# absolute one - they are the same file only when the notebook's working
# directory is that testing folder; confirm.
df_path = '/home/rinaen/PycharmProjects/communication_agent/testing/divided-function-call.csv'
df = pd.read_csv(filepath_or_buffer=df_path)

In [12]:
# For every model, flag the rows whose predicted type equals the labelled 'type'.
# Missing predictions compare as False.
for model_column in ('mixtral', 'llama3_70', 'llama2_70', 'llama3_8', 'gpt3', 'gpt4'):
    df[model_column + '_same'] = df[model_column].eq(df['type'])

In [13]:
# Number of rows per model whose predicted type matches the label.
matches_per_model = {
    model_key: df[model_key + '_same'].sum()
    for model_key in ('mixtral', 'llama3_70', 'llama2_70', 'llama3_8', 'gpt3', 'gpt4')
}

mixtral_same = matches_per_model['mixtral']
llama3_70_same = matches_per_model['llama3_70']
llama2_70_same = matches_per_model['llama2_70']
llama3_8_same = matches_per_model['llama3_8']
gpt3_same = matches_per_model['gpt3']
gpt4_same = matches_per_model['gpt4']

In [14]:
# Number of rows per model that have no predicted type at all (missing value).
(
    mixtral_none,
    llama3_70_none,
    llama2_70_none,
    llama3_8_none,
    gpt3_none,
    gpt4_none,
) = (
    df[prediction_column].isnull().sum()
    for prediction_column in ('mixtral', 'llama3_70', 'llama2_70', 'llama3_8', 'gpt3', 'gpt4')
)

In [15]:
# Rows labelled 'place', and per model how many of them were classified correctly.
place_df = df.loc[df['type'].eq('place')]
(
    mixtral_same_place,
    llama3_70_same_place,
    llama2_70_same_place,
    llama3_8_same_place,
    gpt3_same_place,
    gpt4_same_place,
) = (
    place_df[flag_column].sum()
    for flag_column in (
        'mixtral_same', 'llama3_70_same', 'llama2_70_same', 'llama3_8_same', 'gpt3_same', 'gpt4_same',
    )
)

In [16]:
# Rows labelled 'static', and per model how many of them were classified correctly.
static_df = df.loc[df['type'].eq('static')]
(
    mixtral_same_static,
    llama3_70_same_static,
    llama2_70_same_static,
    llama3_8_same_static,
    gpt3_same_static,
    gpt4_same_static,
) = (
    static_df[flag_column].sum()
    for flag_column in (
        'mixtral_same', 'llama3_70_same', 'llama2_70_same', 'llama3_8_same', 'gpt3_same', 'gpt4_same',
    )
)

In [17]:
# Rows labelled 'administration', and per model how many of them were classified correctly.
administration_df = df.loc[df['type'].eq('administration')]
(
    mixtral_same_administration,
    llama3_70_same_administration,
    llama2_70_same_administration,
    llama3_8_same_administration,
    gpt3_same_administration,
    gpt4_same_administration,
) = (
    administration_df[flag_column].sum()
    for flag_column in (
        'mixtral_same', 'llama3_70_same', 'llama2_70_same', 'llama3_8_same', 'gpt3_same', 'gpt4_same',
    )
)

In [18]:
# Rows labelled 'event', and per model how many of them were classified correctly.
event_df = df.loc[df['type'].eq('event')]
(
    mixtral_same_event,
    llama3_70_same_event,
    llama2_70_same_event,
    llama3_8_same_event,
    gpt3_same_event,
    gpt4_same_event,
) = (
    event_df[flag_column].sum()
    for flag_column in (
        'mixtral_same', 'llama3_70_same', 'llama2_70_same', 'llama3_8_same', 'gpt3_same', 'gpt4_same',
    )
)

In [19]:
def _percent_true(flags):
    """Format the share of True entries in a boolean Series as ``'<number> %'``.

    Args:
        flags: Boolean pandas Series.

    Returns:
        ``flags.sum() / len(flags) * 100`` rendered with ``str`` and suffixed
        with ``' %'``.
    """
    return str(flags.sum() / len(flags) * 100) + ' %'


# Every value is computed from the DataFrames instead of from the count
# variables of the earlier cells. That way the overall figures are real
# percentages (a raw count only equals a percentage when df has exactly 100
# rows), and the cell can be re-run without stacking ' %' suffixes.

# Share of all rows classified correctly.
mixtral_same = _percent_true(df['mixtral_same'])
llama3_70_same = _percent_true(df['llama3_70_same'])
llama2_70_same = _percent_true(df['llama2_70_same'])
llama3_8_same = _percent_true(df['llama3_8_same'])
gpt3_same = _percent_true(df['gpt3_same'])
gpt4_same = _percent_true(df['gpt4_same'])

# Share of all rows without any predicted type.
mixtral_none = _percent_true(df['mixtral'].isna())
llama3_70_none = _percent_true(df['llama3_70'].isna())
llama2_70_none = _percent_true(df['llama2_70'].isna())
llama3_8_none = _percent_true(df['llama3_8'].isna())
gpt3_none = _percent_true(df['gpt3'].isna())
gpt4_none = _percent_true(df['gpt4'].isna())

# Share of correctly classified rows among those labelled 'place'.
mixtral_same_place = _percent_true(place_df['mixtral_same'])
llama3_70_same_place = _percent_true(place_df['llama3_70_same'])
llama2_70_same_place = _percent_true(place_df['llama2_70_same'])
llama3_8_same_place = _percent_true(place_df['llama3_8_same'])
gpt3_same_place = _percent_true(place_df['gpt3_same'])
gpt4_same_place = _percent_true(place_df['gpt4_same'])

# Share of correctly classified rows among those labelled 'static'.
mixtral_same_static = _percent_true(static_df['mixtral_same'])
llama3_70_same_static = _percent_true(static_df['llama3_70_same'])
llama2_70_same_static = _percent_true(static_df['llama2_70_same'])
llama3_8_same_static = _percent_true(static_df['llama3_8_same'])
gpt3_same_static = _percent_true(static_df['gpt3_same'])
gpt4_same_static = _percent_true(static_df['gpt4_same'])

# Share of correctly classified rows among those labelled 'administration'.
mixtral_same_administration = _percent_true(administration_df['mixtral_same'])
llama3_70_same_administration = _percent_true(administration_df['llama3_70_same'])
llama2_70_same_administration = _percent_true(administration_df['llama2_70_same'])
llama3_8_same_administration = _percent_true(administration_df['llama3_8_same'])
gpt3_same_administration = _percent_true(administration_df['gpt3_same'])
gpt4_same_administration = _percent_true(administration_df['gpt4_same'])

# Share of correctly classified rows among those labelled 'event'.
mixtral_same_event = _percent_true(event_df['mixtral_same'])
llama3_70_same_event = _percent_true(event_df['llama3_70_same'])
llama2_70_same_event = _percent_true(event_df['llama2_70_same'])
llama3_8_same_event = _percent_true(event_df['llama3_8_same'])
gpt3_same_event = _percent_true(event_df['gpt3_same'])
gpt4_same_event = _percent_true(event_df['gpt4_same'])

In [27]:
# Classification summary, one row per model: overall correct, missing
# predictions, then correct per labelled type.
# Column labels are Czech: 'Celkem správně' = "total correct",
# 'Špatný formát' = "bad format".
classification_rows = [
    ('mixtral', mixtral_same, mixtral_none, mixtral_same_place, mixtral_same_static,
     mixtral_same_administration, mixtral_same_event),
    ('llama3_70b', llama3_70_same, llama3_70_none, llama3_70_same_place, llama3_70_same_static,
     llama3_70_same_administration, llama3_70_same_event),
    ('llama2_70b', llama2_70_same, llama2_70_none, llama2_70_same_place, llama2_70_same_static,
     llama2_70_same_administration, llama2_70_same_event),
    ('llama3_8b', llama3_8_same, llama3_8_none, llama3_8_same_place, llama3_8_same_static,
     llama3_8_same_administration, llama3_8_same_event),
    ('gpt-3.5', gpt3_same, gpt3_none, gpt3_same_place, gpt3_same_static,
     gpt3_same_administration, gpt3_same_event),
    ('gpt-4', gpt4_same, gpt4_none, gpt4_same_place, gpt4_same_static,
     gpt4_same_administration, gpt4_same_event),
]
eval_df = pd.DataFrame(
    classification_rows,
    columns=['Model', 'Celkem správně', 'Špatný formát', 'place', 'static', 'administration', 'event'],
)

# Keep a copy of the table next to the notebook.
eval_df.to_csv('eval_function_call_cls.csv')

In [21]:
# Render the current eval_df (the classification summary when the cells are run top to bottom).
display(eval_df)

In [22]:
# Share of all rows for which a parsed result is present, per model.
# The count is divided by the number of rows so the ' %' label is correct for
# any dataset size (a raw count only equals a percentage when df has exactly
# 100 rows).
mixtral_ok_p = str(df['mixtral_parsed'].notna().sum() / len(df) * 100) + ' %'
llama3_70_ok_p = str(df['llama3_70_parsed'].notna().sum() / len(df) * 100) + ' %'
llama2_70_ok_p = str(df['llama2_70_parsed'].notna().sum() / len(df) * 100) + ' %'
llama3_8_ok_p = str(df['llama3_8_parsed'].notna().sum() / len(df) * 100) + ' %'
gpt3_ok_p = str(df['gpt3_parsed'].notna().sum() / len(df) * 100) + ' %'
gpt4_ok_p = str(df['gpt4_parsed'].notna().sum() / len(df) * 100) + ' %'

In [23]:
def _parsed_ok_percent(type_name, parsed_column):
    """Format the share of rows of one labelled type that have a parsed result.

    Args:
        type_name: Value of the 'type' column selecting the rows to look at.
        parsed_column: Name of the '<model>_parsed' column in ``df``.

    Returns:
        The percentage of selected rows whose ``parsed_column`` is not missing,
        rendered with ``str`` and suffixed with ``' %'``.
    """
    parsed = df.loc[df['type'] == type_name, parsed_column]
    # Divide by the size of this type's own subset. Dividing every type by the
    # number of 'event' rows is only right when all types are equally frequent.
    return str(parsed.notna().sum() / len(parsed) * 100) + ' %'


mixtral_ok_p_place = _parsed_ok_percent('place', 'mixtral_parsed')
llama3_70_ok_p_place = _parsed_ok_percent('place', 'llama3_70_parsed')
llama2_70_ok_p_place = _parsed_ok_percent('place', 'llama2_70_parsed')
llama3_8_ok_p_place = _parsed_ok_percent('place', 'llama3_8_parsed')
gpt3_ok_p_place = _parsed_ok_percent('place', 'gpt3_parsed')
gpt4_ok_p_place = _parsed_ok_percent('place', 'gpt4_parsed')

mixtral_ok_p_static = _parsed_ok_percent('static', 'mixtral_parsed')
llama3_70_ok_p_static = _parsed_ok_percent('static', 'llama3_70_parsed')
llama2_70_ok_p_static = _parsed_ok_percent('static', 'llama2_70_parsed')
llama3_8_ok_p_static = _parsed_ok_percent('static', 'llama3_8_parsed')
gpt3_ok_p_static = _parsed_ok_percent('static', 'gpt3_parsed')
gpt4_ok_p_static = _parsed_ok_percent('static', 'gpt4_parsed')

mixtral_ok_p_administration = _parsed_ok_percent('administration', 'mixtral_parsed')
llama3_70_ok_p_administration = _parsed_ok_percent('administration', 'llama3_70_parsed')
llama2_70_ok_p_administration = _parsed_ok_percent('administration', 'llama2_70_parsed')
llama3_8_ok_p_administration = _parsed_ok_percent('administration', 'llama3_8_parsed')
gpt3_ok_p_administration = _parsed_ok_percent('administration', 'gpt3_parsed')
gpt4_ok_p_administration = _parsed_ok_percent('administration', 'gpt4_parsed')

mixtral_ok_p_event = _parsed_ok_percent('event', 'mixtral_parsed')
llama3_70_ok_p_event = _parsed_ok_percent('event', 'llama3_70_parsed')
llama2_70_ok_p_event = _parsed_ok_percent('event', 'llama2_70_parsed')
llama3_8_ok_p_event = _parsed_ok_percent('event', 'llama3_8_parsed')
gpt3_ok_p_event = _parsed_ok_percent('event', 'gpt3_parsed')
gpt4_ok_p_event = _parsed_ok_percent('event', 'gpt4_parsed')


In [32]:
# Hard-coded duration strings per model; they fill the 'Čas' ("time") column of
# the parsing summary table.
# NOTE(review): presumably noted by hand from the get_classified runs above -
# nothing in this notebook measures them; confirm before reusing the figures.
mixtral_time = '29m 05s'
llama3_70_time = '116m 32s'
llama2_70_time = '45m 49s'
llama3_8_time = '105m 18s'
gpt3_time = '6m 02s'
gpt4_time = '11m 18s'

In [33]:
# Parsing summary, one row per model: overall parse success, parse success per
# labelled type, and the recorded duration.
# Column labels are Czech: 'Správný formát' = "correct format", 'Čas' = "time".
parsing_rows = [
    ('mixtral', mixtral_ok_p, mixtral_ok_p_place, mixtral_ok_p_static,
     mixtral_ok_p_administration, mixtral_ok_p_event, mixtral_time),
    ('llama3_70b', llama3_70_ok_p, llama3_70_ok_p_place, llama3_70_ok_p_static,
     llama3_70_ok_p_administration, llama3_70_ok_p_event, llama3_70_time),
    ('llama2_70b', llama2_70_ok_p, llama2_70_ok_p_place, llama2_70_ok_p_static,
     llama2_70_ok_p_administration, llama2_70_ok_p_event, llama2_70_time),
    ('llama3_8b', llama3_8_ok_p, llama3_8_ok_p_place, llama3_8_ok_p_static,
     llama3_8_ok_p_administration, llama3_8_ok_p_event, llama3_8_time),
    ('gpt-3.5', gpt3_ok_p, gpt3_ok_p_place, gpt3_ok_p_static,
     gpt3_ok_p_administration, gpt3_ok_p_event, gpt3_time),
    ('gpt-4', gpt4_ok_p, gpt4_ok_p_place, gpt4_ok_p_static,
     gpt4_ok_p_administration, gpt4_ok_p_event, gpt4_time),
]
eval_df = pd.DataFrame(
    parsing_rows,
    columns=['Model', 'Správný formát', 'place', 'static', 'administration', 'event', 'Čas'],
)

# Keep a copy of the table next to the notebook.
eval_df.to_csv('eval_function_call_par.csv')

In [34]:
# Render the current eval_df (the parsing summary when the cells are run top to bottom).
display(eval_df)