# Running Orca2 with Ollama for Entity Resolution on Abt-Buy

## Imports and Parameters

In [1]:
from datetime import datetime
from ollama import chat
from ollama import ChatResponse
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd

In [2]:
# Name of the Ollama model to query; passed as `model=` to `chat` in the execution loop.
model_name = 'orca2-em'
# Base name of the labelled-pairs CSV, loaded below as f'./abt-buy/{dataset_file}.csv'.
dataset_file = 'test'

## Load Files

In [3]:
# The two product catalogues are read the same way and indexed by their 'id' column,
# so rows can later be fetched with `.loc[<id>]`.
tableA, tableB = (
    pd.read_csv(f'./abt-buy/{table_name}.csv', index_col=['id'])
    for table_name in ('tableA', 'tableB')
)

# Labelled candidate pairs: each row references one id from each catalogue plus a label.
products_match = pd.read_csv(f'./abt-buy/{dataset_file}.csv')

In [4]:
# Preview the first two rows of tableA (columns: name, description, price; indexed by id).
tableA.head(2)

Unnamed: 0_level_0,name,description,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,sony turntable pslx350h,sony turntable pslx350h belt drive system 33-1...,
1,bose acoustimass 5 series iii speaker system a...,bose acoustimass 5 series iii speaker system a...,399.0


In [5]:
# Preview the first two rows of tableB (columns: name, description, price; indexed by id).
tableB.head(2)

Unnamed: 0_level_0,name,description,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,linksys etherfast ezxs88w ethernet switch ezxs88w,linksys etherfast 8-port 10/100 switch ( new/w...,
1,linksys etherfast ezxs55w ethernet switch,5 x 10/100base-tx lan,


In [6]:
# Preview the labelled pairs: ids into tableA / tableB and the 0/1 match label.
products_match.head(2)

Unnamed: 0,tableA,tableB,label
0,445,910,0
1,719,812,0


## Create Testset

In [7]:
# Start from an empty frame with the final column layout:
# the two serialized records, the gold label, and the model's prediction.
dataset = pd.DataFrame(
    data=None,
    columns=[
        'tableA',
        'tableB',
        'label',
        'prediction',
    ],
)
dataset

Unnamed: 0,tableA,tableB,label,prediction


In [None]:
def _serialize_product(table, product_id):
    """Return the text '<name> - <description>' for the row of `table` indexed by `product_id`."""
    product = table.loc[product_id]
    return f"{product['name']} - {product['description']}"


column_tableA, column_tableB, column_label = [], [], []

# Resolve every labelled id pair into the two textual records the model will compare.
pair_rows = tqdm(products_match.iterrows(), total=len(products_match), desc='rows')
for _, pair in pair_rows:
    column_tableA.append(_serialize_product(tableA, pair['tableA']))
    column_tableB.append(_serialize_product(tableB, pair['tableB']))
    column_label.append(pair['label'])

# Fill the frame column by column; 'prediction' is left empty until the model runs.
for column_name, column_values in (
    ('tableA', column_tableA),
    ('tableB', column_tableB),
    ('label', column_label),
):
    dataset[column_name] = column_values

In [9]:
# Preview the assembled test set; 'prediction' is still empty at this point.
dataset.head(2)

Unnamed: 0,tableA,tableB,label,prediction
0,sony pink cyber-shot 7.2 megapixel digital cam...,olympus fe-360 digital camera pink 226540 - 8 ...,0,
1,lg 2.0 cu . ft. over-the-range white microwave...,maytag 2.0 cu . ft. over-the-range microwave o...,0,


## Model Execution

In [10]:
# Per-execution score history. Each label group ('all', 'negative', 'positive')
# keeps its own independent list for every score; 'runtime' holds seconds per execution.
metrics = {
    group: {score: [] for score in ('precision', 'recall', 'f1-score')}
    for group in ('all', 'negative', 'positive')
}
metrics['runtime'] = []

In [11]:
# How many times the full pass over `dataset` is repeated; scores are averaged over these runs.
executions_quantity = 10

In [None]:
# Maps the model's textual verdict to the binary label used in `dataset['label']`.
# Defined once: it does not change between executions.
prediction_value = {
    'False': 0,
    'True': 1,
}


def _parse_prediction(response_text):
    """Convert one raw model reply into 0/1, or None when it cannot be interpreted.

    The reply is cleaned of the '. system' / '.' / 'system' residue the model
    sometimes appends, and of surrounding whitespace, before being looked up in
    `prediction_value`. A reply of None is treated as empty text.
    """
    cleaned = (
        (response_text or '')
        .replace('. system', '')
        .replace('.', '')
        .replace('system', '')
        .strip()
    )
    return prediction_value.get(cleaned)


for _execution in tqdm(range(executions_quantity), total=executions_quantity, desc='executions'):
    responses_column = []

    # Only the inference pass is timed; parsing and scoring are excluded.
    start_time = datetime.now().timestamp()

    for product_tableA, product_tableB in tqdm(
        zip(dataset['tableA'], dataset['tableB']), total=len(dataset), desc='rows'
    ):
        content = f'record 1: {product_tableA} record 2: {product_tableB}'

        response: ChatResponse = chat(model=model_name, messages=[
            {
                'role': 'user',
                'content': content,
            },
        ])

        responses_column.append(response.message.content)

    end_time = datetime.now().timestamp()

    predictions = []
    errors = 0

    for raw_response in responses_column:
        parsed = _parse_prediction(raw_response)
        if parsed is None:
            # Unparseable replies are scored as "no match", but they are counted
            # so the fallback is visible instead of silently skewing the scores.
            errors += 1
            parsed = 0
        predictions.append(parsed)

    # Keep the number of fallback predictions per execution for later inspection.
    metrics.setdefault('errors', []).append(errors)

    dataset['prediction'] = predictions

    y_true = dataset['label']
    y_pred = dataset['prediction']

    # 'all' is the support-weighted average over both classes; 'negative' and
    # 'positive' score class 0 and class 1 (the scorer's default) respectively.
    for group, score_kwargs in (
        ('all', {'average': 'weighted'}),
        ('negative', {'pos_label': 0}),
        ('positive', {}),
    ):
        metrics[group]['precision'].append(precision_score(y_true, y_pred, **score_kwargs))
        metrics[group]['recall'].append(recall_score(y_true, y_pred, **score_kwargs))
        metrics[group]['f1-score'].append(f1_score(y_true, y_pred, **score_kwargs))

    metrics['runtime'].append(end_time - start_time)

## Average for All Executions

In [13]:
# Report the number of executions actually recorded rather than a hard-coded 10:
# `metrics` grows by one entry per execution, so this stays correct if
# `executions_quantity` changes or the execution cell is run more than once.
executions_recorded = len(metrics['runtime'])

print(f"Average Results for {executions_recorded} executions:")

# One identically formatted section per label group, averaged across executions.
for section_title, group in (
    ('Negative', 'negative'),
    ('Positive', 'positive'),
    ('All Labels', 'all'),
):
    print(f"\n   {section_title}:")
    print(f"      Precision: {np.mean(metrics[group]['precision'])*100:.2f} %")
    print(f"      Recall: {np.mean(metrics[group]['recall'])*100:.2f} %")
    print(f"      F1-Score: {np.mean(metrics[group]['f1-score'])*100:.2f} %")

# Runtimes are stored in seconds; report the mean in minutes.
print(f'\n   Runtime: {np.mean(metrics["runtime"])/60:.1f} minutes')

Average Results for 10 executions:

   Negative:
      Precision: 93.77 %
      Recall: 97.78 %
      F1-Score: 95.73 %

   Positive:
      Precision: 71.43 %
      Recall: 46.12 %
      F1-Score: 56.05 %

   All Labels:
      Precision: 91.37 %
      Recall: 92.22 %
      F1-Score: 91.47 %

   Runtime: 17.0 minutes
