In [12]:
import pandas as pd
import subprocess
import ollama

In [6]:
def load_data(file_path, delimiter=','):
    """Read a delimited UTF-8 text file into a DataFrame of raw string fields.

    Every line is split on ``delimiter`` with plain ``str.split``; there is no
    quote or escape handling, so a delimiter inside a field starts a new
    column. No header row is assumed, so pandas assigns the default integer
    column labels. Rows with fewer fields than the widest row are padded with
    missing values by the DataFrame constructor.

    Only the line terminator is removed before splitting. Stripping all
    surrounding whitespace (as a bare ``str.strip()`` does) would also eat a
    leading or trailing tab/space delimiter and silently drop empty first or
    last fields.

    Args:
        file_path: Path of the file to read.
        delimiter: Non-empty field separator; defaults to a comma.

    Returns:
        pandas.DataFrame with one row per input line.

    Raises:
        ValueError: If ``delimiter`` is an empty string.
        OSError: If the file cannot be opened.
    """
    if delimiter == '':
        # str.split('') fails identically on every line; report it once
        # instead of once per line.
        raise ValueError("delimiter must be a non-empty string")
    with open(file_path, 'r', encoding='utf-8') as file:
        rows = [line.rstrip('\r\n').split(delimiter) for line in file]
    return pd.DataFrame(rows)

In [3]:
def create_prompt(text_a, text_b):
    """Build a prompt by placing a literal ``[MASK]`` token between two texts.

    Args:
        text_a: Text placed before the mask token.
        text_b: Text placed after the mask token.

    Returns:
        str: ``text_a``, a space, ``[MASK]``, a space, then ``text_b``.
    """
    template = "{} [MASK] {}"
    return template.format(text_a, text_b)

In [27]:
import ollama

def get_ollama_predictions(data, model='llama3'):
    """Ask an Ollama chat model for a reply to every prompt in ``data``.

    Each value of ``data['prompt']`` is sent as a single user message via
    ``ollama.chat`` and the reply's ``['message']['content']`` is collected.
    A failed request does not abort the run: the failure is printed together
    with the prompt's position and an empty string is stored for that prompt.
    This keeps the result aligned with the input while making sure an
    exception message is never mistaken for a model prediction downstream.

    Args:
        data: Mapping or DataFrame whose ``'prompt'`` entry is an iterable of
            prompts.
        model: Name of the Ollama model to query; defaults to ``'llama3'``.

    Returns:
        list: One entry per prompt, in input order; ``''`` marks a failure.
    """
    predictions = []
    for i, prompt in enumerate(data['prompt']):
        try:
            response = ollama.chat(model=model, messages=[
                {'role': 'user', 'content': prompt},
            ])
            predictions.append(response['message']['content'])
        except Exception as e:
            # Best effort: report the failure and keep going with a placeholder
            # so later rows still line up with their prompts.
            print(f"Prompt {i} failed: {e}")
            predictions.append('')

        # Print progress every 100 steps (i is the zero-based prompt index)
        if i % 100 == 0:
            print(f"Processed {i} prompts")
    return predictions

In [7]:
# Read the three raw files: dev inputs and test inputs are split on commas,
# the dev labels file is split on tabs.
dev_data = load_data(file_path='dev-0/in_1.csv', delimiter=',')
dev_labels = load_data(file_path='dev-0/expected.tsv', delimiter='\t')
test_data = load_data(file_path='test-A/in_1.csv', delimiter=',')

In [8]:
# Add a 'combined' column to both frames: first column, a literal ' [MASK] '
# separator, then the second column (columns are taken by position).
for frame in (dev_data, test_data):
    left_text, right_text = frame.iloc[:, 0], frame.iloc[:, 1]
    frame['combined'] = left_text + ' [MASK] ' + right_text

In [9]:
# One prompt per dev row, built from the row's first two columns (by position).
dev_data['prompt'] = [
    create_prompt(text_a, text_b)
    for text_a, text_b in zip(dev_data.iloc[:, 0], dev_data.iloc[:, 1])
]

In [10]:
# One prompt per test row, built from the row's first two columns (by position).
test_data['prompt'] = [
    create_prompt(text_a, text_b)
    for text_a, text_b in zip(test_data.iloc[:, 0], test_data.iloc[:, 1])
]

In [28]:
# Send every dev prompt to the model and store the returned list (one entry
# per prompt, in row order) as the 'prediction' column.
dev_data['prediction'] = get_ollama_predictions(dev_data)

Processed 0 prompts
Processed 100 prompts
Processed 200 prompts
Processed 300 prompts
Processed 400 prompts
Processed 500 prompts
Processed 600 prompts
Processed 700 prompts
Processed 800 prompts
Processed 900 prompts
Processed 1000 prompts
Processed 1100 prompts
Processed 1200 prompts
Processed 1300 prompts
Processed 1400 prompts
Processed 1500 prompts
Processed 1600 prompts
Processed 1700 prompts
Processed 1800 prompts
Processed 1900 prompts
Processed 2000 prompts
Processed 2100 prompts
Processed 2200 prompts
Processed 2300 prompts
Processed 2400 prompts
Processed 2500 prompts
Processed 2600 prompts
Processed 2700 prompts
Processed 2800 prompts
Processed 2900 prompts
Processed 3000 prompts
Processed 3100 prompts
Processed 3200 prompts
Processed 3300 prompts
Processed 3400 prompts
Processed 3500 prompts
Processed 3600 prompts
Processed 3700 prompts
Processed 3800 prompts
Processed 3900 prompts
Processed 4000 prompts
Processed 4100 prompts
Processed 4200 prompts
Processed 4300 prompts


In [21]:
# Send every test prompt to the model and store the returned list (one entry
# per prompt, in row order) as the 'prediction' column.
test_data['prediction'] = get_ollama_predictions(test_data)

Processed 0 prompts
Processed 100 prompts
Processed 200 prompts
Processed 300 prompts
Processed 400 prompts
Processed 500 prompts
Processed 600 prompts
Processed 700 prompts
Processed 800 prompts
Processed 900 prompts
Processed 1000 prompts
Processed 1100 prompts
Processed 1200 prompts
Processed 1300 prompts
Processed 1400 prompts
Processed 1500 prompts
Processed 1600 prompts
Processed 1700 prompts
Processed 1800 prompts
Processed 1900 prompts
Processed 2000 prompts
Processed 2100 prompts
Processed 2200 prompts
Processed 2300 prompts
Processed 2400 prompts
Processed 2500 prompts
Processed 2600 prompts
Processed 2700 prompts
Processed 2800 prompts
Processed 2900 prompts
Processed 3000 prompts
Processed 3100 prompts
Processed 3200 prompts
Processed 3300 prompts
Processed 3400 prompts
Processed 3500 prompts
Processed 3600 prompts
Processed 3700 prompts
Processed 3800 prompts
Processed 3900 prompts
Processed 4000 prompts
Processed 4100 prompts
Processed 4200 prompts
Processed 4300 prompts


In [29]:
# Write the predictions without header or index, one per physical line.
# Model replies are free text and may contain newlines or tabs; left as-is,
# to_csv would quote such a value and spread it over several lines, so the
# file would no longer have one line per input row. Whitespace runs are
# therefore collapsed to a single space before writing.
# NOTE(review): presumably out.tsv is compared line-by-line with
# expected.tsv - confirm with the evaluation setup.
for frame, out_path in ((dev_data, 'dev-0/out.tsv'), (test_data, 'test-A/out.tsv')):
    one_line_predictions = (
        frame['prediction']
        .astype(str)
        .str.replace(r'\s+', ' ', regex=True)
        .str.strip()
    )
    one_line_predictions.to_csv(out_path, sep='\t', index=False, header=False)
