In [None]:
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import numpy as np
from langchain_ollama import OllamaLLM
from sklearn.metrics import classification_report
from sklearn.metrics import matthews_corrcoef
from tqdm import tqdm

# Shared model handle: predict_single() calls llm.invoke() on this object.
# "deepseek_detector" is the model name handed to OllamaLLM.
llm = OllamaLLM(model="deepseek_detector")

In [None]:
# Load the evaluation set from test_10k.csv.
# NOTE(review): the prediction cell further down reads this same file again.
test_df = pd.read_csv("test_10k.csv")

In [None]:
def predict_single(text):
    """Send a single input to the shared ``llm`` and hand back its reply.

    Args:
        text: The value forwarded unchanged to ``llm.invoke``.

    Returns:
        Whatever ``llm.invoke(text)`` returns.

    NOTE(review): an earlier comment assumed the reply is 0 or 1, but the
    later cells parse a leading integer from the reply and treat 1/2 as
    class 0 -- confirm the expected response format.
    """
    response = llm.invoke(text)
    return response

def parallel_prediction(data, max_workers=8):
    """Run ``predict_single`` over every item of ``data`` on a thread pool.

    Args:
        data: Sized collection of inputs; ``len(data)`` is used as the
            progress-bar total, so a bare generator is not accepted.
        max_workers: Number of worker threads. Defaults to 8, the value that
            was previously hard-coded, so existing callers are unaffected.

    Returns:
        list: One result per input. ``executor.map`` yields results in the
        order of the inputs, so the list lines up with ``data``.

    Raises:
        Any exception raised inside ``predict_single`` is re-raised here when
        the corresponding result is consumed.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = executor.map(predict_single, data)
        # tqdm wraps the lazy result iterator so the bar advances as each
        # prediction (in input order) becomes available.
        return list(tqdm(results, total=len(data), desc="Processing Predictions"))

if __name__ == "__main__":
    # Read the evaluation set fresh so this cell does not depend on earlier state.
    test_df = pd.read_csv("test_10k.csv")

    # Query the model for every entry of the 'data' column (with a progress
    # bar) and attach the replies as the 'predicted' column.
    test_df = test_df.assign(
        predicted=parallel_prediction(test_df["data"].tolist())
    )

    # Print the frame for inspection, then save it to CSV without the index.
    print(test_df)
    test_df.to_csv("test_deepseek.csv", index=False)

In [None]:
# Read back test_deepseek.csv (written by the prediction cell) so the
# post-processing cells below start from the saved model replies.
test_df=pd.read_csv("test_deepseek.csv")

In [None]:
# Move the raw model reply to 'skala1234'; a later cell re-creates
# 'predicted' as a binary label derived from it.
test_df= test_df.rename(columns={'predicted': 'skala1234'})
test_df

In [None]:
import re

def extract_number(response):
    """Parse the integer that opens a model reply.

    Values read back from CSV are not always strings: an empty reply comes
    back as NaN, and a column of purely numeric replies is parsed as
    int/float. ``re.match`` raises ``TypeError`` on those, so they are
    handled explicitly here.

    Args:
        response: The raw reply. Usually a ``str``; ``None``, NaN and plain
            numbers are also accepted.

    Returns:
        int | None: The run of digits at the start of the reply (leading
        whitespace is ignored), or ``None`` when the reply is missing or
        does not start with a digit.
    """
    if response is None:
        return None
    # NaN is the only float that is not equal to itself.
    if isinstance(response, float) and response != response:
        return None
    if not isinstance(response, str):
        # e.g. 3 -> "3", 3.0 -> "3.0"; the leading digits are still picked up.
        response = str(response)

    match = re.match(r"\s*(\d+)", response)  # digits at the start, after optional whitespace
    if match:
        return int(match.group(1))
    return None

# Replace each raw reply in 'skala1234' with the integer parsed by
# extract_number (None where the reply does not start with a number).
test_df['skala1234'] = test_df['skala1234'].apply(extract_number)

# Print the result
print(test_df)

In [None]:
# Binarize the parsed scale: values 1 or 2 -> 0, everything else -> 1.
# NOTE(review): rows where no number was parsed are not in [1, 2], so they
# also end up as 1 -- confirm that is the intended default.
test_df['predicted'] = np.where(test_df['skala1234'].isin([1, 2]), 0, 1)
test_df

In [None]:
# Save the frame (parsed scale plus binary 'predicted' label) without the index.
test_df.to_csv('test_deepseek_predicted.csv', index=False)