# Fixing question formatting

In [1]:
from datasets import load_dataset

# Load every split of the benchmark at a fixed commit so that this notebook
# always starts from the same (pre-fix) version of the data.
uzlib = load_dataset(
    'murodbek/uzlib',
    revision="67e185284f69232033591a31a1dc6d8e13d5bf6b",
)
uzlib

DatasetDict({
    correct_word: Dataset({
        features: ['question', 'option_a', 'option_b', 'option_c', 'option_d', 'answer', 'type'],
        num_rows: 1501
    })
    meaning: Dataset({
        features: ['question', 'option_a', 'option_b', 'option_c', 'option_d', 'answer', 'type'],
        num_rows: 236
    })
    meaning_in_context: Dataset({
        features: ['question', 'option_a', 'option_b', 'option_c', 'option_d', 'answer', 'type'],
        num_rows: 72
    })
    fill_in: Dataset({
        features: ['question', 'option_a', 'option_b', 'option_c', 'option_d', 'answer', 'type'],
        num_rows: 52
    })
})

In [2]:
# Switch the output format to pandas so that slicing a split with [:]
# yields a DataFrame (the cells below rely on DataFrame column access).
uzlib.set_format('pandas')

# One DataFrame per split, in the same order as the target names.
df_correct_word, df_meaning, df_meaning_in_context, df_fill_in = (
    uzlib[split_name][:]
    for split_name in ('correct_word', 'meaning', 'meaning_in_context', 'fill_in')
)

In [3]:
# Prompt templates, one per dataset split. Each is used by the matching
# prepare_* function to build the final text of a row's `question` field.
# NOTE(review): the templates use different apostrophe characters (‘ in
# CORRECT_WORD_PROMPT, ' in MEANING_PROMPT and MEANING_IN_CONTEXT_PROMPT,
# ʻ in FILL_IN_PROMPT) — confirm whether that inconsistency is intentional.

# Fixed text with no placeholders.
CORRECT_WORD_PROMPT = "Berilgan variantlar orasida qaysi biri to‘g‘ri yozilgan?"
# `{question}` is replaced with the raw question text.
MEANING_PROMPT = "{question} jumlasining ma'nosi quyidagi variantlarning qaysi birida to'g'ri ko'rsatilgan?"
# `{context}` comes first, then a blank line, then an instruction that embeds `{word}`.
MEANING_IN_CONTEXT_PROMPT = "{context}\n\nBerilgan matnda ishlatilgan {word} so'zining kontekstdagi ma'nosini aniqlang."
# `{question}` comes first, then a blank line, then an instruction mentioning the literal `<mask>` token.
FILL_IN_PROMPT = "{question}\n\nQuyidagi matnda <mask> oʻrniga qaysi variant mos keladi?"

def prepare_correct_word(question):
    """Return the fixed `correct_word` prompt.

    Args:
        question: The existing question value. It is ignored; the parameter
            only exists so the function can be used as an element-wise
            callback (e.g. with ``Series.apply``).

    Returns:
        ``CORRECT_WORD_PROMPT`` unchanged.
    """
    return CORRECT_WORD_PROMPT

def prepare_meaning(question):
    """Embed a raw `meaning` question into the full prompt.

    Args:
        question: Raw question text to substitute for ``{question}``.

    Returns:
        ``MEANING_PROMPT`` with its ``{question}`` placeholder filled in.
    """
    filled_prompt = MEANING_PROMPT.format(question=question)
    return filled_prompt

def prepare_meaning_in_context(question):
    """Rewrite a raw `meaning_in_context` question as a full prompt.

    The raw text is treated as a context passage, a blank line, and a final
    part that names the target word followed by the marker '— bu...'.

    Args:
        question: Raw question string.

    Returns:
        ``MEANING_IN_CONTEXT_PROMPT`` filled with the context passage and the
        target word (marker removed, surrounding whitespace stripped).

    Raises:
        ValueError: If the text contains no blank-line separator.
    """
    # Treat a "blank" line that holds a single space as a real blank line.
    normalized = question.replace('\n \n', '\n\n')

    # Split on the LAST blank line only, so a context made of several
    # paragraphs stays intact instead of breaking a two-way unpacking.
    context, separator, word = normalized.rpartition('\n\n')
    if not separator:
        raise ValueError(
            f"expected a context and a word separated by a blank line, got: {question!r}"
        )

    word = word.replace('— bu...', '').strip()

    return MEANING_IN_CONTEXT_PROMPT.format(context=context, word=word)

def prepare_fill_in(question):
    """Embed a raw `fill_in` question into the full prompt.

    Args:
        question: Raw question text to substitute for ``{question}``.

    Returns:
        ``FILL_IN_PROMPT`` with its ``{question}`` placeholder filled in.
    """
    filled_prompt = FILL_IN_PROMPT.format(question=question)
    return filled_prompt


# Rewrite the `question` column of every split with its matching formatter.
# The columns are overwritten in place, so this cell is not idempotent:
# running it a second time would format the already formatted text again.
for split_frame, format_question in (
    (df_correct_word, prepare_correct_word),
    (df_meaning, prepare_meaning),
    (df_meaning_in_context, prepare_meaning_in_context),
    (df_fill_in, prepare_fill_in),
):
    split_frame['question'] = split_frame['question'].apply(format_question)

In [7]:
def add_id_column(df, special_id):
    """Return a copy of ``df`` with a leading, zero-padded ``id`` column.

    Ids are built from the row position (not the index labels) as
    ``f"{special_id}{position:04d}"``, e.g. ``CW0000``, ``CW0001``, ...

    Args:
        df: DataFrame containing the columns ``question``, ``option_a``,
            ``option_b``, ``option_c``, ``option_d``, ``answer`` and ``type``.
        special_id: String prefix placed in front of the 4-digit counter.

    Returns:
        A new DataFrame with the columns ordered as ``id``, ``question``,
        ``option_a`` ... ``option_d``, ``answer``, ``type``. The input frame
        is left unmodified.
    """
    ids = [f"{special_id}{position:04d}" for position in range(len(df))]
    ordered_columns = [
        'id', 'question', 'option_a', 'option_b', 'option_c', 'option_d', 'answer', 'type',
    ]
    # assign() works on a copy, so the caller's frame does not silently gain
    # an `id` column as a side effect of calling this function.
    return df.assign(id=ids)[ordered_columns]

# Give every split its own two-letter id prefix; results are unpacked in the
# same order as the (frame, prefix) pairs.
df_correct_word, df_meaning, df_meaning_in_context, df_fill_in = [
    add_id_column(split_frame, id_prefix)
    for split_frame, id_prefix in (
        (df_correct_word, 'CW'),
        (df_meaning, 'MN'),
        (df_meaning_in_context, 'MC'),
        (df_fill_in, 'FI'),
    )
]

In [8]:
from datasets import Dataset, DatasetDict

# Rebuild the dataset from the edited DataFrames, keeping the original split
# names and their order.
uzlib_new = DatasetDict({
    split_name: Dataset.from_pandas(split_frame)
    for split_name, split_frame in (
        ('correct_word', df_correct_word),
        ('meaning', df_meaning),
        ('meaning_in_context', df_meaning_in_context),
        ('fill_in', df_fill_in),
    )
})
uzlib_new

DatasetDict({
    correct_word: Dataset({
        features: ['id', 'question', 'option_a', 'option_b', 'option_c', 'option_d', 'answer', 'type'],
        num_rows: 1501
    })
    meaning: Dataset({
        features: ['id', 'question', 'option_a', 'option_b', 'option_c', 'option_d', 'answer', 'type'],
        num_rows: 236
    })
    meaning_in_context: Dataset({
        features: ['id', 'question', 'option_a', 'option_b', 'option_c', 'option_d', 'answer', 'type'],
        num_rows: 72
    })
    fill_in: Dataset({
        features: ['id', 'question', 'option_a', 'option_b', 'option_c', 'option_d', 'answer', 'type'],
        num_rows: 52
    })
})

In [10]:
# Publish the reformatted splits to the same Hub repository they were loaded
# from. The call is the cell's last expression, so its result is displayed.
uzlib_new.push_to_hub(
    'murodbek/uzlib',
    commit_message='fixing question formatting',
)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/murodbek/uzlib/commit/025e6337d1fc0a487a07ab4fad5074df91337842', commit_message='fixing question formatting', commit_description='', oid='025e6337d1fc0a487a07ab4fad5074df91337842', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/murodbek/uzlib', endpoint='https://huggingface.co', repo_type='dataset', repo_id='murodbek/uzlib'), pr_revision=None, pr_num=None)

# Testing Gemini and comparing model accuracy

In [17]:
from datasets import load_dataset

# Reload the dataset with all splits combined into one table. No revision is
# pinned here, so this reads whatever is currently on the default branch.
uzlib = load_dataset(
    'murodbek/uzlib',
    split='all',
)
df_original = uzlib.to_pandas()
df_original

Unnamed: 0,id,question,option_a,option_b,option_c,option_d,answer,type
0,CW0000,Berilgan variantlar orasida qaysi biri to‘g‘ri...,-dan qatʼi nazar,-dan qati nazar,-dan qatiy nazar,-dan qatʼiy nazar,A,correct_word
1,CW0001,Berilgan variantlar orasida qaysi biri to‘g‘ri...,“bola huquqlari toʻgʻrisida”gi Konvensiya,“Bola huquqlari toʻgʻrisida”gi Konvensiya,“Bola huquqlari” toʻgʻrisidagi konvensiya,Bola huquqlari toʻgʻrisidagi konvensiya,D,correct_word
2,CW0002,Berilgan variantlar orasida qaysi biri to‘g‘ri...,“Gurs” yetib yiqildi.,Gurs yetib yiqildi.,Gurs etib yiqildi.,“Gurs” etib yiqildi.,C,correct_word
3,CW0003,Berilgan variantlar orasida qaysi biri to‘g‘ri...,“Hozir uvulavoraman!” dedi boʻri.,“Hozir uvillavoraman!” dedi boʻri.,“Hozir uvullavoraman!” dedi boʻri.,“Hozir uvilavoraman!” dedi boʻri.,C,correct_word
4,CW0004,Berilgan variantlar orasida qaysi biri to‘g‘ri...,“Iliada” dostoni,“Illiada” dostoni,“Iliyada” dostoni,“Illiyada” dostoni,A,correct_word
...,...,...,...,...,...,...,...,...
1856,FI0047,U ogʻir vazifani uddalashga <mask> boʻldi.\n\n...,muvaffaq,muvofaq,muvoffiq,muvofiq,A,fill_in
1857,FI0048,U Qur’onni <mask> qildi.\n\nQuyidagi matnda <m...,hatim,xatim,hatm,xatm,D,fill_in
1858,FI0049,Uning to‘lin yuzi biroz <mask> edi.\n\nQuyidag...,solqigan,salqigon,salqigan,solqigon,A,fill_in
1859,FI0050,Xalq afsonalarida realistik <mask> shu tarzda ...,tamoyil,tamoil,Ikkovi ham to‘g‘ri,Ikkovi ham xato,A,fill_in


In [18]:
import os

# Result files are the *.jsonl entries of artifacts/, in sorted order.
available_models = sorted(
    entry for entry in os.listdir('artifacts/') if entry.endswith(".jsonl")
)
len(available_models)

13

In [None]:
import pandas as pd
from run_uzlib import calculate_accuracy

# Collect one accuracy record per model whose result file covers the whole
# benchmark, then build a summary table from those records.
accuracy_info = []
skipped_models = []  # result files whose row count does not match the benchmark

# Derive the expected size from the loaded benchmark instead of hard-coding it.
expected_rows = len(df_original)

for model_name in available_models:
    df_predictions = pd.read_json(f"artifacts/{model_name}", lines=True)

    # Only evaluate files with exactly one row per benchmark row; remember the
    # rest so they can be reported instead of disappearing silently.
    if len(df_predictions) != expected_rows:
        skipped_models.append(model_name)
        continue

    # Attach the model's rows to the benchmark rows via the shared `id` column.
    df_result = df_original.merge(df_predictions, on=['id'])

    # NOTE(review): calculate_accuracy is assumed to return a dict-like record
    # with the keys selected below — confirm against run_uzlib.
    accuracy = calculate_accuracy(df_result)
    accuracy['model_name'] = model_name[:-len('.jsonl')]  # drop the file extension

    accuracy_info.append(accuracy)

if skipped_models:
    print(f"Skipped (row count != {expected_rows}): {skipped_models}")

df = pd.DataFrame(accuracy_info)
df = df[['model_name', 'all', 'correct_word', 'meaning', 'meaning_in_context', 'fill_in']]
df

Unnamed: 0,all,correct_word,meaning,meaning_in_context,fill_in,model_name
0,0.445997,0.466356,0.389831,0.361111,0.230769,Llama-3.3-70B-Instruct-Turbo
1,0.39871,0.418388,0.29661,0.361111,0.346154,Meta-Llama-3.1-8B-Instruct-Turbo
2,0.270822,0.263824,0.305085,0.319444,0.25,Qwen2.5-72B-Instruct-Turbo
3,0.354111,0.372418,0.275424,0.277778,0.288462,Qwen2.5-7B-Instruct-Turbo
4,0.545943,0.558294,0.508475,0.541667,0.365385,gemini-1.5-flash-002
5,0.547555,0.556296,0.512712,0.583333,0.403846,gemini-1.5-pro-002
6,0.606126,0.612258,0.559322,0.694444,0.519231,gemini-2.0-flash-001
7,0.580333,0.59427,0.491525,0.625,0.519231,gemini-2.0-flash-lite-001
8,0.426652,0.449034,0.368644,0.263889,0.269231,gemma-2-27b-it
9,0.3928,0.416389,0.305085,0.277778,0.269231,gemma-2-9b-it


In [31]:
# Put the model name first, then show the models ranked by overall accuracy
# (best first). The sorted view is only displayed; `df` keeps its row order.
df = df[
    ['model_name', 'all', 'correct_word', 'meaning', 'meaning_in_context', 'fill_in']
]
df.sort_values('all', ascending=False)

Unnamed: 0,model_name,all,correct_word,meaning,meaning_in_context,fill_in
10,gpt-4o-2024-11-20,0.619022,0.624917,0.584746,0.666667,0.538462
6,gemini-2.0-flash-001,0.606126,0.612258,0.559322,0.694444,0.519231
7,gemini-2.0-flash-lite-001,0.580333,0.59427,0.491525,0.625,0.519231
5,gemini-1.5-pro-002,0.547555,0.556296,0.512712,0.583333,0.403846
4,gemini-1.5-flash-002,0.545943,0.558294,0.508475,0.541667,0.365385
11,gpt-4o-mini-2024-07-18,0.515314,0.523651,0.508475,0.444444,0.403846
0,Llama-3.3-70B-Instruct-Turbo,0.445997,0.466356,0.389831,0.361111,0.230769
8,gemma-2-27b-it,0.426652,0.449034,0.368644,0.263889,0.269231
1,Meta-Llama-3.1-8B-Instruct-Turbo,0.39871,0.418388,0.29661,0.361111,0.346154
9,gemma-2-9b-it,0.3928,0.416389,0.305085,0.277778,0.269231
