In [1]:
from ollama_help import check_ollama, check_models
from utils import load_json

# Abort with an error when check_ollama() reports failure. Raising is used
# instead of exit(): inside a notebook exit() shuts the kernel down, whereas
# an exception only stops this cell and leaves the kernel usable.
if not check_ollama():
    raise RuntimeError("Ollama não está instalado ou não está conectado.")

# Load the questions file consumed by the cells below.
questoes = load_json("./questoes/questoes.json")


In [2]:
# Text-only questions

text_models = ["phi4", "llava", "deepseek-r1", "llama3.2", "gemma2", "mistral"]
# NOTE: the spelling `avaliable_text_models` is kept on purpose; a later cell
# reads this exact name.
avaliable_text_models = [*check_models(text_models)]

# Keep only the questions whose type is "only-text".
text_questions = [question for question in questoes if question["type"] == "only-text"]

print(f"Total de {len(text_questions)} questões apenas texto")

✅ phi4 disponível
✅ llava disponível
✅ deepseek-r1 disponível
✅ llama3.2 disponível
✅ gemma2 disponível
✅ mistral disponível
Total de 1608 questões apenas texto


In [None]:
# Run the text-only questions through the Ollama models

from ollama_help.runner import test_ollama_models
from utils import test_table

# Single place for the predictions path, shared by the run and the summary.
text_predict_path = "./predict_data/text_predictions.json"

# Models are ordered by the length of their name (shortest first).
test_ollama_models(
    models=sorted(avaliable_text_models, key=len),
    questions=text_questions,
    predict_file=text_predict_path,
)

test_table(text_predict_path, len(text_questions))

Unnamed: 0,Model,Finsh,OK,Null,Err,Acc,Ttot,Tle,Tavg,Tmax,Tmin
0,deepseek-r1,1609 (100%),883,127,599,55%,09:06:24,-20.38,20.38,07:49,2.53
1,llama3.2,1609 (100%),426,28,1155,26%,08:53,-0.33,0.33,41.73,0.09
2,llava,1609 (100%),730,77,802,45%,15:22,-0.57,0.57,12.12,0.15
3,mistral,1609 (100%),931,30,648,58%,24:38,-0.92,0.92,20.85,0.17
4,phi4,1609 (100%),1337,12,260,83%,02:54:06,-6.49,6.49,01:46,0.32
5,gemma2,1609 (100%),1208,30,371,75%,16:39,-0.62,0.62,11.38,0.27
6,TOTAL,9654 (100%),5515,304,3835,57%,13:06:05,-39309.22,4.89,07:49,0.09


In [None]:
# Vision models to check, and the questions that carry a context image.
visions_models = ["llava", "llama3.2-vision", "llava-llama3", "minicpm-v","moondream", "llava-phi3"]
available_visions_models = [*check_models(visions_models)]

# Keep only the questions whose type is "context-image".
context_questions = [question for question in questoes if question["type"] == "context-image"]

print(f"Total de {len(context_questions)} questões com imagem de contexto")

✅ llava disponível
✅ llama3.2-vision disponível
✅ llava-llama3 disponível
✅ minicpm-v disponível
✅ moondream disponível
✅ llava-phi3 disponível
Total de 795 questões com imagem de contexto


In [None]:
# Run the context-image questions through the vision models.
# Single place for the predictions path, shared by the run and the summary.
context_predict_path = "./predict_data/correct_context.json"

# Models are ordered by the length of their name (shortest first).
test_ollama_models(
    models=sorted(available_visions_models, key=len),
    questions=context_questions,
    predict_file=context_predict_path,
)

test_table(context_predict_path, len(context_questions))

Unnamed: 0,Model,Finsh,OK,Null,Err,Acc,Ttot,Tle,Tavg,Tmax,Tmin
0,llava,795 (100%),198,109,488,25%,15:11,0.0,1.15,17.2,0.16
1,minicpm-v,795 (100%),243,59,493,31%,11:04,0.0,0.84,14.95,0.15
2,moondream,795 (100%),45,570,180,6%,01:22,0.0,0.1,3.91,0.04
3,llava-phi3,795 (100%),224,126,445,28%,10:26,0.0,0.79,22.89,0.1
4,llava-llama3,795 (100%),64,496,235,8%,03:47,0.0,0.29,6.55,0.15
5,llama3.2-vision,795 (100%),259,2,534,33%,06:40,0.0,0.5,19.12,0.31
6,TOTAL,4770 (100%),1033,1362,2375,22%,48:34,-2428.76,0.61,22.89,0.04


In [12]:
from ollama_help.runner import test_ollama_multi_models
import random

best_text_models = ["mistral", "gemma2", "phi4"]
random.seed(42)

# Disciplines sampled below, in the order the draws are made. The order
# matters: each draw consumes the seeded random state.
DISCIPLINES = ["ciencias-humanas", "ciencias-natureza", "linguagens", "matematica"]


def sample_by_discipline(questions, disciplines, draw=25, keep=10):
    """Pick a seeded random subset of questions for each discipline.

    For every discipline, `draw` questions are sampled with `random.sample`
    and only the first `keep` of them are retained. The draw size is kept
    separate from `keep` so that the selection produced under a fixed seed
    stays the same as a 25-draw truncated to 10.

    Args:
        questions: iterable of question dicts with a "discipline" key.
        disciplines: discipline names to sample, in draw order.
        draw: how many questions to sample per discipline; capped at the
            number available so a small discipline does not raise ValueError.
        keep: how many of the drawn questions to retain per discipline.

    Returns:
        A list with the retained questions of every discipline, concatenated
        in the order of `disciplines`.
    """
    selected = []
    for discipline in disciplines:
        pool = [q for q in questions if q["discipline"] == discipline]
        drawn = random.sample(pool, min(draw, len(pool)))
        selected.extend(drawn[:keep])
    return selected


# 10 questions per discipline (taken from a seeded draw of 25 each).
example_context_questions = sample_by_discipline(context_questions, DISCIPLINES)

multi_predict_path = "./predict_data/multi_models_context_prediction.json"

# NOTE(review): the recorded output shows "75 (188%)" for one model pair,
# which suggests the predictions file still held results from an earlier,
# different sample — consider clearing it whenever the sample changes.
test_ollama_multi_models(
    text_models = sorted(best_text_models),
    # Use the availability-checked list, as the vision-only run does.
    vision_models = sorted(available_visions_models),
    questions = example_context_questions,
    predict_file = multi_predict_path,
)

test_table(multi_predict_path, len(example_context_questions))

Unnamed: 0,Model,Finsh,OK,Null,Err,Acc,Ttot,Tle,Tavg,Tmax,Tmin
0,llama3.2-vision+gemma2,75 (188%),36,3,36,48%,39:29,-1105.92,31.6,56.25,12.09
1,llama3.2-vision+mistral,40 (100%),13,1,26,32%,21:13,0.0,31.85,01:05,10.46
2,llama3.2-vision+phi4,40 (100%),24,0,16,60%,23:29,0.0,35.25,55.78,11.77
3,llava+gemma2,40 (100%),20,1,19,50%,05:39,0.0,8.48,14.25,5.87
4,llava+mistral,40 (100%),9,6,25,22%,06:31,0.0,9.79,23.85,5.13
5,llava+phi4,40 (100%),21,0,19,52%,11:53,0.0,17.84,38.28,8.34
6,llava-llama3+gemma2,40 (100%),18,0,22,45%,06:59,0.0,10.48,17.60,6.9
7,llava-llama3+mistral,40 (100%),15,5,20,38%,07:06,0.0,10.66,18.55,6.67
8,llava-llama3+phi4,40 (100%),21,0,19,52%,13:28,0.0,20.22,58.21,9.69
9,llava-phi3+gemma2,40 (100%),19,0,21,48%,05:38,0.0,8.46,13.89,5.49


In [None]:
# Questions whose answer alternatives are images (vision models only)

# Select from the full question list; filter() requires the iterable as its
# second argument and raises TypeError when it is missing.
answer_questions = [question for question in questoes if question["type"] == "answer-image"]

# NOTE(review): `models` is an empty list, so presumably no model is run
# here yet — confirm which vision models this cell should evaluate.
test_ollama_models(
    models = [],
    questions = answer_questions,
    predict_file = "./predict_data/answ-vision.json"
)

test_table("./predict_data/answ-vision.json", len(answer_questions))

In [None]:
# Questions whose answer alternatives are images (multi-model pipeline)

# Single place for the predictions path, shared by the run and the summary.
answ_multi_predict_path = "./predict_data/answ-multi.json"

# NOTE(review): `vision_models` is an empty list — presumably a placeholder;
# confirm which vision models should be paired with the text models.
test_ollama_multi_models(
    text_models=["phi4", "gemma2"],
    vision_models=[],
    questions=answer_questions,
    predict_file=answ_multi_predict_path,
)

test_table(answ_multi_predict_path, len(answer_questions))