## Load multiple models

In [1]:
from local_llm.local_llm import LocalLlm
from local_llm.constants import ModelEnum
import time
import pandas as pd
stats = pd.read_csv("benchmarks.csv")

In [2]:
stats

Unnamed: 0,model,avg_answer_time,answer_quality
0,TinyLlama1_1B_Q8,0.858762,1.0
1,Llama3_1B_Q8,0.269015,1.0
2,Llama3_3B_Q6,1.488254,4.0
3,Llama3_8B_Q6,2.273151,5.0
4,Qwen2_7B_Q8,1.981633,5.0
5,Qwen3_14B_Q5,7.718978,2.5
6,MistralNemo_8B_Q8,8.555013,5.0
7,Phi2_2B_Q8,2.945099,1.0
8,Phi4_3B_Q4,3.190574,3.5


In [3]:
instance = LocalLlm([ModelEnum.MistralNemo_8B_Q8], max_window_size=2048)

  0%|          | 0/1 [00:00<?, ?it/s]

Loading model MistralNemo_8B_Q8 from models/MistralNemo_8B_Q8/model.gguf.


llama_context: n_ctx_per_seq (2048) < n_ctx_train (8192) -- the full capacity of the model will not be utilized
100%|██████████| 1/1 [00:00<00:00,  1.60it/s]


## Ask them to analyze statements in order to find argumentative fallacies

In [4]:
#question = "Les vaccins sont dangereux car mon voisin a eu des effets secondaires après sa vaccination. De plus, les laboratoires pharmaceutiques ne cherchent qu'à faire du profit. Par conséquent, nous ne devrions pas faire confiance aux vaccins."
question = "Vaccines are dangerous because my neighbor experienced side effects after his vaccination. Furthermore, pharmaceutical companies are only interested in making a profit. Therefore, we should not trust vaccines."
#question = "Si l'IA devient plus intelligente que les humains, alors soit elle nous aidera, soit elle nous remplacera. Or, une IA superintelligente n'aura pas besoin de nous aider. Donc elle nous remplacera."
#question = "Votre argument sur le climat est invalide car vous n'êtes pas climatologue."
#question = "Si nous n'agissons pas maintenant contre le changement climatique, nos enfants vivront dans un monde invivable. Nous devons donc interdire immédiatement toutes les voitures."
#question = "Tous les cygnes que j'ai vus sont blancs, donc tous les cygnes sont blancs."
#question = "Terrorism is the best political weapon for nothing drives people harder than a fear of sudden death."

print(instance(ModelEnum.MistralNemo_8B_Q8, question, max_tokens=500))


Prompting...
 {
  "arguments": [
    {
      "text": "Vaccines are dangerous because my neighbor experienced side effects after his vaccination.",
      "is_valid": false,
      "fallacy_type": "Hasty Generalization",
      "explanation": "The argument is based on a single anecdotal experience, which is not sufficient to make a general claim about the safety of vaccines."
    },
    {
      "text": "Pharmaceutical companies are only interested in making a profit.",
      "is_valid": true,
      "fallacy_type": "null",
      "explanation": "This statement is generally true and does not contain a fallacy."
    },
    {
      "text": "Therefore, we should not trust vaccines.",
      "is_valid": false,
      "fallacy_type": "False Dilemma",
      "explanation": "The argument presents a false choice between trusting pharmaceutical companies and trusting vaccines. In reality, one can trust vaccines without trusting pharmaceutical companies' business practices."
    }
  ]
}



In [5]:
stats["answer_quality"][stats["model"] == ModelEnum.MistralNemo_8B_Q8.name] = 5
stats.to_csv("benchmarks.csv", index=False)
stats

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  stats["answer_quality"][stats["model"] == ModelEnum.MistralNemo_8B_Q8.name] = 5
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view

Unnamed: 0,model,load_time,avg_answer_time,answer_quality
0,TinyLlama1_1B_Q8,1.382449,0.858762,1.0
1,Llama3_1B_Q8,1.459934,0.269015,1.0
2,Llama3_3B_Q6,0.907212,1.488254,4.0
3,Llama3_8B_Q6,7.657012,2.273151,5.0
4,Qwen2_7B_Q8,9.364495,1.981633,5.0
5,Qwen3_14B_Q5,63.091192,7.718978,2.5
6,MistralNemo_8B_Q8,54.724201,8.555013,5.0
7,Phi2_2B_Q8,14.900199,2.945099,1.0
8,Phi4_3B_Q4,14.494035,3.190574,3.5


In [7]:
questions = [
    "Les vaccins sont dangereux car mon voisin a eu des effets secondaires après sa vaccination. De plus, les laboratoires pharmaceutiques ne cherchent qu'à faire du profit. Par conséquent, nous ne devrions pas faire confiance aux vaccins.",
    "Vaccines are dangerous because my neighbor experienced side effects after his vaccination. Furthermore, pharmaceutical companies are only interested in making a profit. Therefore, we should not trust vaccines.",
    "Si l'IA devient plus intelligente que les humains, alors soit elle nous aidera, soit elle nous remplacera. Or, une IA superintelligente n'aura pas besoin de nous aider. Donc elle nous remplacera.",
    "Votre argument sur le climat est invalide car vous n'êtes pas climatologue.",
    "Si nous n'agissons pas maintenant contre le changement climatique, nos enfants vivront dans un monde invivable. Nous devons donc interdire immédiatement toutes les voitures.",
    "Tous les cygnes que j'ai vus sont blancs, donc tous les cygnes sont blancs.",
    "Terrorism is the best political weapon for nothing drives people harder than a fear of sudden death.",
]


for entry in ModelEnum:
    t0 = time.time()
    instance = LocalLlm([entry], max_window_size=2048)
    load_time = time.time() - t0
    times = []
    t0 = time.time()
    for question in questions:
        answer = instance(entry, question, max_tokens=500)
    avg_answer_time = (time.time() - t0) / len(questions)
    stats[stats["model"] == entry.name][["model", "load_time", "avg_answer_time"]] = [entry.name, load_time, avg_answer_time]
    del instance

print(stats)

  0%|          | 0/1 [00:00<?, ?it/s]

Loading model TinyLlama1_1B_Q8 from models/TinyLlama1_1B_Q8/model.gguf.


100%|██████████| 1/1 [00:00<00:00,  1.89it/s]


Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stats[stats["model"] == entry.name][["model", "load_time", "avg_answer_time"]] = [entry.name, load_time, avg_answer_time]
  0%|          | 0/1 [00:00<?, ?it/s]

Loading model Llama3_1B_Q8 from models/Llama3_1B_Q8/model.gguf.


llama_context: n_ctx_per_seq (2048) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
100%|██████████| 1/1 [00:01<00:00,  1.74s/it]


Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...


  0%|          | 0/1 [00:00<?, ?it/s]

Loading model Llama3_3B_Q6 from models/Llama3_3B_Q6/model.gguf.


llama_context: n_ctx_per_seq (2048) < n_ctx_train (131072) -- the full capacity of the model will not be utilized
100%|██████████| 1/1 [00:15<00:00, 15.21s/it]


Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...


  0%|          | 0/1 [00:00<?, ?it/s]

Loading model Llama3_8B_Q6 from models/Llama3_8B_Q6/model.gguf.


llama_context: n_ctx_per_seq (2048) < n_ctx_train (10240) -- the full capacity of the model will not be utilized
100%|██████████| 1/1 [00:17<00:00, 17.60s/it]


Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...


  0%|          | 0/1 [00:00<?, ?it/s]

Loading model Qwen2_7B_Q8 from models/Qwen2_7B_Q8/model.gguf.


llama_context: n_ctx_per_seq (2048) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
100%|██████████| 1/1 [00:38<00:00, 38.57s/it]


Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...


  0%|          | 0/1 [00:00<?, ?it/s]

Loading model Qwen3_14B_Q5 from models/Qwen3_14B_Q5/model.gguf.


llama_context: n_ctx_per_seq (2048) < n_ctx_train (40960) -- the full capacity of the model will not be utilized
100%|██████████| 1/1 [01:09<00:00, 69.43s/it]


Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...


  0%|          | 0/1 [00:00<?, ?it/s]

Loading model MistralNemo_8B_Q8 from models/MistralNemo_8B_Q8/model.gguf.


llama_context: n_ctx_per_seq (2048) < n_ctx_train (8192) -- the full capacity of the model will not be utilized
100%|██████████| 1/1 [00:57<00:00, 57.53s/it]


Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...


  0%|          | 0/1 [00:00<?, ?it/s]

Loading model Phi2_2B_Q8 from models/Phi2_2B_Q8/model.gguf.


100%|██████████| 1/1 [00:18<00:00, 18.83s/it]


Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...


  0%|          | 0/1 [00:00<?, ?it/s]

Loading model Phi4_3B_Q4 from models/Phi4_3B_Q4/model.gguf.


llama_context: n_ctx_per_seq (2048) < n_ctx_train (4096) -- the full capacity of the model will not be utilized
100%|██████████| 1/1 [00:13<00:00, 13.41s/it]


Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
Prompting...
               model  load_time  avg_answer_time  answer_quality
0   TinyLlama1_1B_Q8   1.382449         0.858762             1.0
1       Llama3_1B_Q8   1.459934         0.269015             1.0
2       Llama3_3B_Q6   0.907212         1.488254             4.0
3       Llama3_8B_Q6   7.657012         2.273151             5.0
4        Qwen2_7B_Q8   9.364495         1.981633             5.0
5       Qwen3_14B_Q5  63.091192         7.718978             2.5
6  MistralNemo_8B_Q8  54.724201         8.555013             5.0
7         Phi2_2B_Q8  14.900199         2.945099             1.0
8         Phi4_3B_Q4  14.494035         3.190574             3.5


In [3]:
stats.to_csv("benchmarks.csv", index=False)