In [1]:
# Report the visible GPU, driver/CUDA versions and current memory usage.
!nvidia-smi

Sun Dec  8 11:25:39 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.42.02              Driver Version: 555.42.02      CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100 80GB PCIe          On  |   00000000:C1:00.0 Off |                    0 |
| N/A   31C    P0             45W /  300W |       1MiB /  81920MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                     

In [None]:
############# Load Template Sentences from Chroma DB ####################################

In [3]:
# --- Run configuration -------------------------------------------------------
# The occupation name is the stem from which the source tags and the Chroma
# collection name are derived.
occupation = "politician"

# Occupation-derived identifiers.
CHROMA_SOURCE = f"template_{occupation}"
CHROMA_SOURCE_JSON = f"json_{occupation}"
CHROMA_COLLECTION_NAME = f"{occupation}_json_files"

# Chroma storage settings: on-disk location and the splitter chunk size.
CHROMA_PERSIST_DIRECTORY = "./chroma_db"
CHROMA_CHUNK_SIZE = 1000

# Sentence-transformers checkpoint used to build the embedding function.
EMBEDDINGS_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

In [4]:
import os
os.environ["HF_HOME"] = "~/scratch/hf-cache"
from langchain.docstore.document import Document
from langchain_text_splitters import CharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

In [None]:
############# Load LLama ######################

In [None]:
from unsloth import FastLanguageModel
import os
import torch

# Load the 4-bit quantised Llama 3.1 70B Instruct checkpoint through Unsloth.
max_seq_length = 8192 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Credentials must not live in the notebook: take the Hugging Face token from the
# HF_TOKEN environment variable. When it is unset this is None, i.e. no explicit
# token is passed (only needed for gated models like meta-llama/Llama-2-7b-hf).
hf_token = os.environ.get("HF_TOKEN")

with torch.no_grad():
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit",
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
        token = hf_token,
    )

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


==((====))==  Unsloth 2024.11.6: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: NVIDIA A100 80GB PCIe. Max memory: 79.256 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu124. CUDA = 8.0. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|██████████| 6/6 [05:37<00:00, 56.30s/it]


In [None]:
########## Model HyperParameters #######################

In [6]:
# Keep a handle on the loaded model's default generation settings and print them
# so the sampling configuration is recorded alongside the results.
# NOTE(review): the generate() call in the main loop passes only max_new_tokens,
# so these defaults presumably govern sampling there — confirm.
gen_config = model.generation_config
print(gen_config)

GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "max_length": 131072,
  "pad_token_id": 128004,
  "temperature": 0.6,
  "top_p": 0.9
}



In [None]:
################ Prompt Template ########################

In [7]:
# Instruction/input/response prompt skeleton with three positional "{}" slots:
# instruction, input, and response (the last is filled with "" at generation time).
my_prompt = (
    "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Input:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}"
)

In [10]:
######### Politicians to Query For ######################
# Each entry is a [first_name, last_name] pair; the order is the processing order.
all_entities = [
    ["Wilfred", "Elrington"], ["Ernst", "Siemens"], ["Erich", "Ludendorff"],
    ["Ernst", "Röhm"], ["Daniel", "Romanowitsch"], ["Franz", "Müntefering"],
    ["Hans", "Fritzsche"], ["Karl", "Gebhardt"], ["Robert", "Ley"],
    ["Otto", "Braun"], ["Oskar", "Dirlewanger"], ["Mikhail", "Lomonosov"],
    ["Hanns", "Schleyer"], ["Sandra", "Perković"], ["Friedrich", "von Humboldt"],
    ["Ludwig", "Grimm"], ["Jean-Marie", "Le Pen"], ["Zheng", "Ying"],
    ["Rosa", "Luxemburg"], ["Giacomo", "Puccini"], ["Giuseppe", "Verdi"],
    ["Jacques", "Cartier"], ["Yury", "Gagarin"], ["Lun", "Sima"],
    ["Margaret", "Roberts"], ["Jian", "Yang"], ["Nina", "Andreeva"],
    ["Irène", "Joliot-Curie"], ["Willoughby", "Newton"], ["Charles", "Maurras"],
    ["Guy", "Teissier"], ["William", "Terry"], ["James", "Gibson"],
    ["Allen", "Sheppard"], ["Elisha", "Meredith"], ["Henry", "Alexander"],
    ["Todd", "Platts"], ["David", "Cairns"], ["Marit", "Paulsen"],
    ["Michael", "Shaw"], ["John", "Gregg"], ["Leonid", "Razvozzhayev"],
    ["Mohamed", "Sheikh"], ["Albert", "Grimaldi"], ["Jacqueline", "Cochran"],
    ["V.", "Kalairajan"], ["Louis", "Aragon"], ["Volker", "Beck"],
    ["Blaise", "Compaoré"], ["Karl", "Frank"], ["Danilo", "Türk"],
    ["Dimitris", "Christofias"], ["Ali", "Khamenei"], ["Carlos", "Funes"],
    ["Pierre", "Nkurunziza"], ["Joseph", "Fischer"], ["Ernst", "Arndt"],
    ["Gustav", "Noske"], ["Yahya", "Jammeh"], ["Andry", "Rajoelina"],
    ["Thomas", "de Maizière"], ["Emomali", "Rakhmonov"], ["Yoweri", "Museveni"],
    ["Michael", "Higgins"], ["Traian", "Băsescu"], ["Jacob", "Zuma"],
    ["Toomas", "Ilves"], ["Abdullah", "House of Saud"], ["Bamir", "Topi"],
    ["Hans-Gert", "Pöttering"], ["Karolos", "Papoulias"], ["Jalal", "Talabani"],
    ["Abdelaziz", "Bouteflika"], ["Erich", "Mielke"], ["Juan Manuel", "Santos"],
    ["Sihamoni", "Norodom"], ["Laura", "Chinchilla"], ["Otto", "Gessler"],
]

In [13]:
import time
import json
import ast
import time
from tqdm import tqdm
# Per-entity pipeline: fetch the cached JSON for each person from Chroma, ask the
# model for a Hindi article built from that JSON, optionally store the article
# back in Chroma, and record timing / token statistics for the run.
error = 0  # entities skipped because their stored JSON was missing or unparsable
SAVE_TO_DISK = 1  # 1 -> persist generated articles to Chroma; anything else -> skip saving
STATS_PATH = "Analysis_Article_Generation_Without_Template_v2.json"
run_stats = []  # one statistics dict per successfully processed entity

# Unsloth's inference setup takes only the model, so it is issued once here
# instead of once per entity.
FastLanguageModel.for_inference(model)

try:
    for entry in tqdm(all_entities):
        first_name = entry[0]
        last_name = entry[1]
        entity_name = first_name + " " + last_name
        print("Your are querying for: "+ entity_name)
        ################# Get JSON from Chroma Cache ###########################
        # NOTE: the embedding model and the Chroma client are built inside the
        # timed region, so the reported retrieval time includes their construction.
        start_time = time.time()
        target_collection_name = occupation + "_json_files"
        embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDINGS_MODEL)
        print("Target Collection Is: " + target_collection_name)
        chroma_db_from_disk = Chroma(persist_directory=CHROMA_PERSIST_DIRECTORY,
                                    collection_name=target_collection_name,
                                    embedding_function=embeddings)
        # Metadata filter: both name parts must match the stored record.
        results = chroma_db_from_disk.get(
            where={
                "$and": [
                    {"first_name": first_name},
                    {"last_name": last_name}
                ]
            },
            include=["documents", "metadatas"])
        documents = [
            Document(page_content=doc, metadata=meta)
            for doc, meta in zip(results["documents"], results["metadatas"])
        ]
        end_time = time.time()
        chroma_json_retrieval_time = end_time - start_time
        print(f"Chroma Retrieval Time: {chroma_json_retrieval_time:.4f} seconds")

        # No stored record for this person: count it and move on rather than
        # letting documents[0] abort the whole run with an IndexError.
        if not documents:
            error += 1
            print(f"No stored JSON document found for {entity_name}; skipping")
            continue

        try:
            document = documents[0]
            # page_content is the str() of a list of decoded strings (the same
            # shape this notebook saves below); take its first element.
            content = ast.literal_eval(document.page_content)[0]
            # The dict literal of interest follows the "### Response:" marker and
            # runs from its first "{" to the last "}" in the text.
            response_start = content.index("### Response:")
            json_start = content.index("{", response_start)
            json_end = content.rindex("}")
            json_string = content[json_start:json_end+1]
            parsed_dict = ast.literal_eval(json_string)
            json_data = json.dumps(parsed_dict, ensure_ascii=False, indent=2)
            #print(json_data)
        except (IndexError, SyntaxError, ValueError) as e:
            # Covers a malformed stored payload, a missing marker/brace
            # (str.index raises ValueError) and an unparsable dict literal.
            error += 1
            print(f"Error parsing JSON: {e}")
            continue
        ################ Inference for Article Generation ######################
        start_time = time.time()
        with torch.no_grad():
            inputs = tokenizer(
            [
                my_prompt.format(
                    f"Generate an Article in Hindi Language for {entity_name} using the details given in the json string below for this person. Do not any other extra English text after the article.", # instruction
                    f"JSON:\n{json_data}", # input
                    "", # output - leave this blank for generation!
                )
            ], return_tensors = "pt").to("cuda")
            print("Number of Tokens in Input: " + str(inputs['input_ids'].shape[1]))
            outputs = model.generate(**inputs, max_new_tokens = 8192)
            print("Number of Tokens in output: " + str(outputs.shape[1]))
            final_article = tokenizer.batch_decode(outputs)
            #print(final_article)
        end_time = time.time()
        article_generation_time = end_time - start_time
        print(f"Article Generation Time: {article_generation_time:.4f} seconds")
        ################## CHROMA SAVE ############################
        start_time = time.time()
        target_collection_name = occupation + "_generated_articles_without_template"
        if SAVE_TO_DISK == 1:
            # The decoded batch is stored as str(list), tagged with the person's name.
            docs_2 = [Document(page_content=str(final_article), metadata={"source": f"{occupation}_generated_articles_without_template", "first_name": first_name, "last_name": last_name })]
            text_splitter = CharacterTextSplitter(chunk_size=CHROMA_CHUNK_SIZE, chunk_overlap=0)
            docs_2 = text_splitter.split_documents(docs_2)
            embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDINGS_MODEL)
            local_chroma = Chroma.from_documents(docs_2, embeddings, persist_directory=CHROMA_PERSIST_DIRECTORY, collection_name=target_collection_name)
            print("Your document is Saved to Chroma")
        else:
            print("WARNING: YOU CHOOSE NOT TO SAVE THE ARTICLE TO CHROMA")
        end_time = time.time()
        chroma_save_time = end_time - start_time
        print(f"Execution time: {chroma_save_time:.4f} seconds")
        op = { "First Name": first_name, "Last Name": last_name, "Time for JSON Retrieval": chroma_json_retrieval_time,
              "Time For Chroma Write": chroma_save_time, "Input Tokens": inputs['input_ids'].shape[1],
              "Output Tokens": outputs.shape[1], "Time For Article Generation": article_generation_time }
        run_stats.append(op)
finally:
    # Dumping each record separately into one handle produced back-to-back JSON
    # objects that json.load cannot parse. Write a single JSON array instead, and
    # do it in `finally` so records gathered before an interruption are kept.
    with open(STATS_PATH, "w") as f:
        json.dump(run_stats, f, indent=4)

print(f"Processed {len(run_stats)} of {len(all_entities)} entities; {error} skipped")

  0%|          | 0/78 [00:00<?, ?it/s]

Your are querying for: Wilfred Elrington
Target Collection Is: politician_json_files
Chroma Retrieval Time: 6.9510 seconds
Number of Tokens in Input: 595
Number of Tokens in output: 964
Article Generation Time: 38.7990 seconds


  1%|▏         | 1/78 [00:48<1:02:20, 48.58s/it]

Your document is Saved to Chroma
Execution time: 2.8246 seconds
Your are querying for: Ernst Siemens
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.5420 seconds
Number of Tokens in Input: 2173
Number of Tokens in output: 2758
Article Generation Time: 64.4814 seconds


  3%|▎         | 2/78 [01:56<1:15:55, 59.94s/it]

Your document is Saved to Chroma
Execution time: 1.8598 seconds
Your are querying for: Erich Ludendorff


  4%|▍         | 3/78 [01:58<41:46, 33.42s/it]  

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.8468 seconds
Error parsing JSON: ':' expected after dictionary key (<unknown>, line 1)
Your are querying for: Ernst Röhm
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.2410 seconds
Number of Tokens in Input: 1455
Number of Tokens in output: 2016
Article Generation Time: 57.8705 seconds


  5%|▌         | 4/78 [02:59<54:52, 44.49s/it]

Your document is Saved to Chroma
Execution time: 2.3557 seconds
Your are querying for: Daniel Romanowitsch
Target Collection Is: politician_json_files
Chroma Retrieval Time: 3.9982 seconds
Number of Tokens in Input: 1089
Number of Tokens in output: 1414
Article Generation Time: 32.2386 seconds


  6%|▋         | 5/78 [03:37<51:05, 42.00s/it]

Your document is Saved to Chroma
Execution time: 1.3386 seconds
Your are querying for: Franz Müntefering
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0747 seconds
Number of Tokens in Input: 1396
Number of Tokens in output: 9588
Article Generation Time: 1082.5097 seconds


  8%|▊         | 6/78 [21:43<7:56:34, 397.14s/it]

Your document is Saved to Chroma
Execution time: 1.9459 seconds
Your are querying for: Hans Fritzsche
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.3997 seconds
Number of Tokens in Input: 1015
Number of Tokens in output: 1499
Article Generation Time: 47.4318 seconds


  9%|▉         | 7/78 [22:34<5:36:01, 283.97s/it]

Your document is Saved to Chroma
Execution time: 2.1420 seconds
Your are querying for: Karl Gebhardt
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7261 seconds
Number of Tokens in Input: 1499
Number of Tokens in output: 2216
Article Generation Time: 74.0076 seconds


 10%|█         | 8/78 [23:52<4:14:38, 218.27s/it]

Your document is Saved to Chroma
Execution time: 1.8449 seconds
Your are querying for: Robert Ley
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.3407 seconds
Number of Tokens in Input: 1267
Number of Tokens in output: 1792
Article Generation Time: 54.0380 seconds


 12%|█▏        | 9/78 [24:50<3:13:26, 168.21s/it]

Your document is Saved to Chroma
Execution time: 2.7800 seconds
Your are querying for: Otto Braun
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7375 seconds
Number of Tokens in Input: 1027
Number of Tokens in output: 1473
Article Generation Time: 44.4504 seconds


 13%|█▎        | 10/78 [25:38<2:28:30, 131.04s/it]

Your document is Saved to Chroma
Execution time: 1.6182 seconds
Your are querying for: Oskar Dirlewanger


 14%|█▍        | 11/78 [25:40<1:42:14, 91.57s/it] 

Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0503 seconds
Error parsing JSON: invalid character '–' (U+2013) (<unknown>, line 1)
Your are querying for: Mikhail Lomonosov
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6508 seconds
Number of Tokens in Input: 2398
Number of Tokens in output: 2864
Article Generation Time: 51.8922 seconds


 15%|█▌        | 12/78 [26:35<1:28:36, 80.56s/it]

Your document is Saved to Chroma
Execution time: 1.8398 seconds
Your are querying for: Hanns Schleyer
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6183 seconds
Number of Tokens in Input: 1205
Number of Tokens in output: 1736
Article Generation Time: 53.2307 seconds


 17%|█▋        | 13/78 [27:32<1:19:16, 73.17s/it]

Your document is Saved to Chroma
Execution time: 1.3165 seconds
Your are querying for: Sandra Perković


 18%|█▊        | 14/78 [27:34<55:09, 51.72s/it]  

Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.1346 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Friedrich von Humboldt
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6128 seconds
Number of Tokens in Input: 3577
Number of Tokens in output: 4391
Article Generation Time: 99.9863 seconds


 19%|█▉        | 15/78 [29:17<1:10:36, 67.24s/it]

Your document is Saved to Chroma
Execution time: 1.6217 seconds
Your are querying for: Ludwig Grimm


 21%|██        | 16/78 [29:19<49:14, 47.66s/it]  

Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.1801 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Jean-Marie Le Pen


 22%|██▏       | 17/78 [29:21<34:23, 33.82s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6456 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Zheng Ying


 23%|██▎       | 18/78 [29:22<24:05, 24.09s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.4435 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Rosa Luxemburg


 24%|██▍       | 19/78 [29:24<17:13, 17.52s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.2134 seconds
Error parsing JSON: closing parenthesis '}' does not match opening parenthesis '[' (<unknown>, line 1)
Your are querying for: Giacomo Puccini
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7496 seconds
Number of Tokens in Input: 2382
Number of Tokens in output: 2944
Article Generation Time: 62.4297 seconds


 26%|██▌       | 20/78 [30:30<30:59, 32.06s/it]

Your document is Saved to Chroma
Execution time: 1.7748 seconds
Your are querying for: Giuseppe Verdi
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.8314 seconds
Number of Tokens in Input: 3112
Number of Tokens in output: 3706
Article Generation Time: 67.9967 seconds


 27%|██▋       | 21/78 [31:42<41:36, 43.80s/it]

Your document is Saved to Chroma
Execution time: 1.3359 seconds
Your are querying for: Jacques Cartier
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0937 seconds
Number of Tokens in Input: 1364
Number of Tokens in output: 1968
Article Generation Time: 61.5806 seconds


 28%|██▊       | 22/78 [32:47<47:00, 50.37s/it]

Your document is Saved to Chroma
Execution time: 2.0135 seconds
Your are querying for: Yury Gagarin
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.2527 seconds
Number of Tokens in Input: 2283
Number of Tokens in output: 2860
Article Generation Time: 63.8300 seconds


 29%|██▉       | 23/78 [33:54<50:48, 55.43s/it]

Your document is Saved to Chroma
Execution time: 2.1502 seconds
Your are querying for: Lun Sima
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6757 seconds
Number of Tokens in Input: 666
Number of Tokens in output: 1072
Article Generation Time: 38.1510 seconds


 31%|███       | 24/78 [34:36<46:10, 51.31s/it]

Your document is Saved to Chroma
Execution time: 1.8632 seconds
Your are querying for: Margaret Roberts


 32%|███▏      | 25/78 [34:38<32:11, 36.45s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7933 seconds
Error parsing JSON: invalid character '०' (U+0966) (<unknown>, line 1)
Your are querying for: Jian Yang
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.3095 seconds
Number of Tokens in Input: 1055
Number of Tokens in output: 9247
Article Generation Time: 1067.0232 seconds


 33%|███▎      | 26/78 [52:36<5:02:29, 349.02s/it]

Your document is Saved to Chroma
Execution time: 9.9148 seconds
Your are querying for: Nina Andreeva
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6427 seconds
Number of Tokens in Input: 817
Number of Tokens in output: 1123
Article Generation Time: 29.4209 seconds


 35%|███▍      | 27/78 [53:09<3:35:56, 254.05s/it]

Your document is Saved to Chroma
Execution time: 1.3920 seconds
Your are querying for: Irène Joliot-Curie


 36%|███▌      | 28/78 [53:11<2:28:41, 178.44s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0361 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Willoughby Newton
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6891 seconds
Number of Tokens in Input: 780
Number of Tokens in output: 1219
Article Generation Time: 41.9322 seconds


 37%|███▋      | 29/78 [53:56<1:53:08, 138.54s/it]

Your document is Saved to Chroma
Execution time: 1.8202 seconds
Your are querying for: Charles Maurras


 38%|███▊      | 30/78 [53:58<1:17:58, 97.47s/it] 

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6532 seconds
Error parsing JSON: closing parenthesis '}' does not match opening parenthesis '[' (<unknown>, line 1)
Your are querying for: Guy Teissier
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.2572 seconds
Number of Tokens in Input: 1052
Number of Tokens in output: 1595
Article Generation Time: 53.6938 seconds


 40%|███▉      | 31/78 [54:55<1:06:52, 85.37s/it]

Your document is Saved to Chroma
Execution time: 2.1902 seconds
Your are querying for: William Terry
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7778 seconds
Number of Tokens in Input: 790
Number of Tokens in output: 1167
Article Generation Time: 36.1119 seconds


 41%|████      | 32/78 [55:34<54:55, 71.64s/it]  

Your document is Saved to Chroma
Execution time: 1.6946 seconds
Your are querying for: James Gibson
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.3619 seconds
Number of Tokens in Input: 638
Number of Tokens in output: 827
Article Generation Time: 17.8153 seconds


 42%|████▏     | 33/78 [55:56<42:23, 56.52s/it]

Your document is Saved to Chroma
Execution time: 2.0719 seconds
Your are querying for: Allen Sheppard
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7334 seconds
Number of Tokens in Input: 773
Number of Tokens in output: 1246
Article Generation Time: 45.2561 seconds


 44%|████▎     | 34/78 [56:44<39:37, 54.04s/it]

Your document is Saved to Chroma
Execution time: 1.2591 seconds
Your are querying for: Elisha Meredith
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0768 seconds
Number of Tokens in Input: 757
Number of Tokens in output: 992
Article Generation Time: 22.4526 seconds


 45%|████▍     | 35/78 [57:11<32:49, 45.80s/it]

Your document is Saved to Chroma
Execution time: 2.0611 seconds
Your are querying for: Henry Alexander
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7969 seconds
Number of Tokens in Input: 515
Number of Tokens in output: 973
Article Generation Time: 42.2644 seconds


 46%|████▌     | 36/78 [57:56<31:57, 45.66s/it]

Your document is Saved to Chroma
Execution time: 1.2524 seconds
Your are querying for: Todd Platts
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0648 seconds
Number of Tokens in Input: 849
Number of Tokens in output: 1517
Article Generation Time: 64.8222 seconds


 47%|████▋     | 37/78 [59:04<35:48, 52.41s/it]

Your document is Saved to Chroma
Execution time: 1.2773 seconds
Your are querying for: David Cairns
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.3048 seconds
Number of Tokens in Input: 929
Number of Tokens in output: 1366
Article Generation Time: 42.6520 seconds


 49%|████▊     | 38/78 [59:51<33:47, 50.69s/it]

Your document is Saved to Chroma
Execution time: 1.7330 seconds
Your are querying for: Marit Paulsen
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7108 seconds
Number of Tokens in Input: 975
Number of Tokens in output: 1227
Article Generation Time: 24.8789 seconds


 50%|█████     | 39/78 [1:00:19<28:30, 43.87s/it]

Your document is Saved to Chroma
Execution time: 1.3396 seconds
Your are querying for: Michael Shaw
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0855 seconds
Number of Tokens in Input: 1268
Number of Tokens in output: 1556
Article Generation Time: 29.2582 seconds


 51%|█████▏    | 40/78 [1:00:52<25:45, 40.66s/it]

Your document is Saved to Chroma
Execution time: 1.8373 seconds
Your are querying for: John Gregg
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.3037 seconds
Number of Tokens in Input: 739
Number of Tokens in output: 1100
Article Generation Time: 34.3015 seconds


 53%|█████▎    | 41/78 [1:01:30<24:31, 39.76s/it]

Your document is Saved to Chroma
Execution time: 2.0452 seconds
Your are querying for: Leonid Razvozzhayev
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6294 seconds
Number of Tokens in Input: 580
Number of Tokens in output: 1141
Article Generation Time: 52.4120 seconds


 54%|█████▍    | 42/78 [1:02:25<26:39, 44.44s/it]

Your document is Saved to Chroma
Execution time: 1.3078 seconds
Your are querying for: Mohamed Sheikh
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0395 seconds
Number of Tokens in Input: 691
Number of Tokens in output: 1070
Article Generation Time: 35.8089 seconds


 55%|█████▌    | 43/78 [1:03:04<25:03, 42.96s/it]

Your document is Saved to Chroma
Execution time: 1.6782 seconds
Your are querying for: Albert Grimaldi


 56%|█████▋    | 44/78 [1:03:06<17:18, 30.55s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.5952 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Jacqueline Cochran
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.2448 seconds
Number of Tokens in Input: 1325
Number of Tokens in output: 2268
Article Generation Time: 96.9370 seconds


 58%|█████▊    | 45/78 [1:04:46<28:19, 51.49s/it]

Your document is Saved to Chroma
Execution time: 2.1453 seconds
Your are querying for: V. Kalairajan
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6732 seconds
Number of Tokens in Input: 419
Number of Tokens in output: 1049
Article Generation Time: 57.9656 seconds


 59%|█████▉    | 46/78 [1:05:48<29:04, 54.51s/it]

Your document is Saved to Chroma
Execution time: 1.9386 seconds
Your are querying for: Louis Aragon
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6619 seconds
Number of Tokens in Input: 1965
Number of Tokens in output: 2546
Article Generation Time: 62.8464 seconds


 60%|██████    | 47/78 [1:06:55<30:03, 58.18s/it]

Your document is Saved to Chroma
Execution time: 2.2296 seconds
Your are querying for: Volker Beck
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6967 seconds
Number of Tokens in Input: 1505
Number of Tokens in output: 1850
Article Generation Time: 35.9287 seconds


 62%|██████▏   | 48/78 [1:07:33<26:11, 52.38s/it]

Your document is Saved to Chroma
Execution time: 1.2275 seconds
Your are querying for: Blaise Compaoré
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0262 seconds
Number of Tokens in Input: 1184
Number of Tokens in output: 1839
Article Generation Time: 64.8821 seconds


 63%|██████▎   | 49/78 [1:08:42<27:40, 57.25s/it]

Your document is Saved to Chroma
Execution time: 1.7044 seconds
Your are querying for: Karl Frank
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.1247 seconds
Number of Tokens in Input: 1500
Number of Tokens in output: 1827
Article Generation Time: 34.0109 seconds


 64%|██████▍   | 50/78 [1:09:20<23:58, 51.36s/it]

Your document is Saved to Chroma
Execution time: 1.4649 seconds
Your are querying for: Danilo Türk
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0300 seconds
Number of Tokens in Input: 1161
Number of Tokens in output: 1682
Article Generation Time: 52.3721 seconds


 65%|██████▌   | 51/78 [1:10:15<23:41, 52.67s/it]

Your document is Saved to Chroma
Execution time: 1.3176 seconds
Your are querying for: Dimitris Christofias
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0004 seconds
Number of Tokens in Input: 2461
Number of Tokens in output: 3205
Article Generation Time: 84.0429 seconds


 67%|██████▋   | 52/78 [1:11:43<27:23, 63.22s/it]

Your document is Saved to Chroma
Execution time: 1.7898 seconds
Your are querying for: Ali Khamenei


 68%|██████▊   | 53/78 [1:11:45<18:38, 44.75s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6487 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Carlos Funes
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.2990 seconds
Number of Tokens in Input: 871
Number of Tokens in output: 1212
Article Generation Time: 33.0811 seconds


 69%|██████▉   | 54/78 [1:12:21<16:53, 42.25s/it]

Your document is Saved to Chroma
Execution time: 2.0364 seconds
Your are querying for: Pierre Nkurunziza


 71%|███████   | 55/78 [1:12:23<11:32, 30.13s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.8415 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Joseph Fischer
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6198 seconds
Number of Tokens in Input: 1704
Number of Tokens in output: 2755
Article Generation Time: 109.3625 seconds


 72%|███████▏  | 56/78 [1:14:16<20:08, 54.91s/it]

Your document is Saved to Chroma
Execution time: 1.7609 seconds
Your are querying for: Ernst Arndt
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6405 seconds
Number of Tokens in Input: 1428
Number of Tokens in output: 1827
Article Generation Time: 41.2239 seconds


 73%|███████▎  | 57/78 [1:15:00<18:05, 51.67s/it]

Your document is Saved to Chroma
Execution time: 1.2558 seconds
Your are querying for: Gustav Noske
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.1068 seconds
Number of Tokens in Input: 1032
Number of Tokens in output: 1396
Article Generation Time: 35.5038 seconds


 74%|███████▍  | 58/78 [1:15:39<15:59, 47.97s/it]

Your document is Saved to Chroma
Execution time: 1.7248 seconds
Your are querying for: Yahya Jammeh
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.2228 seconds
Number of Tokens in Input: 1402
Number of Tokens in output: 1914
Article Generation Time: 52.6096 seconds


 76%|███████▌  | 59/78 [1:16:35<15:57, 50.38s/it]

Your document is Saved to Chroma
Execution time: 2.1651 seconds
Your are querying for: Andry Rajoelina
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7199 seconds
Number of Tokens in Input: 715
Number of Tokens in output: 1120
Article Generation Time: 38.4753 seconds


 77%|███████▋  | 60/78 [1:17:17<14:21, 47.85s/it]

Your document is Saved to Chroma
Execution time: 1.7597 seconds
Your are querying for: Thomas de Maizière


 78%|███████▊  | 61/78 [1:17:19<09:36, 33.89s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.3216 seconds
Error parsing JSON: leading zeros in decimal integer literals are not permitted; use an 0o prefix for octal integers (<unknown>, line 1)
Your are querying for: Emomali Rakhmonov
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.1483 seconds
Number of Tokens in Input: 1992
Number of Tokens in output: 2747
Article Generation Time: 79.6525 seconds


 79%|███████▉  | 62/78 [1:18:42<13:00, 48.79s/it]

Your document is Saved to Chroma
Execution time: 1.7395 seconds
Your are querying for: Yoweri Museveni
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7168 seconds
Number of Tokens in Input: 1883
Number of Tokens in output: 2580
Article Generation Time: 74.9634 seconds


 81%|████████  | 63/78 [1:20:01<14:25, 57.68s/it]

Your document is Saved to Chroma
Execution time: 1.7416 seconds
Your are querying for: Michael Higgins


 82%|████████▏ | 64/78 [1:20:03<09:34, 41.02s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.1329 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Traian Băsescu


 83%|████████▎ | 65/78 [1:20:04<06:19, 29.23s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7312 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Jacob Zuma


 85%|████████▍ | 66/78 [1:20:06<04:10, 20.87s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.3484 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Toomas Ilves
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.1216 seconds
Number of Tokens in Input: 2289
Number of Tokens in output: 2686
Article Generation Time: 44.2181 seconds


 86%|████████▌ | 67/78 [1:20:54<05:19, 29.05s/it]

Your document is Saved to Chroma
Execution time: 1.7923 seconds
Your are querying for: Abdullah House of Saud


 87%|████████▋ | 68/78 [1:20:55<03:27, 20.70s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.2369 seconds
Error parsing JSON: '[' was never closed (<unknown>, line 1)
Your are querying for: Bamir Topi
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0778 seconds
Number of Tokens in Input: 609
Number of Tokens in output: 989
Article Generation Time: 35.4266 seconds


 88%|████████▊ | 69/78 [1:21:34<03:56, 26.27s/it]

Your document is Saved to Chroma
Execution time: 1.7580 seconds
Your are querying for: Hans-Gert Pöttering
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6962 seconds
Number of Tokens in Input: 1617
Number of Tokens in output: 1837
Article Generation Time: 23.4991 seconds


 90%|████████▉ | 70/78 [1:22:02<03:31, 26.50s/it]

Your document is Saved to Chroma
Execution time: 1.8266 seconds
Your are querying for: Karolos Papoulias
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.2033 seconds
Number of Tokens in Input: 1384
Number of Tokens in output: 2019
Article Generation Time: 63.9146 seconds


 91%|█████████ | 71/78 [1:23:10<04:33, 39.01s/it]

Your document is Saved to Chroma
Execution time: 2.0837 seconds
Your are querying for: Jalal Talabani
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.8216 seconds
Number of Tokens in Input: 1090
Number of Tokens in output: 1426
Article Generation Time: 33.4849 seconds


 92%|█████████▏| 72/78 [1:23:47<03:50, 38.38s/it]

Your document is Saved to Chroma
Execution time: 1.6187 seconds
Your are querying for: Abdelaziz Bouteflika
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6808 seconds
Number of Tokens in Input: 2012
Number of Tokens in output: 2169
Article Generation Time: 17.8333 seconds


 94%|█████████▎| 73/78 [1:24:08<02:45, 33.13s/it]

Your document is Saved to Chroma
Execution time: 1.3659 seconds
Your are querying for: Erich Mielke
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.0412 seconds
Number of Tokens in Input: 1577
Number of Tokens in output: 2109
Article Generation Time: 55.5968 seconds


 95%|█████████▍| 74/78 [1:25:07<02:44, 41.01s/it]

Your document is Saved to Chroma
Execution time: 1.7570 seconds
Your are querying for: Juan Manuel Santos


 96%|█████████▌| 75/78 [1:25:09<01:27, 29.24s/it]

Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.7591 seconds
Error parsing JSON: '{' was never closed (<unknown>, line 1)
Your are querying for: Sihamoni Norodom
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.2414 seconds
Number of Tokens in Input: 1247
Number of Tokens in output: 2046
Article Generation Time: 81.4241 seconds


 97%|█████████▋| 76/78 [1:26:34<01:31, 45.94s/it]

Your document is Saved to Chroma
Execution time: 2.2517 seconds
Your are querying for: Laura Chinchilla
Target Collection Is: politician_json_files
Chroma Retrieval Time: 1.6502 seconds
Number of Tokens in Input: 908
Number of Tokens in output: 1640
Article Generation Time: 71.8533 seconds


 99%|█████████▊| 77/78 [1:27:49<00:54, 54.66s/it]

Your document is Saved to Chroma
Execution time: 1.5119 seconds
Your are querying for: Otto Gessler
Target Collection Is: politician_json_files
Chroma Retrieval Time: 2.3097 seconds
Number of Tokens in Input: 1104
Number of Tokens in output: 1418
Article Generation Time: 30.9411 seconds


100%|██████████| 78/78 [1:28:24<00:00, 68.00s/it]

Your document is Saved to Chroma
Execution time: 1.9394 seconds



