## Create a new Index

In [13]:
from Helper.timing import Timing
from Helper.ORM import fetch_records
from Pipeline.index.index import Index

# Build a fresh index from the "Corpus" records and persist it as three
# pickle files; the whole build-and-save step runs inside the Timing block.
with Timing('Creating Index...'):
    records = fetch_records(model="Corpus")
    # NOTE(review): `documents` is not referenced again in this cell (the index
    # is built from `records`, not from the extracted texts) — confirm whether
    # a later cell relies on it before dropping it.
    documents = [rec.text for rec in records]
    index = Index(records)

    # Destination of each artefact written by Index.save.
    save_targets = {
        "model_name": "Pipeline/index/Saved/wiki_model.pickle",
        "tfidf_name": "Pipeline/index/Saved/wiki_tfidf.pickle",
        "keys_name": "Pipeline/index/Saved/wiki_keys.pickle",
    }
    index.save(**save_targets)

# Second entry of the TF-IDF matrix shape (presumably the number of indexed
# terms — TODO confirm against Index).
print(index.tfidf_matrix.shape[1])

Creating Index... elapsed time: 391.8957905769348
794562


## Load an existing Index

In [2]:
from Pipeline.index.index import Index

# Restore a previously saved index from its three pickle artefacts.
# NOTE(review): the model pickle is read from "Lower_Stopwords_Processed"
# while the TF-IDF and keys pickles come from "Lower_Processed" — confirm that
# mixing the two directories is intentional.
saved_artifacts = {
    "model_name": "Pipeline/index/Saved/Lower_Stopwords_Processed/wiki_model369721.pickle",
    "tfidf_name": "Pipeline/index/Saved/Lower_Processed/wiki_tfidf369721.pickle",
    "keys_name": "Pipeline/index/Saved/Lower_Processed/wiki_keys369721.pickle",
}
index = Index.load(**saved_artifacts)

# Second entry of the TF-IDF matrix shape (presumably the number of indexed
# terms — TODO confirm against Index).
print(index.tfidf_matrix.shape[1])

794562


## Evaluate the currently loaded index

In [5]:
from Pipeline.Evaluation.eval import evaluate
from Helper.timing import Timing

# Inputs for the evaluation run, all under Pipeline/Evaluation/Wiki.
# NOTE(review): create_run_file_bool=False presumably reuses the run file
# already present at run_path instead of regenerating it — confirm in eval.py.
eval_options = {
    "qrels_path": "Pipeline/Evaluation/Wiki/qrels_wiki",
    "queries_path": "Pipeline/Evaluation/Wiki/queries.csv",
    "run_path": "Pipeline/Evaluation/Wiki/run_wiki_lower",
    "create_run_file_bool": False,
}

# Both the evaluation and the printing of its result happen inside the timed
# block, so the metrics appear before the elapsed-time line.
with Timing('Evaluating Documents...'):
    ev = evaluate(index, **eval_options)

    print(ev)

{AP(rel=2): 0.43642916601711607, RR(rel=2): 0.43642916601711607, P@10: 0.18947368421052488, R@10: 0.1733217973386691}
Evaluating Documents... elapsed time: 0.28061437606811523


## Show Clustered Index Plots

In [None]:
# Clustering helper from the project's Clustering module.
from Clustering import cluster_index

# Hand the index created or loaded by an earlier cell to the clustering helper.
# NOTE(review): per the section heading this is expected to render cluster
# plots; what it actually does is not visible from this notebook — confirm.
cluster_index(index)

## Search for a certain term

In [20]:
# Imported in this cell so it does not depend on an earlier cell having
# brought Timing into the kernel namespace.
from Helper.timing import Timing

# Query the index (created or loaded by an earlier cell) for a single term and
# print one line per hit; both the search and the printing are timed.
with Timing('searching...'):
    top_documents = index.search('IRAQ')
    # Each hit unpacks into three values; the third is not needed for display.
    for doc_id, score, _idx in top_documents:
        print(f"Document {doc_id}: Similarity Score {score}")

Document 1358658: Similarity Score 0.6522814603121104
Document 689169: Similarity Score 0.6440234361494456
Document 83127: Similarity Score 0.6406980610023064
Document 1042652: Similarity Score 0.6240635000129057
Document 2103581: Similarity Score 0.5842387741819973
Document 1348390: Similarity Score 0.581838758742348
Document 91022: Similarity Score 0.5768447940575882
Document 2416624: Similarity Score 0.5702820126486285
Document 2409530: Similarity Score 0.5324332217785986
Document 1045421: Similarity Score 0.5305304296199871
Document 2098473: Similarity Score 0.5154709735480356
searching... elapsed time: 1.9454967975616455
