In [1]:
"""
This example computes the score between a query and all possible
sentences in a corpus using a Cross-Encoder for semantic textual similarity (STS).
It output then the most similar sentences for the given query.
"""

import numpy as np

from sentence_transformers.cross_encoder import CrossEncoder

# Pre-trained cross encoder
model = CrossEncoder("cross-encoder/stsb-distilroberta-base")

# We want to compute the similarity between the query sentence
query = "A man is eating pasta."

# With all sentences in the corpus
corpus = [
    "A man is eating food.",
    "A man is eating a piece of bread.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
    "A woman is playing violin.",
    "Two men pushed carts through the woods.",
    "A man is riding a white horse on an enclosed ground.",
    "A monkey is playing drums.",
    "A cheetah is running behind its prey.",
]

# 1. We rank all sentences in the corpus for the query
ranks = model.rank(query, corpus)

# Print the scores
print("Query:", query)
for rank in ranks:
    print(f"{rank['score']:.2f}\t{corpus[rank['corpus_id']]}")

# 2. Alternatively, you can also manually compute the score between two sentences
sentence_combinations = [[query, sentence] for sentence in corpus]
scores = model.predict(sentence_combinations)

# Sort the scores in decreasing order to get the corpus indices
ranked_indices = np.argsort(scores)[::-1]
print("scores:", scores)
print("indices:", ranked_indices)

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Query: A man is eating pasta.
0.67	A man is eating food.
0.34	A man is eating a piece of bread.
0.08	A man is riding a horse.
0.07	A man is riding a white horse on an enclosed ground.
0.01	The girl is carrying a baby.
0.01	Two men pushed carts through the woods.
0.01	A monkey is playing drums.
0.01	A woman is playing violin.
0.01	A cheetah is running behind its prey.
scores: [0.6732372  0.34102544 0.00542465 0.0756934  0.00525378 0.00536815
 0.06676235 0.00534825 0.00516717]
indices: [0 1 3 6 2 5 7 4 8]


In [2]:
from dotenv import load_dotenv
import os
from sentence_transformers import SentenceTransformer


# Load environment variables from .env file
load_dotenv()

openai_key = os.getenv("OPENAI_API_KEY")
openai_organization = os.getenv("OPENAI_ORGANIZATION")
openai_project_id = os.getenv("OPENAI_PROJECT_ID")

splitter_name = os.getenv("SPLITTER")

embeddingModelName = "paraphrase-MiniLM-L6-v2"
embeddingModel = SentenceTransformer(embeddingModelName)

collection_name = f"levycampaign_{splitter_name.lower()}_{embeddingModelName.replace('-', '')}"

print(f"Qdrant Collection name: {collection_name}")

llmmodel = "gpt-4o-mini"

Qdrant Collection name: levycampaign_sentencesplitter_paraphraseMiniLML6v2


In [11]:

from sentence_transformers import SentenceTransformer
from qdrant_client import models, QdrantClient
from dotenv import load_dotenv
import os
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from datetime import datetime

# https://thepythoncode.com/article/calculate-rouge-score-in-python
from rouge_score import rouge_scorer
import numpy as np

qdrant = QdrantClient(path="./qdrant_data")

In [53]:
qdrant.close()

In [9]:


def debug_hit (score, payload, filename):
    with open(filename, "w") as file:
        file.write("Score: " + str(score) + "\n")
        file.write("Payload: " + str(payload) + "\n")

def retrieveDocuments (query):
    hits = qdrant.search(
        collection_name=collection_name,
        query_vector=embeddingModel.encode(query).tolist(),
        limit=10,
    )

    sorted_documents = sorted(hits, key=lambda hit: hit.score, reverse=True)

    for index, hit in enumerate(sorted_documents):
        debug_hit(hit.score, hit.payload, f"debug/hit_{index}.txt")

    return sorted_documents

In [46]:
query = "Tell me which scenarios are available in the game"

In [47]:
docs = retrieveDocuments(query)
docs

[ScoredPoint(id=2, version=0, score=0.46452985186040163, payload={'source': 'Players must keep an eye on the calendar and \nreward lords to keep them in the field. \nAn advanced rule adds detail on vassal forces’ length of service. \nPlayers may use optional screens that hide the strength of their \nlords for greater fog of war. Though Almoravid has no solitaire \nsystem, the standard game is solitaire friendly. \nA player aid foldout and a separate Taifa reference sheet sum-\nmarize key game functions. The last few pages of this rule book \nprovide scenarios and key terms. A background booklet has sup-\nporting material such as examples of play, tips on solitaire and \nteam play, detailed histories of the era, and a separate minigame \non its greatest battle.\nThe page opposite lists rules changes from Levy & Campaign \nSeries V olume I, Nevsky. In addition, this icon \uf075 precedes such \nnew rules sections and concepts in this rules booklet.\n1.1 General Course of Play\nIn Almoravi

In [16]:
import numpy as np
import torch

# https://www.sbert.net/examples/applications/cross-encoder/README.html
from sentence_transformers.cross_encoder import CrossEncoder

# Pre-trained cross encoder
#model = CrossEncoder("cross-encoder/ms-marco-TinyBERT-L-2-v2", default_activation_function=torch.nn.Sigmoid())
model = CrossEncoder("cross-encoder/stsb-distilroberta-base", default_activation_function=torch.nn.Sigmoid())

In [44]:
docsource = [doc.payload["source"] for doc in docs]

In [48]:
ranks = model.rank(query, docsource)
ranks

[{'corpus_id': 1, 'score': 0.6133897},
 {'corpus_id': 6, 'score': 0.574987},
 {'corpus_id': 2, 'score': 0.5650815},
 {'corpus_id': 3, 'score': 0.55435777},
 {'corpus_id': 9, 'score': 0.5427601},
 {'corpus_id': 4, 'score': 0.51015484},
 {'corpus_id': 7, 'score': 0.49552408},
 {'corpus_id': 8, 'score': 0.49371135},
 {'corpus_id': 5, 'score': 0.4223103},
 {'corpus_id': 0, 'score': 0.40432304}]

In [36]:
print("Query:", query)
for rank in ranks:
    print(f"{rank['score']:.2f}\t{docsource[rank['corpus_id']][:100]}")

Query: In what years does the game take place?
0.64	2.2.1 Seasons. Each Scenario covers one or more Seasons—
Spring, Summer, Autumn, and Winter. Each Se
0.61	4.0 CAMPAIGN
After Levy, players conduct that 40 Days’ Campaign. Complete 
the steps below, then pro
0.56	NOTE: Battles last at least one Round.
• This Round, 
the Enemy gains a Pursuit advantage against th
0.55	The owning 
player chooses each Lord’s fate among the above, 
within the following requirements. 
• 
0.54	© 2021 GMT Games, LLC
Summary of Almoravid Changes from Nevsky  ....................2
1. Introductio
0.53	If they lose a Battle there, they Withdraw or Retreat normal-
ly (4.3.4, 4.4.3). NOTE: Only a Marsha
0.51	Whenever a Lord Routs to create a new Flanking situa-
tion, apply remaining Hits accordingly. 
PROTE
0.49	6.3.4 Plowing. At the end of the second 40 Days of Winter (box 
8), each Lord at a Siege (only) redu
0.48	CARDS VERSUS RULES: Whenever card text contradicts the 
rules, the card takes precedence.
MARKERS: S

In [49]:
sorted_ranks = sorted(ranks, key=lambda x: x['score'], reverse=True)
sorted_ranks

[{'corpus_id': 1, 'score': 0.6133897},
 {'corpus_id': 6, 'score': 0.574987},
 {'corpus_id': 2, 'score': 0.5650815},
 {'corpus_id': 3, 'score': 0.55435777},
 {'corpus_id': 9, 'score': 0.5427601},
 {'corpus_id': 4, 'score': 0.51015484},
 {'corpus_id': 7, 'score': 0.49552408},
 {'corpus_id': 8, 'score': 0.49371135},
 {'corpus_id': 5, 'score': 0.4223103},
 {'corpus_id': 0, 'score': 0.40432304}]

In [52]:
docsource

['Players must keep an eye on the calendar and \nreward lords to keep them in the field. \nAn advanced rule adds detail on vassal forces’ length of service. \nPlayers may use optional screens that hide the strength of their \nlords for greater fog of war. Though Almoravid has no solitaire \nsystem, the standard game is solitaire friendly. \nA player aid foldout and a separate Taifa reference sheet sum-\nmarize key game functions. The last few pages of this rule book \nprovide scenarios and key terms. A background booklet has sup-\nporting material such as examples of play, tips on solitaire and \nteam play, detailed histories of the era, and a separate minigame \non its greatest battle.\nThe page opposite lists rules changes from Levy & Campaign \nSeries V olume I, Nevsky. In addition, this icon \uf075 precedes such \nnew rules sections and concepts in this rules booklet.\n1.1 General Course of Play\nIn Almoravid, two players (or teams) take the roles of Christians \n(yellow) and Musli

In [51]:
[ docsource[rank['corpus_id']] for rank in sorted_ranks ]

['4.0 CAMPAIGN\nAfter Levy, players conduct that 40 Days’ Campaign. Complete \nthe steps below, then proceed to the next Levy (3.0). The Se-\nquence of Play page of the foldout summarizes the steps.\nCAPABILITY DISCARD: The players (Christian first) must \nselect and discard any Capability cards they have in excess of \ntheir number of Mustered Lords—not including any “This Lord” \nCapabilities (3.4.4). Compare the number of cards tucked under \na side’s map edge to its number of Lord mats in use—the player \nmust discard any excess.\nCAMPAIGN STEPS:\n• Plan:\n Each side builds a Campaign Plan—an ordered stack of\nCommand cards—and may designate Lieutenants to lead other\nLords (4.1.3); Muslims may deposit Taifa Coin (4.1.4).\n• Command Activation:\n Starting with the Christian player, one\nside flips its top Command card and executes (if desired) Com-\nmand actions (4.2-4.7) by the Lord on that card or Passes back\nto the other side if a Pass card.\no Actions:\n One side is Active, us

In [31]:
print(docsource[ranks[0]['corpus_id']][:80])
print("\n")
print(docsource[ranks[1]['corpus_id']][:80])

2.2.1 Seasons. Each Scenario covers one or more Seasons—
Spring, Summer, Autumn,


4.0 CAMPAIGN
After Levy, players conduct that 40 Days’ Campaign. Complete 
the s


In [27]:
ranks

[{'corpus_id': 3, 'score': 0.6435344},
 {'corpus_id': 0, 'score': 0.6057899},
 {'corpus_id': 5, 'score': 0.5806104},
 {'corpus_id': 8, 'score': 0.57328206},
 {'corpus_id': 9, 'score': 0.5492988},
 {'corpus_id': 2, 'score': 0.54861087},
 {'corpus_id': 7, 'score': 0.54226846},
 {'corpus_id': 6, 'score': 0.53730893},
 {'corpus_id': 1, 'score': 0.49005497},
 {'corpus_id': 4, 'score': 0.38747823}]

In [22]:
ranked_indices = np.argsort(ranks)[::-1]
print("scores:", ranks)
print("indices:", ranked_indices)

TypeError: '<' not supported between instances of 'dict' and 'dict'