In [1]:
%pip install termcolor

Note: you may need to restart the kernel to use updated packages.


In [2]:
from typing import Dict, Union, List
import json
import pinecone
import boto3
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from termcolor import colored
from pinecone_text.sparse import SpladeEncoder
from pinecone_text.hybrid import hybrid_convex_scale


  from tqdm.autonotebook import tqdm


In [3]:
# Create a function to retrieve a secret from AWS Secrets Manager

def get_secret(secret_name: str, region_name: str = "us-east-1") -> Union[str, Dict]:
    """Fetch a secret from AWS Secrets Manager.

    Args:
        secret_name: Identifier of the secret, passed to the client as ``SecretId``.
        region_name: AWS region used to build the Secrets Manager client.
            Defaults to ``"us-east-1"``, the value that was previously hard-coded.

    Returns:
        The parsed JSON value when ``SecretString`` is valid JSON, otherwise the
        raw ``SecretString``.

    Raises:
        KeyError: If the response contains no ``SecretString`` entry.
        Exceptions raised by the boto3 client call propagate unchanged.
    """
    session = boto3.session.Session()
    client = session.client(
        service_name="secretsmanager",
        region_name=region_name,
    )
    response = client.get_secret_value(SecretId=secret_name)
    secret_string = response["SecretString"]
    try:
        return json.loads(secret_string)
    except json.JSONDecodeError:
        # Plain-text secrets (e.g. a bare API key) are not JSON; hand them back as-is.
        # Only the decode failure is caught so unrelated errors are not hidden.
        return secret_string

In [4]:
# Name of the Pinecone index used throughout this notebook.
INDEX_NAME = "cheating-demo"
pinecone_api_key = get_secret("dev/tai_service/pinecone_db/api_key")
pinecone.init(api_key=pinecone_api_key, environment="us-east-1-aws")
try:
    # Create the index only when it is missing, so re-running this cell against an
    # existing index is a no-op instead of going through the API error path.
    if INDEX_NAME not in pinecone.list_indexes():
        # The call's result is not used anywhere in this notebook, so it is not bound.
        pinecone.create_index(
            name=INDEX_NAME,
            dimension=1536,  # presumably the size of the dense embeddings upserted later — confirm
            metric="dotproduct",
            pod_type="s1.x1",
            pods=1,
            replicas=1,
        )
except pinecone.exceptions.ApiException as e:
    # Best effort: report the API error rather than aborting the notebook.
    print(e)

In [5]:
# Sample essay standing in for the assignment a student turned in. The leading
# backslash after the opening quotes keeps the text from starting with a newline.
turned_in_assignment = """\
A Comparative Analysis of the American Civil War and World War 1

Introduction:
The aim of this paper is to compare and contrast two significant events in history, the American Civil War and World War 1. Both conflicts left a lasting impact on their respective nations and the entire world. By examining their causes, strategies, and outcomes, this essay seeks to highlight the similarities and differences between these momentous events.

Causes:
The American Civil War erupted due to underlying tensions between the Northern and Southern states regarding slavery, states' rights, and economic disparities. On the other hand, World War 1 was triggered by a complex web of factors, including militarism, alliances, imperialism, and the assassination of Archduke Franz Ferdinand.

Strategies:
During the American Civil War, battlefield tactics consisted primarily of traditional methods, including frontal assaults and trench warfare, alongside the use of rifled muskets and ironclads. World War 1, however, introduced modern warfare techniques such as trench networks, barbed wire, poison gas, machine guns, and tanks, revolutionizing the face of warfare.

Outcomes:
As a consequence of the American Civil War, the abolition of slavery occurred, along with an increased centralized power of the federal government. In contrast, World War 1 led to significant geopolitical shifts, the redrawing of national boundaries, the collapse of empires, and the emergence of new ideologies such as communism.

Conclusion:
In conclusion, while the American Civil War and World War 1 were distinct in their causes, strategies, and outcomes, they both served as pivotal moments in history. The American Civil War paved the way for societal change, emphasizing the importance of equality and the role of the federal government. On the other hand, World War 1 reshaped the global order and set the stage for future conflicts. By comparing and contrasting these two wars, we can gain valuable insights into the transformative power of conflict.
"""

In [6]:
# Sample chat transcript in which the student asks the chatbot for brainstorming
# and structuring advice rather than for finished essay text.
honest_student = """\
Student: Hi there! I'm working on my assignment comparing the American Civil War and World War 1. Do you have any suggestions to help me brainstorm and organize my thoughts?
ChatBot: Of course! I'd be happy to assist you. Let's start by discussing the main points you want to cover in your essay. What aspects of these wars are you planning to compare and contrast?

Student: I was thinking of focusing on the causes, strategies, and outcomes of both wars. I find these aspects really interesting and important to understand their impact.
ChatBot: Great choice! To analyze the causes, you can discuss how social, economic, and political factors led to the American Civil War, while the complex web of militarism, alliances, and imperialism triggered World War 1. For the strategies, consider highlighting the traditional tactics used during the Civil War, versus the introduction of modern warfare techniques during World War 1. Finally, don't forget to explore the outcomes and the long-lasting consequences of both conflicts.

Student: That sounds like a solid structure. I'll incorporate those points. Can you help me refine the introduction and conclusion to make it engaging and impactful?
ChatBot: Absolutely! For the introduction, you can try starting with a captivating statement about the significance of studying history and the importance of understanding the events that shaped our world. In the conclusion, you can emphasize how comparing and contrasting these two wars provides valuable insights into the transformative power of conflict and the far-reaching consequences it can have on societies.

Student: I really like those ideas! Thank you so much for your guidance. I'll make these revisions and then start working on the body of the essay. Is there anything else you suggest I consider?

ChatBot: One suggestion is to include specific examples and evidence to support your comparisons and contrasts. You could also highlight any lesser-known facts or interesting anecdotes that help bring the wars to life. Remember to keep your writing concise and focused within the given word count of 300 words.

Student: Thank you for all your help! I feel more confident now in completing my assignment. I appreciate your guidance and support.
ChatBot: You're welcome! I'm glad I could assist you. If you have any more questions or need further assistance, feel free to ask. Good luck with your assignment!
"""

# Sample chat transcript in which the student asks the chatbot to write each
# section of the essay (introduction, causes, strategies, outcomes, conclusion).
cheating_student = """\
Student: Hey, I need some help with my assignment comparing the American Civil War and World War 1. Can you generate an introduction for me?

ChatBot: Of course! Here's an introduction for your paper:

"In this essay, I will provide an in-depth analysis and comparison of the American Civil War and World War 1. These two significant historical events had a profound impact on their respective nations and the entire world. By examining their causes, strategies, and outcomes, we can gain valuable insights into the similarities and differences between them."

Student: Great, thanks! Now, can you help me with the section on causes?

ChatBot: Certainly! Here's a section on the causes you can include:

"The American Civil War was fueled by tensions regarding slavery, states' rights, and economic disparities, while World War 1 was primarily triggered by militarism, alliances, imperialism, and the assassination of Archduke Franz Ferdinand."

Student: Perfect. How about the strategies section?

ChatBot: Here's a section on strategies you can incorporate:

"During the American Civil War, traditional tactics like frontal assaults and trench warfare were utilized, alongside the use of rifled muskets and ironclads. On the other hand, World War 1 introduced modern warfare techniques such as trench networks, barbed wire, poison gas, and tanks, revolutionizing the face of warfare."

Student: Excellent! Now I jubst need the outcomes section.

ChatBot: Here's a section on the outcomes:

"The American Civil War led to the abolition of slavery and an increased centralized power of the federal government. In contrast, World War 1 resulted in geopolitical shifts, the redrawing of national boundaries, the collapse of empires, and the emergence of new ideologies like communism."

Student: Awesome! With this help, my assignment will be a breeze. Last things, can you write me a conclusion?

Chatbot: Sure thing! Here's a conclusion for your paper:

"In conclusion, while the Civil War and WW1 were unique in their causes, leadership, and outcomes, they serve as pivotal moments throughout all time. The Civil War paved the way for change, highlighting the role of the federal government. On the other hand, WW1 reshaped the world and set the stage for future conflicts. By comparing these two wars, we observe how conflict can transform societies."

Student: Thanks so much! I really appreciate your help!
"""

In [7]:
# Number of characters shared between consecutive chunks (0 = no overlap).
CHUNK_OVERLAP = 0

# Text splitter applied to both the assignment and the chat transcripts.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=CHUNK_OVERLAP,
    keep_separator=False,
)

# Dense embedding client; the OpenAI key is read from the secrets store.
embedder = OpenAIEmbeddings(
    openai_api_key=get_secret("dev/tai_service/openai/api_key"),
)

In [8]:
# Split the turned-in assignment into chunks and store every chunk in the index
# together with its dense and sparse embeddings.
assignment_doc = Document(page_content=turned_in_assignment)
assignment_docs = splitter.split_documents([assignment_doc])

# Extract the chunk texts once; both encoders consume the same list.
assignment_texts = [doc.page_content for doc in assignment_docs]
assignment_embeddings = embedder.embed_documents(assignment_texts)
sparse_embeddings = SpladeEncoder().encode_documents(assignment_texts)

# zip() stops at the shortest input, so a short encoder result would silently
# drop chunks; fail loudly instead.
assert len(assignment_docs) == len(assignment_embeddings) == len(sparse_embeddings), (
    "every chunk needs exactly one dense and one sparse embedding"
)

# One record per chunk; the chunk text is kept as metadata so a query match can
# show which part of the assignment it hit.
vectors = [
    {
        "id": str(_id),
        "values": embedding,
        "metadata": {"text": doc.page_content},
        "sparse_values": sparse_embedding,
    }
    for _id, (doc, embedding, sparse_embedding) in enumerate(
        zip(assignment_docs, assignment_embeddings, sparse_embeddings)
    )
]
index = pinecone.Index(INDEX_NAME)
index.upsert(vectors=vectors)


{'upserted_count': 7}

In [9]:
# Chunk both chat transcripts with the same splitter used for the assignment.
cheating_student_doc = Document(page_content=cheating_student)
honest_student_doc = Document(page_content=honest_student)
cheating_student_docs = splitter.split_documents([cheating_student_doc])
honest_student_docs = splitter.split_documents([honest_student_doc])

# Extract the chunk texts once per transcript; each list feeds both encoders.
cheating_student_texts = [doc.page_content for doc in cheating_student_docs]
honest_student_texts = [doc.page_content for doc in honest_student_docs]

# Build the sparse encoder a single time and reuse it for both transcripts
# (constructing it presumably loads model weights — avoid doing that twice).
query_sparse_encoder = SpladeEncoder()

cheating_student_embeddings = embedder.embed_documents(cheating_student_texts)
cheating_student_sparse_embeddings = query_sparse_encoder.encode_queries(cheating_student_texts)
honest_student_embeddings = embedder.embed_documents(honest_student_texts)
honest_student_sparse_embeddings = query_sparse_encoder.encode_queries(honest_student_texts)

In [10]:
# Markers used in the printed report.
student_emoji = "🏫"
honest_emoji = "✅"
cheating_emoji = "❌"

# Weight used as `alpha` when blending the dense and sparse query vectors.
ALPHA = 0.25
# Assumed ceiling for a sparse similarity score. The true upper bound is unknown
# and may be domain specific.
MAX_SPARSE_SCORE = 76
# Fraction of the assumed maximum score above which a chat chunk is flagged.
PERCENTAGE = 0.7

# NOTE(review): the first term presumably assumes a dense similarity of at most 1 — confirm.
CHEATING_THRESHOLD = (
    ALPHA * PERCENTAGE
    + (1 - ALPHA) * MAX_SPARSE_SCORE * PERCENTAGE
)

def print_scores_for_student(
    student_docs: List[Document],
    student_embeddings: List[List[float]],
    sparse_embeddings: List[Dict],
    student_number: int,
) -> None:
    """Print, per chat chunk, how closely it matches the indexed assignment.

    Each chunk is queried against the ``INDEX_NAME`` index with a hybrid
    (dense + sparse) vector weighted by ``ALPHA``. The top match's score is
    printed in red when it exceeds ``CHEATING_THRESHOLD`` and in green
    otherwise; for red chunks the matching assignment text is printed too.
    A warning is printed at the end if any chunk exceeded the threshold.

    Args:
        student_docs: Chat chunks; only ``page_content`` is read.
        student_embeddings: One dense vector per chunk, in the same order.
        sparse_embeddings: One sparse vector per chunk, in the same order.
        student_number: Number shown in the report header.
    """
    print(colored(f"{student_emoji} Student {student_number} {student_emoji}", "blue"))
    likely_cheating = False
    index = pinecone.Index(INDEX_NAME)
    for student_doc, student_embedding, sparse_vector in zip(student_docs, student_embeddings, sparse_embeddings):
        hybrid_dense, hybrid_sparse = hybrid_convex_scale(student_embedding, sparse_vector, alpha=ALPHA)
        results = index.query(hybrid_dense, sparse_vector=hybrid_sparse, top_k=1, include_metadata=True)
        matches = results.to_dict()["matches"]

        print(colored("Chat Message Contents: ", "blue"))
        print(student_doc.page_content)

        if not matches:
            # An empty result (e.g. nothing upserted yet) has no score to compare;
            # report it instead of failing on matches[0].
            print(colored("No matching assignment chunk found.", "yellow"))
            print("\n")
            continue

        best_match = matches[0]
        similarity_score = best_match["score"]
        above_threshold = similarity_score > CHEATING_THRESHOLD
        # A single chunk over the threshold is enough to flag the student.
        likely_cheating = likely_cheating or above_threshold

        color = "red" if above_threshold else "green"
        print(colored(f"Similarity score: {similarity_score}", color))
        if above_threshold:
            print("Part of assignment similar to student chats: ", end="")
            print(colored(f"{best_match['metadata']['text']}", color))
        print("\n")

    if likely_cheating:
        print(colored(f"{cheating_emoji} WARNING: This student may be cheating! {cheating_emoji}", "red"))
    print("\n\n\n")

# Show the rounded threshold first so the scores below can be read against it.
print(colored(f"Cheating Threshold: {round(CHEATING_THRESHOLD)}\n\n\n", color="yellow"))

# Student 1: the transcript in which the chatbot wrote the essay sections.
print_scores_for_student(
    student_docs=cheating_student_docs,
    student_embeddings=cheating_student_embeddings,
    sparse_embeddings=cheating_student_sparse_embeddings,
    student_number=1,
)

# Student 2: the transcript in which the chatbot only gave guidance.
print_scores_for_student(
    student_docs=honest_student_docs,
    student_embeddings=honest_student_embeddings,
    sparse_embeddings=honest_student_sparse_embeddings,
    student_number=2,
)

[33mCheating Threshold: 40


[0m
[34m🏫 Student 1 🏫[0m
[34mChat Message Contents: [0m
Student: Hey, I need some help with my assignment comparing the American Civil War and World War 1. Can you generate an introduction for me?

ChatBot: Of course! Here's an introduction for your paper:
[32mSimilarity score: 15.4994297[0m


[34mChat Message Contents: [0m
"In this essay, I will provide an in-depth analysis and comparison of the American Civil War and World War 1. These two significant historical events had a profound impact on their respective nations and the entire world. By examining their causes, strategies, and outcomes, we can gain valuable insights into the similarities and differences between them."

Student: Great, thanks! Now, can you help me with the section on causes?

ChatBot: Certainly! Here's a section on the causes you can include:
[32mSimilarity score: 27.6989365[0m


[34mChat Message Contents: [0m
"The American Civil War was fueled by tensions regarding slav

In [12]:
# Ask before doing anything destructive: previously all vectors were deleted
# before the prompt, so answering "n" still wiped the index contents.
confirm = input("Are you sure you want to delete the index? (y/n)")
if confirm.strip().lower() == "y":
    index = pinecone.Index(INDEX_NAME)
    # Clear the stored vectors, then remove the index itself.
    index.delete(delete_all=True)
    pinecone.delete_index(INDEX_NAME)
    print("Index deleted.")
else:
    print("Index not deleted.")

Index deleted.
