Usage:
1. Run `python api.py` to start the server.
2. Run this script to test the server.
3. You don't need to consider the quota limit for RAG.

You MUST keep the logs of this file in your submission.

In [1]:
from typing import Dict

import requests
import torch
import tqdm

In [2]:
class Server:
    """Minimal synchronous HTTP client for the embedding/generation API.

    Every call POSTs ``{"prompt": ...}`` as JSON to an endpoint under
    ``base_url`` and decodes the JSON body of the reply.
    """

    def __init__(self, base_url: str = "http://localhost:8000"):
        # Drop trailing slashes so endpoint paths can be appended verbatim.
        self.base_url = base_url.rstrip("/")

    def _post_json(self, endpoint: str, prompt: str) -> Dict:
        """POST ``prompt`` to ``endpoint`` and return the decoded JSON body.

        Raises:
            requests.RequestException: if the request itself fails.
            ValueError: if the response body is not valid JSON.
        """
        response = requests.post(self.base_url + endpoint, json={"prompt": prompt})
        # Decode with the JSON parser rather than ``eval``: evaluating text
        # received over the network runs arbitrary expressions, and it also
        # fails on the JSON literals ``null``/``true``/``false``.
        return response.json()

    def get_embedding(self, prompt: str) -> Dict:
        """Request the embedding of ``prompt`` from ``/get_embedding``.

        Returns:
            ``{"embedding": <value of the response's "embedding" field>}``;
            the server is expected to send a list of floats there.

        Raises:
            KeyError: if the response has no ``"embedding"`` field.
        """
        # ==== start your code here ====
        payload = self._post_json("/get_embedding", prompt)
        return {"embedding": payload["embedding"]}
        # ==== end of your code ====

    def generate(self, prompt: str) -> Dict:
        """Request a completion for ``prompt`` from ``/generate``.

        The call is blocking: it returns once the server has replied.

        Returns:
            ``{"status": ..., "text": ...}`` copied from the response; the
            server is expected to report ``"success"`` or ``"error"`` as
            the status and the generated text as ``"text"``.

        Raises:
            KeyError: if the response lacks ``"status"`` or ``"text"``.
        """
        # ==== start your code here ====
        payload = self._post_json("/generate", prompt)
        return {
            "status": payload["status"],
            "text": payload["text"],
        }
        # ==== end of your code ====


# Shared client instance used by the cells below; targets the default
# base URL declared in Server.__init__ (http://localhost:8000).
server = Server()

In [3]:
# Retrieval store: paragraph text -> its embedding. Filled in place by
# construct_database() and read by rag().
DATABASE = {}


def construct_database(file_path: str):
    """Fill the module-level ``DATABASE`` with paragraph embeddings.

    The file is read as UTF-8 and split into paragraphs on blank lines
    (``"\\n\\n"``); paragraphs that are empty after stripping are dropped.
    Each remaining paragraph is embedded with ``server.get_embedding`` and
    stored as::

        {
            "paragraph text": torch.Tensor of shape [dim,],
            ...
        }

    A paragraph that occurs more than once keeps a single entry.

    Args:
        file_path: path of the text file to index.

    Raises:
        OSError: if the file cannot be opened.
    """
    # ==== start your code here ====
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # The file handle is no longer needed once the text is in memory.
    paragraphs = [p.strip() for p in content.split('\n\n') if p.strip()]
    for paragraph in paragraphs:
        emb = server.get_embedding(paragraph)["embedding"]
        # Store a tensor, as the database contract above requires, rather
        # than the raw list returned by the server.
        DATABASE[paragraph] = torch.tensor(emb)

    # ==== end of your code ====

# Index openai_wiki.txt; this sends one embedding request per paragraph,
# so the API server must already be running.
construct_database("openai_wiki.txt")

In [7]:
def cosine_similarity(a: torch.Tensor, b: torch.Tensor) -> float:
    """Return the cosine similarity between two vectors.

    Both inputs are flattened first, so ``[dim]`` and ``[1, dim]`` tensors
    are accepted alike. They must hold the same number of elements and
    share a floating-point dtype.

    Args:
        a: first vector.
        b: second vector.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` as a Python float, or ``0.0`` when
        either vector has zero magnitude (the ratio is undefined there
        and would otherwise come out as NaN).
    """
    # ==== start your code here ====
    a_flat = a.reshape(-1)
    b_flat = b.reshape(-1)

    denominator = torch.norm(a_flat) * torch.norm(b_flat)
    if denominator.item() == 0:
        # Avoid 0/0 -> NaN, which would poison any later ranking.
        return 0.0

    # torch.dot works directly on 1-D tensors; no transpose is needed.
    return (torch.dot(a_flat, b_flat) / denominator).item()
    # ==== end of your code ====


def rag(prompt: str, topk: int = 5):
    """Answer ``prompt`` with retrieval-augmented generation and print it.

    The prompt is embedded, every ``DATABASE`` entry is scored against it
    with ``cosine_similarity``, and the ``topk`` best-matching paragraphs
    (joined by single spaces) are prepended to the prompt before it is sent
    to ``server.generate``. The question, the augmented prompt and the
    answer are printed; nothing is returned.

    Args:
        prompt: the user question.
        topk: maximum number of paragraphs to retrieve. Fewer are used
            when the database holds fewer entries.
    """
    # Scoring happens on the CPU, so no GPU is required to run this.
    prompt_embedding = torch.tensor(server.get_embedding(prompt)["embedding"])

    # ==== start your code here ====
    prompts = list(DATABASE)
    similarities = [
        # as_tensor accepts stored tensors and plain lists of floats alike.
        cosine_similarity(
            prompt_embedding,
            torch.as_tensor(emb, dtype=prompt_embedding.dtype),
        )
        for emb in DATABASE.values()
    ]

    # torch.topk raises when k exceeds the number of candidates.
    k = max(0, min(topk, len(similarities)))
    topk_indices = torch.topk(torch.tensor(similarities), k).indices.tolist()
    selected_prompts = " ".join(prompts[i] for i in topk_indices)
    rag_prompt = selected_prompts + prompt

    # ==== end of your code ====
    answer = server.generate(rag_prompt)['text']
    print(f"Question: {prompt}\n\nRAG Prompt: {rag_prompt}\n\nAnswer: {answer}")

# Run the pipeline on a sample question; prints the question, the
# augmented prompt and the generated answer.
rag("When was Sam Altman removed as CEO?")


Question: When was Sam Altman removed as CEO?

RAG Prompt: Controversies
Firing of Altman
Further information: Removal of Sam Altman from OpenAI
On November 17, 2023, Sam Altman was removed as CEO when its board of directors (composed of Helen Toner, Ilya Sutskever, Adam D'Angelo and Tasha McCauley) cited a lack of confidence in him. Chief Technology Officer Mira Murati took over as interim CEO. Greg Brockman, the president of OpenAI, was also removed as chairman of the board[242][243] and resigned from the company's presidency shortly thereafter.[244] Three senior OpenAI researchers subsequently resigned: director of research and GPT-4 lead Jakub Pachocki, head of AI risk Aleksander Madry, and researcher Szymon Sidor.[245][246] On June 13, 2024, OpenAI announced that Paul Nakasone, the former head of the NSA was joining the company's board of directors. Nakasone also joined the company's security subcommittee.[95] In October 2023, Sam Altman and Peng Xiao, CEO of the Emirati AI firm G