<a href="https://colab.research.google.com/github/seanreed1111/colab-demos/blob/master/Copy_of_resumeGPT_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install -qqq loguru textract tiktoken openai azure-ai-ml mlflow azureml-sdk azureml-mlflow #ast

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.9/71.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.7/17.7 MB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m814.0/814.0 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.9/106.9 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

# before running this notebook, UPLOAD these files
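- openai.env (a JSON file containing an `OPENAI_API_KEY` entry)
- azure.env (optional; a JSON file with `resource_group`, `workspace_name` and `subscription_id`, only used if you uncomment `azure_env_path` in the setup cell)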
- Resumes stored in "Resumes" dir
 

In [None]:
import os,argparse,loguru, json, time, datetime, openai
from pathlib import Path
from loguru import logger

In [None]:
def maybe_load_aml_env_vars(env_path=None):
  """Copy the Azure ML workspace identifiers from a JSON file into os.environ.

  The file must contain a JSON object with the keys "resource_group",
  "workspace_name" and "subscription_id". Each value is stored in the
  environment variable of the same name.

  Args:
    env_path: path of the JSON file. None (the default) means that no Azure
      configuration was supplied and yields False without logging.

  Returns:
    True when all three variables end up set to non-empty values, False in
    every other case (no path, unreadable file, invalid JSON, missing key,
    non-string or empty value).
  """
  import os, json
  if env_path is None:
    return False
  required_keys = ("resource_group", "workspace_name", "subscription_id")
  try:
    with open(env_path, "r") as f:
      env_vars = json.load(f)
    for key in required_keys:
      os.environ[key] = env_vars[key]
  except Exception as e:
    logger.error(f"{e}")
    return False
  # Always answer with a real bool; an empty value counts as "not loaded".
  return all(os.getenv(key) for key in required_keys)

In [None]:
def set_open_ai_key(env_path=None):
  """Load the OpenAI API key from a JSON file and check that the API accepts it.

  The file must contain a JSON object with an "OPENAI_API_KEY" entry. The key
  is stored both in os.environ["OPENAI_API_KEY"] and in openai.api_key, and is
  then exercised with one call to openai.Model.list().

  Args:
    env_path: path of the JSON file holding the key.

  Returns:
    True when the key was loaded and the test call succeeded; False otherwise.
    Failures are logged through `logger` rather than raised.
  """
  import json, os
  if env_path is None:
    logger.error("No OpenAI env file path was provided.")
    return False
  try:
    with open(env_path, "r") as f:
        env_vars = json.load(f)
    os.environ["OPENAI_API_KEY"] = env_vars["OPENAI_API_KEY"]
    openai.api_key = os.environ["OPENAI_API_KEY"]
    openai.Model.list() #test a random command on the openai API
  except Exception as e:
    logger.error(f"{e}")
    return False
  return True

def test_set_open_ai_key(key_path=None):
  """Re-register the OpenAI key from `key_path` and report whether it works.

  The currently registered key is cleared first, so a stale key cannot make
  the check pass. set_open_ai_key already performs the API test call and
  reports the outcome, so its result is returned directly instead of issuing
  a second, redundant request.

  Args:
    key_path: path of the JSON file holding the key.

  Returns:
    True when the key was loaded and accepted by the API, False otherwise.
  """
  openai.api_key = None #disconnect from api key if already registered
  return set_open_ai_key(key_path)


In [None]:
def maybe_get_ml_client(env_path=None):
  """Build an Azure ML `MLClient` for the workspace described by `env_path`.

  Side effect: points mlflow at the workspace's tracking URI.

  Args:
    env_path: path of the Azure config file. It is passed both to
      `Workspace.from_config` and to `maybe_load_aml_env_vars`. A falsy value
      means "no Azure configuration" and returns None immediately.

  Returns:
    The `MLClient`, or None when no path was given, the workspace variables
    could not be loaded, or the client could not be constructed.

  Raises:
    Whatever `Workspace.from_config` / `get_mlflow_tracking_uri` raise; those
    calls are intentionally left unguarded, as in the original.
  """
  # Bail out before the heavy Azure imports so the notebook still runs when no
  # Azure configuration (or no Azure SDK) is available.
  if not env_path: return None

  # this is a mix of sdk v1 and v2. Try to consolidate
  import os, mlflow
  from azureml.core import Workspace
  from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
  from azure.ai.ml import MLClient

  ws = Workspace.from_config(env_path)
  tracking_uri = ws.get_mlflow_tracking_uri()
  mlflow.set_tracking_uri(tracking_uri)

  try:
      credential = DefaultAzureCredential()
      # Constructing the credential does not prove it can authenticate, so
      # request a token here; otherwise the fallback below can never trigger.
      credential.get_token("https://management.azure.com/.default")
  except Exception:
      # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not working
      credential = InteractiveBrowserCredential()

  if not maybe_load_aml_env_vars(env_path):
    return None

  try:
    return MLClient(
        subscription_id=os.getenv("subscription_id"),
        resource_group_name=os.getenv("resource_group"),
        workspace_name=os.getenv("workspace_name"),
        credential=credential,
    )
  except Exception as e:
    logger.error(f"{e}")
    return None

In [None]:
def maybe_setup_azure_env(azure_env_path=None):
  """Set up the Azure ML / mlflow environment when Azure credentials are available.

  When the env file exists and loads, an ML client is created and a small
  smoke-test mlflow run is recorded (one param, one metric, one text artifact)
  under the 'mlflow-2' experiment.

  Args:
    azure_env_path: path of the Azure env file, as `pathlib.Path` or `str`.
      None or an empty value means no Azure setup is wanted.

  Returns:
    True when the client was created and the smoke-test run was recorded,
    False when Azure is not configured or the client could not be created.
  """
  from pathlib import Path
  from random import random

  if not azure_env_path:
    return False
  # Accept plain strings as well as Path objects.
  azure_env_path = Path(azure_env_path)
  if not (azure_env_path.is_file() and maybe_load_aml_env_vars(azure_env_path)):
    return False

  ml_client = maybe_get_ml_client(azure_env_path)
  if not ml_client:
    return False

  #do a random test to check that ml_client and mlflow are playing nicely together
  import mlflow
  experiment_name = 'mlflow-2'
  mlflow.set_experiment(experiment_name)

  with mlflow.start_run():
      mlflow.log_param("hello_param", "world")
      mlflow.log_metric("hello_metric2", random())
      # Write the artifact from Python rather than through a shell `echo`, so
      # the content is the same on every platform and no shell is spawned.
      Path("helloworld2.txt").write_text("hello world2\n")
      mlflow.log_artifact("helloworld2.txt")
  return True

In [None]:
# setup: reset the shared state, then point at the uploaded files
azure_env_path = None
openai_env_path = None
ml_client = None
openai.api_key = None

cwd = Path.cwd()
resume_path = cwd.joinpath("Resumes")
resume_path.mkdir(exist_ok=True)  # the resume PDFs are read from this folder

# azure_env_path = cwd / "azure.env" ##uncomment if providing azure env
openai_env_path = cwd.joinpath("openai.env")
maybe_setup_azure_env(azure_env_path)
set_open_ai_key(openai_env_path)






True

# SPLIT SECTIONS
source: Embedding_Wikipedia_articles_for_search.ipynb 
https://colab.research.google.com/drive/1EJMtCmF8jZc2Y-c1RaBxFSCTPcjzjJf4#scrollTo=TOVSYkDur9zA

Next, we'll recursively split long sections into smaller sections.

There's no perfect recipe for splitting text into sections.

Some tradeoffs include:
- Longer sections may be better for questions that require more context
- Longer sections may be worse for retrieval, as they may have more topics muddled together
- Shorter sections are better for reducing costs (which are proportional to the number of tokens)
- Shorter sections allow more sections to be retrieved, which may help with recall
- Overlapping sections may help prevent answers from being cut by section boundaries

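Here, we'll use a simple approach and limit sections to 1,000 tokens each by default, recursively halving any sections that are too long. (The resume-splitting cell below overrides that default with `MAX_TOKENS = 150`.) To avoid cutting in the middle of useful sentences, we'll split along paragraph boundaries when possible, falling back to line breaks and then sentence boundaries.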

### extract text from pdf

In [None]:
import textract, os, openai, tiktoken

In [None]:
#TODO walk the directory to get all the filenames, use as names of the people in tagging and document retrieval
#TODO use regex to get rid of excess spaces and new lines. only one new line needed per line.
file_names = ["Jesse_Jayant.pdf", "Nadia_Smythe.pdf", "Naimal_Chisti.pdf", "SeanReed.pdf"]
file_paths = [resume_path / file_name for file_name in file_names]
print(file_paths)
# the lower-cased file stem doubles as the person's name tag
names = [file_path.stem.lower() for file_path in file_paths]
print(names)

# Extract the raw text from each PDF using textract (pdfminer backend), decoded as UTF-8
texts = [
    textract.process(file_path, method='pdfminer').decode('utf-8')
    for file_path in file_paths
]

#TODO Do more cleaning with regex
texts = [text.strip().replace("  ", " ") for text in texts]
# one (titles, text) pair per resume: tuple[list[str], str]
clean_texts = [
    (['NameOnResume: ' + name], text)
    for name, text in zip(names, texts)
]

len(clean_texts)

[PosixPath('/content/Resumes/Jesse_Jayant.pdf'), PosixPath('/content/Resumes/Nadia_Smythe.pdf'), PosixPath('/content/Resumes/Naimal_Chisti.pdf'), PosixPath('/content/Resumes/SeanReed.pdf')]
['jesse_jayant', 'nadia_smythe', 'naimal_chisti', 'seanreed']


4

In [None]:
GPT_MODEL = 'text-ada-001'  # only matters insofar as it selects which tokenizer to use
# NOTE(review): the chunks are later embedded with EMBEDDING_MODEL, which may map to a
# different tokenizer than this model - confirm the counts are close enough.

def num_tokens(text: str, model: str = GPT_MODEL) -> int:
    """Count the tokens `text` encodes to under the tokenizer tiktoken selects for `model`."""
    return len(tiktoken.encoding_for_model(model).encode(text))

def halved_by_delimiter(string: str, delimiter: str = "\n") -> list[str, str]:
    """Cut `string` into two parts at an occurrence of `delimiter`, aiming for equal token counts.

    Returns a two-item list [left, right]:
      * delimiter absent       -> [string, ""]
      * exactly one occurrence -> the two pieces around it
      * otherwise              -> the pieces are re-joined into a left and a
        right part at the split point whose left side is closest to half of
        the total token count. Either part may be "" when no useful split
        point exists.
    """
    pieces = string.split(delimiter)
    if len(pieces) < 3:
        # one piece: nothing to split on; two pieces: they already are the halves
        return [string, ""] if len(pieces) == 1 else pieces

    target = num_tokens(string) // 2
    smallest_gap = target
    split_at = 0
    # Grow the left side one piece at a time and stop as soon as the distance
    # to the halfway mark no longer shrinks; `split_at` then marks the first
    # piece that belongs to the right side.
    for split_at in range(len(pieces)):
        prefix = delimiter.join(pieces[: split_at + 1])
        gap = abs(target - num_tokens(prefix))
        if gap >= smallest_gap:
            break
        smallest_gap = gap
    return [delimiter.join(pieces[:split_at]), delimiter.join(pieces[split_at:])]


def truncated_string(
    string: str,
    model: str,
    max_tokens: int,
    print_warning: bool = True,
) -> str:
    """Return `string` cut down to its first `max_tokens` tokens.

    Tokens are produced with the tiktoken encoding for `model`. When the input
    was longer than `max_tokens` and `print_warning` is set, a warning is
    emitted through `logger`.
    """
    tokenizer = tiktoken.encoding_for_model(model)
    tokens = tokenizer.encode(string)
    clipped = tokenizer.decode(tokens[:max_tokens])
    was_cut = len(tokens) > max_tokens
    if was_cut and print_warning:
        logger.warning(f"Warning: Truncated string from {len(tokens)} tokens to {max_tokens} tokens.")
    return clipped

In [None]:
def split_strings_from_subsection(
    subsection: tuple[list[str], str],
    max_tokens: int = 1000,
    model: str = GPT_MODEL,
    max_recursion: int = 5,
) -> list[str]:
    """
    Split a subsection into a list of strings, each with no more than max_tokens.

    Args:
        subsection: tuple of parent titles [H1, H2, ...] and text (str). The
            titles are prepended (joined by blank lines) to every string produced.
        max_tokens: upper bound on the token count of each returned string.
        model: model name used to pick the tokenizer, both for counting tokens
            here and for truncation.
        max_recursion: how many more times the text may be halved before the
            function gives up and truncates.

    Returns:
        The strings in their original order. A string is truncated to
        max_tokens only when the recursion budget is exhausted or no delimiter
        yields two non-empty halves.
    """
    titles, text = subsection
    string = "\n\n".join(titles + [text])
    # Count with the same model that truncation uses, so the `model` argument
    # is honoured consistently instead of relying on num_tokens' own default.
    num_tokens_in_string = num_tokens(string, model=model)
    # if length is fine, return string
    if num_tokens_in_string <= max_tokens:
        return [string]
    # if recursion hasn't found a split after X iterations, just truncate
    if max_recursion == 0:
        return [truncated_string(string, model=model, max_tokens=max_tokens)]
    # otherwise, split in half and recurse, from coarsest to finest delimiter
    for delimiter in ["\n\n", "\n", ". "]:
        left, right = halved_by_delimiter(text, delimiter=delimiter)
        if left == "" or right == "":
            # if either half is empty, retry with a more fine-grained delimiter
            continue
        # recurse on each half
        results = []
        for half in (left, right):
            half_strings = split_strings_from_subsection(
                (titles, half),
                max_tokens=max_tokens,
                model=model,
                max_recursion=max_recursion - 1,
            )
            results.extend(half_strings)
        return results
    # otherwise no split was found, so just truncate (should be very rare)
    return [truncated_string(string, model=model, max_tokens=max_tokens)]
 

In [None]:
# split resumes into chunks. Small chunks probably better when searching for skills? 
# maybe even shrink to individual sentences
MAX_TOKENS = 150
# flatten the per-resume chunk lists into one list, keeping resume order
resume_strings = [
    chunk
    for section in clean_texts
    for chunk in split_strings_from_subsection(section, max_tokens=MAX_TOKENS)
]

print(f"{len(clean_texts)} resumes split into {len(resume_strings)} strings.")


4 resumes split into 50 strings.


# calculate embeddings and store in dataframe

In [None]:
import pandas as pd
EMBEDDING_MODEL = "text-embedding-ada-002"  # OpenAI's best embeddings as of Apr 2023
MAX_BATCH_SIZE = 1000 # you can submit up to 2048 embedding inputs per request
NUMBER_OF_STRINGS_TO_EMBED = len(resume_strings)

# One batch when everything fits, MAX_BATCH_SIZE otherwise. The lower bound of 1
# keeps range() from raising "arg 3 must not be zero" when there is nothing to embed.
BATCH_SIZE = max(1, min(NUMBER_OF_STRINGS_TO_EMBED, MAX_BATCH_SIZE))

embeddings = []
for batch_start in range(0, NUMBER_OF_STRINGS_TO_EMBED, BATCH_SIZE):
    # clamp so the progress line reports the real last index of a partial batch
    batch_end = min(batch_start + BATCH_SIZE, NUMBER_OF_STRINGS_TO_EMBED)
    batch = resume_strings[batch_start:batch_end]
    print(f"Batch {batch_start} to {batch_end-1}")
    response = openai.Embedding.create(model=EMBEDDING_MODEL, input=batch)
    for i, be in enumerate(response["data"]):
        assert i == be["index"]  # double check embeddings are in same order as input
    batch_embeddings = [e["embedding"] for e in response["data"]]
    embeddings.extend(batch_embeddings)

# one row per resume chunk: the chunk text and its embedding vector
df = pd.DataFrame({"text": resume_strings, "embedding": embeddings})

Batch 0 to 49


In [None]:
# Peek at the first rows: each row holds one resume chunk ("text") and its embedding vector.
df.head()

Unnamed: 0,text,embedding
0,NameOnResume: jesse_jayant\n\nJesse Jayant \n\...,"[-0.02786160260438919, -0.011775648221373558, ..."
1,NameOnResume: jesse_jayant\n\n•\n•\n•\n\nBudge...,"[-0.004550006706267595, -0.01587117835879326, ..."
2,NameOnResume: jesse_jayant\n\n•\n•\n• M&A\n\nE...,"[-0.011240021325647831, -0.03403883054852486, ..."
3,NameOnResume: jesse_jayant\n\nTracked spending...,"[-0.014598352834582329, -0.00543337594717741, ..."
4,NameOnResume: jesse_jayant\n\n•\n• As a member...,"[-0.006679536309093237, -0.014260951429605484,..."


# Search documents using query and text embeddings, and retrieve the relevant consultant name from the resume information using GPT

1. TODO Store: Embeddings are saved (for large datasets, use a vector database)
1. Search (once per query) - Given a user question, generate an embedding for the query from the OpenAI API
1. Using the embeddings, rank the text sections by relevance to the query
1. Ask (once per query)
  1. Insert the question and the most relevant sections into a message to GPT
  1. Return GPT's answer

In [None]:
from scipy import spatial

In [None]:
# Re-assigns the same value GPT_MODEL was given in the splitting section
# (presumably so this section can be run on its own - TODO confirm it is still needed).
GPT_MODEL = 'text-ada-001'

# #TODO Modify so that it can return and parse more than one response
# def extract_chunk(document,template_prompt, model='text-ada-001'):
    
#     prompt=template_prompt.replace('<document>',document)

#     response = openai.Completion.create(
#     model=model, 
#     prompt=prompt,
#     temperature=0,
#     max_tokens=100,
#     top_p=1,
#     frequency_penalty=0,
#     presence_penalty=0
#     )
#     return "1." + response['choices'][0]['text']

In [None]:
# ### getting items from csv file

# import pandas as pd
# import numpy as np

# datafile_path = "data/fine_food_reviews_with_embeddings_1k.csv"

# df = pd.read_csv(datafile_path)
# df["embedding"] = df.embedding.apply(eval).apply(np.array)

In [None]:
# search function
def strings_ranked_by_relatedness(
    query: str,
    df: pd.DataFrame,
    relatedness_fn=lambda x, y: 1 - spatial.distance.cosine(x, y),
    top_n: int = 3
) -> tuple[tuple[str, ...], tuple[float, ...]]:
    """Rank the texts in `df` by relatedness to `query`, most related first.

    Args:
        query: free-text search query; it is embedded with EMBEDDING_MODEL.
        df: frame with a "text" column and an "embedding" column.
        relatedness_fn: callable(query_embedding, row_embedding) -> score,
            where higher means more related. Defaults to cosine similarity.
        top_n: maximum number of results to return.

    Returns:
        A pair (strings, relatednesses) of equally long tuples holding the
        top_n texts and their scores. Both are empty when `df` has no rows.
    """
    if df.empty:
        # Nothing to rank: skip the embedding request and avoid unpacking an
        # empty zip below, which would raise ValueError.
        return (), ()
    query_embedding_response = openai.Embedding.create(
        model=EMBEDDING_MODEL,
        input=query,
    )
    query_embedding = query_embedding_response["data"][0]["embedding"]
    # Walk the two columns directly instead of building a Series per row.
    strings_and_relatednesses = [
        (text, relatedness_fn(query_embedding, embedding))
        for text, embedding in zip(df["text"], df["embedding"])
    ]
    strings_and_relatednesses.sort(key=lambda pair: pair[1], reverse=True)
    strings, relatednesses = zip(*strings_and_relatednesses)
    return strings[:top_n], relatednesses[:top_n]

In [None]:
def test_strings_ranked_by_relatedness(query, df, top_n=3):
  """Print the relatedness score and display the text of the top_n matches for `query`."""
  # top_n has to be passed by keyword: the third positional parameter of
  # strings_ranked_by_relatedness is relatedness_fn, not top_n.
  strings, relatednesses = strings_ranked_by_relatedness(query, df, top_n=top_n)
  for string, relatedness in zip(strings, relatednesses):
      print(f"{relatedness=:.3f}")
      display(string)

In [None]:
# Example search: rank the resume chunks against a free-text skill query (top 3 by default).
query = "strong in math"
strings_ranked_by_relatedness(query, df)


(('NameOnResume: naimal_chisti\n\nHarlem Children’s Zone: Program Aide/Teacher: 2nd grade, New York, NY\n\nJuly 2022-August 2022\n\n● Lead tutor sessions which aided students with their homework\n● Facilitate activities that would engage students and enhance their creativity\n● Strategize ways for students to comprehend math problems and reading courses\n● Taught students from disadvantaged neighborhoods how to use the computer and Google software',
  'NameOnResume: seanreed\n\n2017 - 2018 \n\ntechnology education company. \n\n \nPython + Data Science Meetup, New York, NY \nData Scientist, Lead Instructor \n● Grew membership from 200 to over 8,600 technologists by personally preparing and delivering \n\n2016 - 2017 \n\nconsistent, high-quality academic content. ',
  'NameOnResume: jesse_jayant\n\nJesse Jayant \n\n555-555-5555/ you@post.harvard.edu\n\nSummary \n\nResults-oriented finance professional with over 10 years of experience in publicly traded and privately held \nenterprises. P

In [None]:
# Example search for a specific framework.
query = "understands pytorch"
strings_ranked_by_relatedness(query, df)

(('NameOnResume: seanreed\n\n● Built distributed Swin-UNETR encoder-decoder model using Azure ML, PyTorch, Docker, and \nLightning AI that can pretrain on unlabeled 3D CT scans, eliminating substantial future labeling \ntime and expense incurred by company radiologists. \nImplemented computer vision model from innovative academic research paper and completely \nrefactored model’s existing Python code to solve client’s business problem. \n\n● ',
  'NameOnResume: seanreed\n\n2009 \n\n1990 \n\nDatabricks, Azure ML, Python, Pandas, Spark, PyTorch, TensorFlow, Keras, Git, Computer Vision, \nNatural Language Processing, Medical Image Segmentation, Deep Learning, Machine Learning, \nGLMs, SQL, Linux, Bayesian Statistics, Data Pipelines, GCP, AWS, Docker, Kubernetes, Pandas, \nNumPy, Random Forests, Gradient Boosting, SVMs, GLMs, Recommender Systems, Graph \nDatabases, Neo4j',
  'NameOnResume: seanreed\n\n\x0cPractical Programming, New York, NY \nData Scientist, Lead Instructor \n● Developed a

In [None]:
# Example search for a general programming skill.
query = " python programming"
strings_ranked_by_relatedness(query, df)

(('NameOnResume: seanreed\n\nPython model to production environment and usage with Ray, Python, and PySpark on Azure \nDatabricks. \n\n \nGalvanize, New York, NY \nSenior Data Scientist \n● Served as Customer Data Scientist, tasked to identify new markets and clients most likely to \n\n2018 - 2021 \n\nqualify and benefit from company’s educational program. ',
  'NameOnResume: seanreed\n\n\x0cPractical Programming, New York, NY \nData Scientist, Lead Instructor \n● Developed and taught SQL, neural networks, machine learning in Spark, and Python curriculum \nat startup programming school, focusing on CNNs and natural language programming in Keras. \n● Clearly communicated complicated machine learning concepts to senior executives for growing ',
  'NameOnResume: seanreed\n\n2009 \n\n1990 \n\nDatabricks, Azure ML, Python, Pandas, Spark, PyTorch, TensorFlow, Keras, Git, Computer Vision, \nNatural Language Processing, Medical Image Segmentation, Deep Learning, Machine Learning, \nGLMs, SQL, 

In [None]:
# Example search combining two product names in one query.
query = " azure databricks"
strings_ranked_by_relatedness(query, df)

(('NameOnResume: seanreed\n\n2009 \n\n1990 \n\nDatabricks, Azure ML, Python, Pandas, Spark, PyTorch, TensorFlow, Keras, Git, Computer Vision, \nNatural Language Processing, Medical Image Segmentation, Deep Learning, Machine Learning, \nGLMs, SQL, Linux, Bayesian Statistics, Data Pipelines, GCP, AWS, Docker, Kubernetes, Pandas, \nNumPy, Random Forests, Gradient Boosting, SVMs, GLMs, Recommender Systems, Graph \nDatabases, Neo4j',
  'NameOnResume: seanreed\n\nPython model to production environment and usage with Ray, Python, and PySpark on Azure \nDatabricks. \n\n \nGalvanize, New York, NY \nSenior Data Scientist \n● Served as Customer Data Scientist, tasked to identify new markets and clients most likely to \n\n2018 - 2021 \n\nqualify and benefit from company’s educational program. ',
  'NameOnResume: seanreed\n\n● Built distributed Swin-UNETR encoder-decoder model using Azure ML, PyTorch, Docker, and \nLightning AI that can pretrain on unlabeled 3D CT scans, eliminating substantial fut

In [None]:
# Same two terms joined by a literal "AND", for comparison with the previous query.
query = " azure AND databricks"
strings_ranked_by_relatedness(query, df)


(('NameOnResume: seanreed\n\n2009 \n\n1990 \n\nDatabricks, Azure ML, Python, Pandas, Spark, PyTorch, TensorFlow, Keras, Git, Computer Vision, \nNatural Language Processing, Medical Image Segmentation, Deep Learning, Machine Learning, \nGLMs, SQL, Linux, Bayesian Statistics, Data Pipelines, GCP, AWS, Docker, Kubernetes, Pandas, \nNumPy, Random Forests, Gradient Boosting, SVMs, GLMs, Recommender Systems, Graph \nDatabases, Neo4j',
  'NameOnResume: seanreed\n\n● Built distributed Swin-UNETR encoder-decoder model using Azure ML, PyTorch, Docker, and \nLightning AI that can pretrain on unlabeled 3D CT scans, eliminating substantial future labeling \ntime and expense incurred by company radiologists. \nImplemented computer vision model from innovative academic research paper and completely \nrefactored model’s existing Python code to solve client’s business problem. \n\n● ',
  'NameOnResume: seanreed\n\nPython model to production environment and usage with Ray, Python, and PySpark on Azure \

# Aside: v2 of the search and vector-similarity function (does not use scipy.spatial)

In [None]:
from openai.embeddings_utils import get_embedding, cosine_similarity

# search function
def search_resumes(df, query, top_n=10, pprint=False):
    """Return the `top_n` rows of `df` most similar to `query`.

    Args:
        df: frame with a "text" column and an "embedding" column. It is not
            modified; the similarity scores live only on the returned frame.
        query: free-text search query, embedded with text-embedding-ada-002.
        top_n: number of rows to return.
        pprint: when True, also print the first 200 characters of each hit.

    Returns:
        A new DataFrame with the columns of `df` plus "similarity", sorted
        from most to least similar.
    """
    query_embedding = get_embedding(
        query,
        engine="text-embedding-ada-002"
    )
    similarity = df.embedding.apply(lambda x: cosine_similarity(x, query_embedding))

    # assign() works on a copy, so repeated searches do not leave a stale
    # "similarity" column behind on the caller's frame.
    results = (
        df.assign(similarity=similarity)
        .sort_values("similarity", ascending=False)
        .head(top_n)
    )
    if pprint:
        # Iterating a DataFrame yields its column names, so go through the
        # text column explicitly.
        for text in results["text"]:
            print(text[:200])
            print()
    return results

In [None]:
# Same kind of lookup through the v2 helper, keeping the 3 best-matching chunks.
query = "python skills"
search_resumes(df,query, 3)

Unnamed: 0,text,embedding,similarity
45,NameOnResume: seanreed\n\nPractical Programmi...,"[-0.004207970108836889, 0.005816794466227293, ...",0.774659
46,NameOnResume: seanreed\n\n2017 - 2018 \n\ntech...,"[0.0005816532066091895, -0.005294321104884148,...",0.773287
49,NameOnResume: seanreed\n\n2009 \n\n1990 \n\nDa...,"[-0.0005839008954353631, -0.006026691291481256...",0.77244


## 3. Ask

With the search function above, we can now automatically retrieve relevant knowledge and insert it into messages to GPT.

Below, we define a function `ask` that:
- Takes a user query
- Searches for text relevant to the query
- Stuffs that text into a message for GPT
- Sends the message to GPT
- Returns GPT's answer

In [None]:
# From here on GPT_MODEL names the chat model used by `ask`.
GPT_MODEL = 'gpt-3.5-turbo'
# NOTE: this re-definition replaces the earlier num_tokens; any function that calls
# num_tokens without a model argument picks up the 'gpt-3.5-turbo' default from now on.
def num_tokens(text: str, model: str = 'gpt-3.5-turbo') -> int:
    """Count the tokens `text` encodes to under the tiktoken encoding for `model`."""
    tokenizer = tiktoken.encoding_for_model(model)
    token_ids = tokenizer.encode(text)
    return len(token_ids)


#using v1 search function
def query_message(
    query: str,
    df: pd.DataFrame,
    model: str,
    token_budget: int
) -> str:
    """Build the user message for GPT: an introduction, resume sections, then the question.

    The sections come from strings_ranked_by_relatedness(query, df), most
    related first. They are appended one by one until adding the next one
    would push the whole message (including the question) past `token_budget`
    tokens, counted for `model`.
    """
    ranked_sections, _ = strings_ranked_by_relatedness(query, df)
    header = ' You are a Human Resources agent looking for skills in resumes'
    footer = f"\n\nQuestion: {query}"
    body = header
    for section in ranked_sections:
        candidate = f'\n\nresume section:\n"""\n{section}\n"""'
        # stop at the first section that no longer fits the budget
        if num_tokens(body + candidate + footer, model=model) > token_budget:
            break
        body += candidate
    return body + footer

@logger.catch
def ask(
    query: str,
    df: pd.DataFrame = df,
    model: str = GPT_MODEL,
    token_budget: int = 4096 - 500,
    print_message: bool = False,
) -> str:
    """Answers a query using GPT and a dataframe of relevant texts and embeddings.

    Args:
        query: the user's question.
        df: frame with "text" and "embedding" columns. The default is the
            global `df` as it existed when this function was defined.
        model: chat model to call; also selects the tokenizer for budgeting.
        token_budget: maximum number of tokens allowed for the user message.
        print_message: when True, print the assembled user message in addition
            to logging it at debug level.

    Returns:
        The content of the model's first reply choice. The function is wrapped
        in `logger.catch`, so exceptions are logged rather than raised here.
    """
    message = query_message(query, df, model=model, token_budget=token_budget)
    logger.debug(f"{message}")
    if print_message:
        # honour the flag, which was previously accepted but never used
        print(message)
    content = "Construct a list of NameOnResume fields from the documents given. Remove all duplicates from the list"
    messages = [
        {"role": "system", "content": content},
        {"role": "user", "content": message},
    ]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0
    )
    response_message = response["choices"][0]["message"]["content"]
    return response_message



In [None]:
# End-to-end example: retrieve the most related resume chunks and ask the chat model
# which NameOnResume values they carry.
ask('who knows law')

[32m2023-05-08 19:50:15.430[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36mask[0m:[36m40[0m - [34m[1m You are a Human Resources agent looking for skills in resumes

resume section:
"""
NameOnResume: naimal_chisti

Changemaker Certificate: Fordham Social Innovation Summer Experience: Systems Thinking (2020) and Fordham Business
Development Collaboratory (2020), Global Outreach Certificate Colombia (2022), Matteo Ricci Seminar Alumni (2021),
Dean's List (2021-2022)
EXPERIENCE
Fordham Law School Development of Law Office/Alumni Relations:
Student Worker, New York, NY
"""

resume section:
"""
NameOnResume: naimal_chisti

Near star, Guiding question, and Framing the question in mind

● Meditated on the fundamental forces that impact how the system in Yemen works (including influential figures,

events, norms/beliefs, institutions, laws/policies, etc.)

Fordham Business Development Collaboratory: Financial research participant, New York, NY    June 2020-August 2020
"""

resume se

'The NameOnResume field in all three resumes is "naimal_chisti". Therefore, the list of NameOnResume fields is ["naimal_chisti"]. There are no duplicates to remove.'