# Import

In [43]:
import json

from dotenv import load_dotenv
from openai import OpenAI
from variable import FOLDER_RESUME, FOLDER_JSON_SCHEMA, FOLDER_TXT

# API OpenAI

## Créer un client

In [None]:
load_dotenv() # Permet de charger les données sensibles présentes dans le fichier .env tel que l'API key de OpenAI
client = OpenAI() # Permet de se connecter à son client

## Charger un fichier dans OpenAI

In [44]:

file = client.files.create(
  file=open(FOLDER_TXT + '/' + "cv_format_texte_example.txt", "rb"),
  purpose='assistants'
)
file

FileObject(id='file-W2qQ49ujkpxTkjWbGyuUZcqv', bytes=4413, created_at=1708422991, filename='cv_format_texte_example.txt', object='file', purpose='assistants', status='processed', status_details=None)

## Format type d'un prompt

In [86]:
instructions = """
You are great to extract informations from resume.
Rules:
    1) Use the JSON schema as directive
    2) Only use information that you find in the document provided
    3) Make sure to include all the properties mentioned
    4) Recreate answers with only informations wanted
    5) You will redecompose the resume in distinct part to extract
    6) Use paragraph to extract most precise information or context
    7) Process multiple choice as answer, create a list of answers
    8) If multiple response, try to find one more
    9) Always finish with the json file

Response format:
    1) No accent at all
    2) Consistence phrase
    3) JSON format
    4) Always finish with } symbol
    

Output result in JSON object to be equivalent at the JSON schema.
"""
instructions

'\nYou are great to extract informations from resume.\nRules:\n    1) Use the JSON schema as directive\n    2) Only use information that you find in the document provided\n    3) Make sure to include all the properties mentioned\n    4) Recreate answers with only informations wanted\n    5) You will redecompose the resume in distinct part to extract\n    6) Use paragraph to extract most precise information or context\n    7) Process multiple choice as answer, create a list of answers\n    8) If multiple response, try to find one more\n    9) Always finish with the json file\n\nResponse format:\n    1) No accent at all\n    2) Consistence phrase\n    3) JSON format\n    4) Always finish with } symbol\n    \n\nOutput result in JSON object to be equivalent at the JSON schema.\n'

## Création de l'assistant

In [87]:
assistant = client.beta.assistants.create(
  name="jsonParser", # Nom de l'assistant, visualisable sur OpenAI dans la section Assistants
  instructions=instructions, # Instruction précédente permettant au robot de comprendre sa tâche
  model="gpt-3.5-turbo", # Nom du modèle utilisé
  tools=[{"type": "retrieval"}], # L'outil 'retrieval' permet d'imposer à l'IA de lire dans le(s) fichier(s) pour trouver l'information de la réponse
  file_ids=[file.id] # Permet de préciser tous les fichiers à utiliser
)
assistant

Assistant(id='asst_n0W7oB6YhyO5kvA8Oa3u8Sm2', created_at=1708423441, description=None, file_ids=['file-W2qQ49ujkpxTkjWbGyuUZcqv'], instructions='\nYou are great to extract informations from resume.\nRules:\n    1) Use the JSON schema as directive\n    2) Only use information that you find in the document provided\n    3) Make sure to include all the properties mentioned\n    4) Recreate answers with only informations wanted\n    5) You will redecompose the resume in distinct part to extract\n    6) Use paragraph to extract most precise information or context\n    7) Process multiple choice as answer, create a list of answers\n    8) If multiple response, try to find one more\n    9) Always finish with the json file\n\nResponse format:\n    1) No accent at all\n    2) Consistence phrase\n    3) JSON format\n    4) Always finish with } symbol\n    \n\nOutput result in JSON object to be equivalent at the JSON schema.\n', metadata={}, model='gpt-3.5-turbo', name='jsonParser', object='assis

## Création de la question de l'utilisateur

In [95]:
content = "Extract information from the file to obtain the JSON schema as follow : \n{json_schema}\n\nOnly the json."
json_schema = json.load(open(FOLDER_JSON_SCHEMA + '/jsonSchema_personalInformation.json', 'r'))
content = content.format(json_schema=json_schema)
content

'Extract information from the file to obtain the JSON schema as follow : \n{\'name\': \'personal_information\', \'description\': \'Informations personal about the person in the resume. It will contain the firstname, lastname, age, living address and the role desired by the person\', \'type\': \'object\', \'properties\': {\'personal_information\': {\'type\': \'object\', \'description\': \'All personnal information about the person\', \'properties\': {\'firstname\': {\'type\': \'string\', \'title\': \'First name\', \'description\': "The person\'s first name"}, \'lastname\': {\'type\': \'string\', \'title\': \'Last name\', \'description\': "The person\'s last name"}, \'age\': {\'type\': \'string\', \'description\': \'Age in years of the person which must be equal to or greather than zero\'}, \'address\': {\'type\': \'string\', \'description\': \'Adresse of the person in real life which must be precise by the country\'}, \'role\': {\'type\': \'string\', \'description\': \'Role desired by t

## Création d'un thread afin de comprendre quel conversation suivre

Cela permet de paralléliser les tâches d'un même besoin (par exemple ici le fichier) afin de préparer l'ensemble des requêtes et obtenir 
toutes les réponses en une seule fois.  
Il n'y a pas besoin de créer plusieurs threads pour un même document dans l'exemple suivant. 

In [96]:
thread = client.beta.threads.create()
thread

Thread(id='thread_Hm5fi0K8m6xmAE8HJuIKKMlb', created_at=1708423506, metadata={}, object='thread', tool_resources=[])

## Création du message envoyé à l'assistant

In [97]:
message = client.beta.threads.messages.create(
  thread_id = thread.id, # ID du thread précédent
  role = 'user', # Role correspond 
  content = content # Message du rôle
)
message

ThreadMessage(id='msg_xzblxKjuMnPDZ6kcGuJL3U8w', assistant_id=None, content=[MessageContentText(text=Text(annotations=[], value='Extract information from the file to obtain the JSON schema as follow : \n{\'name\': \'personal_information\', \'description\': \'Informations personal about the person in the resume. It will contain the firstname, lastname, age, living address and the role desired by the person\', \'type\': \'object\', \'properties\': {\'personal_information\': {\'type\': \'object\', \'description\': \'All personnal information about the person\', \'properties\': {\'firstname\': {\'type\': \'string\', \'title\': \'First name\', \'description\': "The person\'s first name"}, \'lastname\': {\'type\': \'string\', \'title\': \'Last name\', \'description\': "The person\'s last name"}, \'age\': {\'type\': \'string\', \'description\': \'Age in years of the person which must be equal to or greather than zero\'}, \'address\': {\'type\': \'string\', \'description\': \'Adresse of the pe

## Création d'une instance de déploiement

In [98]:
run = client.beta.threads.runs.create(
  thread_id = thread.id, # Toujours le même thread
  assistant_id = assistant.id # ID de l'assistant pour traiter du thread
)
run

Run(id='run_va0ZaH0WpNXi205IIt8GZMbq', assistant_id='asst_n0W7oB6YhyO5kvA8Oa3u8Sm2', cancelled_at=None, completed_at=None, created_at=1708423507, expires_at=1708424107, failed_at=None, file_ids=['file-W2qQ49ujkpxTkjWbGyuUZcqv'], instructions='\nYou are great to extract informations from resume.\nRules:\n    1) Use the JSON schema as directive\n    2) Only use information that you find in the document provided\n    3) Make sure to include all the properties mentioned\n    4) Recreate answers with only informations wanted\n    5) You will redecompose the resume in distinct part to extract\n    6) Use paragraph to extract most precise information or context\n    7) Process multiple choice as answer, create a list of answers\n    8) If multiple response, try to find one more\n    9) Always finish with the json file\n\nResponse format:\n    1) No accent at all\n    2) Consistence phrase\n    3) JSON format\n    4) Always finish with } symbol\n    \n\nOutput result in JSON object to be equiv

## Permet de lire la réponse du système

In [100]:
run = client.beta.threads.runs.retrieve( # Retrouver la réponse
  thread_id = thread.id,
  run_id = run.id
)

messages = client.beta.threads.messages.list( # Stocker l'information sous forme de liste de message
  thread_id = thread.id
)

for message in reversed(messages.data):
  print(message.role + ": ", message.content[0].text.value, "\n\n") # afficher les messages

user:  Extract information from the file to obtain the JSON schema as follow : 
{'name': 'personal_information', 'description': 'Informations personal about the person in the resume. It will contain the firstname, lastname, age, living address and the role desired by the person', 'type': 'object', 'properties': {'personal_information': {'type': 'object', 'description': 'All personnal information about the person', 'properties': {'firstname': {'type': 'string', 'title': 'First name', 'description': "The person's first name"}, 'lastname': {'type': 'string', 'title': 'Last name', 'description': "The person's last name"}, 'age': {'type': 'string', 'description': 'Age in years of the person which must be equal to or greather than zero'}, 'address': {'type': 'string', 'description': 'Adresse of the person in real life which must be precise by the country'}, 'role': {'type': 'string', 'description': 'Role desired by the person'}}}}}

Only the json. 


assistant:  {
  "personal_information": {

In [101]:
content = "Extract information from the file to obtain the JSON schema as follow : \n{json_schema}\n\nOnly the json."
json_schema = json.load(open(FOLDER_JSON_SCHEMA + '/jsonSchema_jobs.json', 'r'))
content = content.format(json_schema=json_schema)


#thread = client.beta.threads.create()

message = client.beta.threads.messages.create(
  thread_id = thread.id,
  role = 'user',
  content = content
)

run = client.beta.threads.runs.create(
  thread_id = thread.id,
  assistant_id = assistant.id
)

In [102]:
run = client.beta.threads.runs.retrieve(
  thread_id = thread.id,
  run_id = run.id
)

messages = client.beta.threads.messages.list(
  thread_id = thread.id
)

for message in reversed(messages.data):
  print(message.role + ": ", message.content[0].text.value, "\n\n")

user:  Extract information from the file to obtain the JSON schema as follow : 
{'name': 'personal_information', 'description': 'Informations personal about the person in the resume. It will contain the firstname, lastname, age, living address and the role desired by the person', 'type': 'object', 'properties': {'personal_information': {'type': 'object', 'description': 'All personnal information about the person', 'properties': {'firstname': {'type': 'string', 'title': 'First name', 'description': "The person's first name"}, 'lastname': {'type': 'string', 'title': 'Last name', 'description': "The person's last name"}, 'age': {'type': 'string', 'description': 'Age in years of the person which must be equal to or greather than zero'}, 'address': {'type': 'string', 'description': 'Adresse of the person in real life which must be precise by the country'}, 'role': {'type': 'string', 'description': 'Role desired by the person'}}}}}

Only the json. 


assistant:  {
  "personal_information": {