### Setup

In [1]:
import os
import dotenv
import json
from pydantic import BaseModel, Field, validator
from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from jobgpt.resume_processor.resume_reader import ResumeReader
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate
)
from langchain.chains import SequentialChain
from typing import Optional,List
import langchain
from jobgpt.utils.llm import count_tokens
import difflib
# Load variables from the local ".env" file into the process environment before
# any cell reads os.environ; override=True asks python-dotenv to let values from
# the file replace variables that are already set.
dotenv.load_dotenv(".env", override=True)
from langchain.schema import AIMessage, HumanMessage, SystemMessage

In [2]:
# Chat model shared by the chains built later in this notebook.
# The API key is read from the environment (populated by the .env load in the
# setup cell) instead of being hard-coded, and temperature is pinned to 0.
openai = ChatOpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model_name="gpt-4",
    temperature=0,
    verbose=True,
)

In [77]:
class WorkExperience(BaseModel):
    # One position from the work-experience section of a resume.
    # Documented with comments on purpose: a class docstring would be copied
    # into the generated JSON schema and therefore into the prompt.
    title: str = Field(
        description="The title of the position",
    )
    company: str = Field(
        description="The company of the position",
    )
    bullet_points: List[str] = Field(
        description="The list of text that describes the experience",
    )

class Education(BaseModel):
    # One entry from the education section of a resume.
    # Comments are used instead of a docstring so the generated JSON schema
    # (and with it the prompt text) stays exactly as it was.
    school: str = Field(
        description="The school of the education",
    )
    degree: str = Field(
        description="The degree of the education",
    )
    major: str = Field(
        description="The major of the education",
    )
    bullet_points: List[str] = Field(
        description="The list of text that describes the education",
    )

class PersonalProject(BaseModel):
    # One project entry (the parent model's description also allows research
    # projects, case studies and competitions).
    # NOTE(review): the description text below is sent to the model verbatim,
    # typo included; it is left untouched so the prompt does not change.
    title: str = Field(
        description="The title of the project",
    )
    bullet_points: List[str] = Field(
        description="The list of text that describes the project and the canditate's contribution",
    )

class Summary(BaseModel):
    # Free-text summary / highlight section, stored as a list of strings.
    # NOTE(review): the description is part of the prompt and is reproduced
    # verbatim (typo included) so the model input does not change.
    text: List[str] = Field(
        description="The summary or highlight of the resume which usually appears at the top or buttom of the resume",
    )

class Skills(BaseModel):
    # Skills section of the resume, one list item per skill.
    text: List[str] = Field(
        description="The list of skills that the candidate has",
    )

class SegmentedResume(BaseModel):
    # Top-level schema for a resume split into sections; this is the model
    # handed to PydanticOutputParser, so its field descriptions end up in the
    # format instructions given to the LLM.
    # Kept as comments rather than a docstring: a class docstring would be
    # added to the generated JSON schema and change the prompt.

    # Sections every resume is expected to have (required).
    work_experiences: List[WorkExperience] = Field(description="The work experience section of the resume")
    educations: List[Education] = Field(description="The education section of the resume")

    # Sections the prompt tells the model to leave out when they are absent.
    # They carry an explicit default of None so that a missing key always
    # validates; relying on Optional[...] alone only works on pydantic v1,
    # where Optional implies a None default (v2 treats it as required).
    personal_projects: Optional[List[PersonalProject]] = Field(
        default=None,
        description="The project section of the resume. It can be personal project, research project, case study or attended competition",
    )
    summary: Optional[Summary] = Field(
        default=None,
        description="The summary section of the resume",
    )
    skills: Optional[Skills] = Field(
        default=None,
        description="The skills section of the resume",
    )

In [78]:
# Output parser built from the SegmentedResume model; its get_format_instructions()
# text (JSON schema plus an example) is what later cells pass to the prompt.
parser = PydanticOutputParser(pydantic_object=SegmentedResume)

In [79]:
# Inspect the instructions that will be injected into the prompt. print() is used
# so the multi-line text renders with real line breaks.
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"work_experiences": {"title": "Work Experiences", "description": "The work experience section of the resume", "type": "array", "items": {"$ref": "#/definitions/WorkExperience"}}, "educations": {"title": "Educations", "description": "The education section of the resume", "type": "array", "items": {"$ref": "#/definitions/Education"}}, "personal_projects": {"title": "Personal Projects", "description": "The project section of the resume. It can be personal project, research project, case study or attended competition", "type": "ar

### Strategy 1

In [3]:
# Read the sample resume as one string.
# The encoding is passed explicitly so the result does not depend on the
# platform's default locale encoding (assumes the .txt file is UTF-8 — confirm).
with open('../data/resume_txt/jiawei_wang.txt', encoding='utf-8') as f:
    resume = f.read()

In [84]:
# Strategy 1: a single chat prompt (system + user message) that asks the model
# to segment the resume and answer in the JSON format supplied by the parser.
# NOTE(review): the template text is sent to the model as-is, including its
# typos; it is deliberately left unchanged so recorded outputs stay comparable.

# System message: fixes the assistant's role and general rules.
system_template = """
You are an experienced career consultalt who helps clients to improve their resumes.
You should be familiar with the general structure of a resume and the use of professional language.
When you are asked to provide evaluation or suggestion, make sure your are critical and specific.
Focus on the use of professional language and the relevancy to the job description.
REMEMBER DO NOT make things up or create fake experiences. 
"""

# User message: the segmentation task, with two placeholders filled at run time
# ({json_format} and {resume_text}).
user_teamplate = """
You are an experienced career consultalt helping clients with their resumes.
First, let's understand the client's background by reading the resume. 
Your job is to read the resume and segment the resume into different sections.
A general resume should at least have work experience and education sections. 
It may also have additional sections such as personal projects, summary and skills.
Segment the given resume into the sections mentioned above.
If you think that one section is missing, just DO NOT include the segment key in the output
{json_format}

resume: {resume_text}
"""

# Surrounding blank lines of the triple-quoted strings are stripped before the
# templates are turned into message prompts.
system_prompt = SystemMessagePromptTemplate.from_template(
    system_template.strip()
)
user_prompt = HumanMessagePromptTemplate.from_template(
    user_teamplate.strip()
)

# Combine both messages into one chat prompt and bind it to the chat model.
resume_analyzer_prompt_1 = ChatPromptTemplate(
    messages=[system_prompt, user_prompt],
    input_variables=["json_format", "resume_text"],
)
chain_analyze_1 = LLMChain(
    prompt=resume_analyzer_prompt_1,
    llm=openai,
)

In [85]:
# Run the Strategy 1 chain through the project's count_tokens helper and report
# the token count it returns.
# NOTE(review): count_tokens is defined in jobgpt.utils.llm; from its use here
# it returns a mapping with at least a "token_count" key — confirm in the helper.
chain_inputs = {
    "resume_text": resume,
    "json_format": parser.get_format_instructions(),
}
output = count_tokens(chain_analyze_1, chain_inputs)
print(output["token_count"])

2600


In [86]:
# Show the chain's reply stored under the 'result' key; judging by the output
# below it is the model's JSON text, not yet parsed into a SegmentedResume.
print(output['result'])

{
  "work_experiences": [
    {
      "title": "Banking Advisor",
      "company": "Royal bank of Canada",
      "bullet_points": [
        "Assisted clients with various questions and concerns related to their accounts and bank products.",
        "Accurately processed daily transactions (deposits, withdrawals, money transfers, payments, etc.)",
        "Analyzed client needs and presented appropriate financing solutions.",
        "Ability to work under pressure and built ongoing relationships with clients.",
        "Ability to work well independently and in group settings."
      ]
    },
    {
      "title": "Bookkeeper",
      "company": "Viceroy Homes",
      "bullet_points": [
        "Recording transactions and post them to various accounts.",
        "Processing payments and issuing invoice.",
        "Conduct periodic reconciliations of all accounts to ensure their accuracy.",
        "Provide clerical and administrative support to management as requested."
      ]
    },
  

In [4]:
# Same segmentation, this time through the packaged ResumeSegmenter class
# instead of the hand-built chain above.
# NOTE(review): this import sits outside the notebook's top import cell.
from jobgpt.resume_processor.resume_segmenter import ResumeSegmenter
segmenter = ResumeSegmenter()
# Bare last expression so the notebook displays the returned object
# (a SegmentedResume, per the output below).
segmenter.segment(resume)

SegmentedResume(work_experiences=[WorkExperience(title='Banking Advisor', company='Royal bank of Canada', bullet_points=['Assisted clients with various questions and concerns related to their accounts and bank products.', 'Accurately processed daily transactions (deposits, withdrawals, money transfers, payments, etc.)', 'Analyzed client needs and presented appropriate financing solutions.', 'Ability to work under pressure and built ongoing relationships with clients.', 'Ability to work well independently and in group settings.']), WorkExperience(title='Bookkeeper', company='Viceroy Homes', bullet_points=['Recording transactions and post them to various accounts.', 'Processing payments and issuing invoice.', 'Conduct periodic reconciliations of all accounts to ensure their accuracy.', 'Provide clerical and administrative support to management as requested.']), WorkExperience(title='CVITP (Community Volunteer Income Tax Program)', company='West Neighbourhood House', bullet_points=['Commu

In [1]:
# Scratch check: increment only the integer items of a mixed list; the
# non-integer entry ('a') is filtered out by the isinstance guard.
test = [1, 2, 'a', 3]
[item + 1 for item in test if isinstance(item, int)]

[2, 3, 4]

In [1]:
import tiktoken

# Count how many tokens a sample (truncated) evaluation reply occupies under
# the "cl100k_base" encoding.
encoder = tiktoken.get_encoding("cl100k_base")

# Sample reply text, kept verbatim (it is intentionally cut off mid-structure).
text = """
{
Evaluation: [
The wording in the work experience section is generally professional and appropriate.,
Measurable results are used to describe the experience, such as building a backtesting framework and achieving a cumulative return of 130%.,
The experience is relevant to the job description, as it involves quantitative research, trading analytics, and data visualization.,
Some key words from the job description, such as 'business analyst', 'analytical rigour', and 'problem solving skills' are not used in the resume.
],
Suggestions: [
In the first work experience, instead of 'Built backtesting framework', consider rewording it to 'Developed a backtesting framework in Python'.,
In the first work experience, instead of 'Constructed sensitivity analysis', consider rewording it to 'Performed sensitivity analysis on option prices'.,
In the first work experience, instead of 'Formulated Buy the Dip strategies', consider rewording it to 'Developed Buy the Dip strategies'.,
In the second work experience, instead of 'Parsed money market rates', consider rewording it to 'Extracted money market rates'.,
"""

# Encode once, then report the number of token ids produced.
token_ids = encoder.encode(text)
print(len(token_ids))




222
