In [7]:
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
import fitz  # PyMuPDF
from dotenv import load_dotenv
import os
from openai import OpenAI

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

def extract_text_from_pdf(pdf_path: str) -> str:
    doc = fitz.open(pdf_path)
    return "\n".join(page.get_text() for page in doc)

class ResumeItem(BaseModel):
    title: str
    subtitle: Optional[str]
    start_date: Optional[str]
    end_date: Optional[str]
    details: Optional[List[str]]
    extra: Optional[Dict[str, Any]]

class ResumeSection(BaseModel):
    section_name: str
    items: List[ResumeItem]

class Resume(BaseModel):
    sections: List[ResumeSection]

def parse_resume_with_openai(resume_text: str) -> Optional[Resume]:
    openai = OpenAI(api_key=api_key)
    # openai.api_key = openai_api_key
    prompt = f"""
You are an expert resume parser. Given this resume text, extract all information into the following JSON schema: {Resume.schema_json(indent=2)}
Resume Text:
\"\"\"
{resume_text}
\"\"\"
Return only the JSON.
"""
    response = openai.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=4096,
        temperature=0,
        response_format=Resume,
    )
    output = response.choices[0].message.parsed
    # json_str = response['choices'][0]['message']['content']
    # import json
    # try:
    #     data = json.loads(json_str)
    #     return Resume.parse_obj(data)  # This will create ResumeSection and ResumeItem objects
    # except (json.JSONDecodeError, ValidationError) as e:
    #     print("Parsing error:", e)
    #     return None
    return output

def resume_to_markdown(resume):
    md = []
    for section in resume.sections:
        md.append(f"## {section.section_name}\n")
        for item in section.items:
            # Title and subtitle
            line = f"**{item.title}**"
            if item.subtitle:
                line += f", *{item.subtitle}*"
            # Dates
            if item.start_date or item.end_date:
                dates = []
                if item.start_date:
                    dates.append(item.start_date)
                if item.end_date:
                    dates.append(item.end_date)
                line += f" ({' - '.join(dates)})"
            md.append(line)
            # Details
            if item.details:
                for detail in item.details:
                    md.append(f"- {detail}")
            # Extra fields
            if item.extra:
                for k, v in item.extra.items():
                    md.append(f"  - **{k.capitalize()}**: {v}")
            md.append("")  # Blank line for spacing
    return "\n".join(md)


resume = extract_text_from_pdf('/home/mory/jobProject/resumeBuilder2/uploads/Mory_Gharasuie_resume.pdf')
output = parse_resume_with_openai(resume)
# print(resume_to_markdown(output))
print(output)



/tmp/ipykernel_263929/1793766883.py:34: PydanticDeprecatedSince20: The `schema_json` method is deprecated; use `model_json_schema` and json.dumps instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  You are an expert resume parser. Given this resume text, extract all information into the following JSON schema: {Resume.schema_json(indent=2)}


sections=[ResumeSection(section_name='Contact Information', items=[ResumeItem(title='Mory Gharasuie', subtitle=None, start_date=None, end_date=None, details=['Norfolk, VA, USA', 'mmoha014@odu.edu', '+1 757 287 1602', 'https://www.linkedin.com/in/mory-gharasui-53415258/', 'https://github.com/mortezamg63'], extra=None)]), ResumeSection(section_name='Education', items=[ResumeItem(title='PhD candidate in computer science', subtitle='Old Dominion University, Norfolk, USA', start_date='Aug 2019', end_date='present', details=['GPA: 3.84/4.0', 'Research Interests: Self-Supervised Learning and Semi-supervised Learning in Imbalanced datasets (Image, Text and Tabular Domains)'], extra=None), ResumeItem(title='Master of Science in Computer Engineering', subtitle='University of NabiAkram, Tabriz, Iran', start_date=None, end_date=None, details=None, extra=None), ResumeItem(title='Bachelor of Science in Computer Engineering', subtitle='University of Shamsipoor, Tehran, Iran', start_date=None, end_dat

In [3]:
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
import fitz  # PyMuPDF
from dotenv import load_dotenv
import os
from openai import OpenAI

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

def extract_text_from_pdf(pdf_path: str) -> str:
    doc = fitz.open(pdf_path)
    return "\n".join(page.get_text() for page in doc)

class ResumeItem(BaseModel):
    title: str
    subtitle: Optional[str]
    start_date: Optional[str]
    end_date: Optional[str]
    details: Optional[List[str]]
    extra: Optional[Dict[str, Any]]

class ResumeSection(BaseModel):
    section_name: str
    items: List[ResumeItem]

class Resume(BaseModel):
    sections: List[ResumeSection]

def parse_resume_with_openai(resume_text: str) -> Optional[Resume]:
    openai = OpenAI(api_key=api_key)
    # openai.api_key = openai_api_key
    prompt = f"""
You are an expert resume parser. Given this resume text, extract all information into the following JSON schema: {Resume.schema_json(indent=2)}
Resume Text:
\"\"\"
{resume_text}
\"\"\"
Return only the JSON.
"""
    response = openai.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=4096,
        temperature=0,
        response_format=Resume,
    )
    output = response.choices[0].message.parsed
    # json_str = response['choices'][0]['message']['content']
    # import json
    # try:
    #     data = json.loads(json_str)
    #     return Resume.parse_obj(data)  # This will create ResumeSection and ResumeItem objects
    # except (json.JSONDecodeError, ValidationError) as e:
    #     print("Parsing error:", e)
    #     return None
    return output

def resume_to_markdown(resume):
    md = []
    for section in resume.sections:
        md.append(f"## {section.section_name}\n")
        for item in section.items:
            # Title and subtitle
            line = f"**{item.title}**"
            if item.subtitle:
                line += f", *{item.subtitle}*"
            # Dates
            if item.start_date or item.end_date:
                dates = []
                if item.start_date:
                    dates.append(item.start_date)
                if item.end_date:
                    dates.append(item.end_date)
                line += f" ({' - '.join(dates)})"
            md.append(line)
            # Details
            if item.details:
                for detail in item.details:
                    md.append(f"- {detail}")
            # Extra fields
            if item.extra:
                for k, v in item.extra.items():
                    md.append(f"  - **{k.capitalize()}**: {v}")
            md.append("")  # Blank line for spacing
    return "\n".join(md)


resume = extract_text_from_pdf('/home/mory/jobProject/resumeBuilder2/uploads/Mory_Gharasuie_resume.pdf')
output = parse_resume_with_openai(resume)
print(resume_to_markdown(output))



/tmp/ipykernel_263929/1885356036.py:34: PydanticDeprecatedSince20: The `schema_json` method is deprecated; use `model_json_schema` and json.dumps instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  You are an expert resume parser. Given this resume text, extract all information into the following JSON schema: {Resume.schema_json(indent=2)}


## Education

**PhD candidate in computer science**, *Old Dominion University* (Aug 2019 - present)
- GPA: 3.84/4.0
- Research Interests: Self-Supervised Learning and Semi-supervised Learning in Imbalanced datasets (Image, Text and Tabular Domains)

**Master of Science in Computer Engineering**, *University of NabiAkram*

**Bachelor of Science in Computer Engineering**, *University of Shamsipoor*

## Technical Skills

**Languages & databases**
- python
- Java
- C++
- ASP Webform
- C#
- SQL
- MySQL
- HTML

**Libraries**
- Tensorflow
- Keras
- PyTorch
- OpenCV
- Scikit-learn
- NLP toolkit
- HuggingFace
- Pandas
- Matplotlib
- Seaborn
- LangChain
- Dask
- BeautifulSoup
- Flask

**Development tools**
- Anaconda
- Jupyter Notebook
- Google Colab
- Visual Studio
- Git
- Docker
- AWS

**Operating Systems**
- Windows
- Linux
- Mac OS X

## Certifications

**LanGraph**

**LLM Engineering**

**AWS SageMaker**

## Awards and Honors

**Best Teaching Assistant**, *Spring 2025*

## Experience

**Sof

In [6]:
output

Resume(sections=[ResumeSection(section_name='Education', items=[ResumeItem(title='PhD candidate in computer science', subtitle='Old Dominion University', start_date='Aug 2019', end_date='present', details=['GPA: 3.84/4.0', 'Research Interests: Self-Supervised Learning and Semi-supervised Learning in Imbalanced datasets (Image, Text and Tabular Domains)'], extra=None), ResumeItem(title='Master of Science in Computer Engineering', subtitle='University of NabiAkram', start_date=None, end_date=None, details=None, extra=None), ResumeItem(title='Bachelor of Science in Computer Engineering', subtitle='University of Shamsipoor', start_date=None, end_date=None, details=None, extra=None)]), ResumeSection(section_name='Technical Skills', items=[ResumeItem(title='Languages & databases', subtitle=None, start_date=None, end_date=None, details=['python', 'Java', 'C++', 'ASP Webform', 'C#', 'SQL', 'MySQL', 'HTML'], extra=None), ResumeItem(title='Libraries', subtitle=None, start_date=None, end_date=Non