In [9]:
from typing import List, Optional, Dict, Any
from pydantic import BaseModel
import json

# Re-define models after code state reset
class ResumeItem(BaseModel):
    """A single entry inside a resume section (one degree, one job, one skill group, ...).

    Only ``title`` is required; every other field defaults to ``None``.
    """
    title: str  # main label of the entry
    subtitle: Optional[str] = None  # secondary label; the sample data uses it for institution/employer/venue
    start_date: Optional[str] = None  # free-form text such as "Aug 2019"; the code in this notebook never parses it
    end_date: Optional[str] = None  # free-form text; the sample data also uses "present"
    details: Optional[List[str]] = None  # detail lines, rendered as bullets or as one comma-joined line
    extra: Optional[Dict[str, Any]] = None  # arbitrary additional data; not read by any code visible here

class ResumeSection(BaseModel):
    """A named group of resume items, e.g. "Education" or "Technical Skills"."""
    section_name: str  # heading text; the renderers also test it for the substring "skill"
    items: List[ResumeItem]  # entries of the section, kept in the order given

class Resume(BaseModel):
    """Top-level resume model: an ordered list of sections."""
    sections: List[ResumeSection]  # sections in the order they should be rendered

# Sample resume data reflecting output1 style.
# Two sections: "Education" (items with subtitles, the first also with dates
# and details) and "Technical Skills" (items with only a title and details).
sample_resume = Resume(sections=[
    ResumeSection(
        section_name="Education",
        items=[
            ResumeItem(
                title="PhD candidate in computer science",
                subtitle="Old Dominion University | Norfolk, USA",
                start_date="Aug 2019",
                end_date="present",
                details=[
                    "GPA: 3.84/4.0",
                    "Research Interests: Self-Supervised Learning and Semi-supervised Learning in Imbalanced datasets (Image, Text and Tabular Domains)"
                ]
            ),
            ResumeItem(
                title="Master of Science in Computer Engineering",
                subtitle="University of NabiAkram | Tabriz, Iran"
            ),
            ResumeItem(
                title="Bachelor of Science in Computer Engineering",
                subtitle="University of Shamsipoor | Tehran, Iran"
            ),
        ]
    ),
    ResumeSection(
        section_name="Technical Skills",
        items=[
            ResumeItem(title="Languages & databases", details=["python", "Java", "C++", "ASP Webform", "C#", "SQL", "MySQL", "HTML"]),
            ResumeItem(title="Libraries", details=["Tensorflow", "Keras", "PyTorch", "OpenCV", "Scikit-learn", "NLP toolkit", "HuggingFace", "Pandas", "Matplotlib", "Seaborn", "LangChain", "Dask", "BeautifulSoup", "Flask"]),
            ResumeItem(title="Development tools", details=["Anaconda", "Jupyter Notebook", "Google Colab", "Visual Studio", "Git", "Docker", "AWS"]),
            ResumeItem(title="Operating Systems", details=["Windows", "Linux", "Mac OS X"]),
            ResumeItem(title="Certifications", details=["LanGraph: Link", "LLM Engineering: Link", "AWS SageMaker: Link"]),
            ResumeItem(title="Awards and Honors", details=["Received \"Best Teaching Assistant\" award, Spring 2025"])
        ]
    )
])

# Serialize to an indented JSON string. Nothing is written to disk here; the
# string is kept in `resume_json` and parsed again by a later cell.
resume_json = sample_resume.model_dump_json(indent=2)



In [6]:
# resume_json

In [18]:
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from flask import Response
from bs4 import BeautifulSoup
import io
import json

# Load resume JSON: round-trip the string produced by the earlier cell
# (`resume_json`) through a plain dict and re-validate it with Pydantic.
#
# Disabled alternatives kept for reference:
#   - read the JSON from a file instead:
#       with open("/mnt/data/sample_resume.json", "r") as f:
#           resume_data = json.load(f)
#       resume = Resume.model_validate(resume_data)
#   - paste a literal:
#       resume_json = '{"sections": [...]}'
resume_dict = json.loads(resume_json)
resume = Resume.model_validate(resume_dict)

# Generate DOCX
def generate_docx_from_resume(resume: Resume) -> bytes:
    """Render a resume as a Word document and return the .docx file contents.

    Each section becomes a level-1 heading. For every item the subtitle is
    written first (bold, left-aligned), then the title (skipped when it is
    identical to the subtitle), then the date range, then the details.
    Details are joined into a single comma-separated paragraph when the
    section name contains "skill" (case-insensitive) and are emitted as
    'List Bullet' paragraphs otherwise.

    Args:
        resume: The resume to render.

    Returns:
        The serialized .docx document as bytes.
    """
    doc = Document()

    # Default font applied through the document's 'Normal' style.
    normal_font = doc.styles['Normal'].font
    normal_font.name = 'Arial'
    normal_font.size = Pt(11)

    for section in resume.sections:
        doc.add_heading(section.section_name, level=1)
        # Decided once per section: skills-like sections list details inline.
        inline_details = "skill" in section.section_name.lower()

        for item in section.items:
            # Subtitle as job title or university
            if item.subtitle:
                p = doc.add_paragraph()
                p.add_run(item.subtitle).bold = True
                p.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

            # Title if distinct from subtitle
            if item.title and item.title != item.subtitle:
                doc.add_paragraph(item.title)

            # Join only the dates that are present, so a lone start or end
            # date is not rendered with a dangling " – " separator.
            date_range = " – ".join(d for d in (item.start_date, item.end_date) if d)
            if date_range:
                doc.add_paragraph(date_range, style='Intense Quote')

            if item.details:
                if inline_details:
                    doc.add_paragraph(", ".join(item.details))
                else:
                    for detail in item.details:
                        doc.add_paragraph(detail, style='List Bullet')

    buffer = io.BytesIO()
    doc.save(buffer)
    # getvalue() returns the whole buffer regardless of the stream position.
    return buffer.getvalue()

# Save DOCX: render the validated resume and write the bytes to a file whose
# relative path resolves against the current working directory.
docx_bytes = generate_docx_from_resume(resume)
docx_path = "generated_resume.docx"
with open(docx_path, "wb") as f:
    f.write(docx_bytes)

# Bare last expression so the notebook displays the output path.
docx_path


'generated_resume.docx'

In [17]:
from weasyprint import HTML

# Convert resume object into simple HTML structure
def resume_to_html(resume: Resume) -> str:
    """Render a resume as a self-contained HTML document string.

    Each section becomes an ``<h1>``. For every item the subtitle is emitted
    as ``<h2>``, the title as a bold paragraph (skipped when identical to the
    subtitle), the date range as an italic paragraph, and the details either
    as one comma-separated paragraph (section name contains "skill",
    case-insensitive) or as a ``<ul>`` list.

    All resume text is HTML-escaped before insertion, so values such as
    "Languages & databases" or "a < b" produce valid markup. Consequently,
    markup embedded in resume fields is shown literally, not interpreted.

    Args:
        resume: The resume to render.

    Returns:
        The complete HTML document as a single string.
    """
    from html import escape  # stdlib; imported locally so the block is self-contained

    def text(value: str) -> str:
        # Values are only placed in element content, never in attributes,
        # so quotes can stay as they are.
        return escape(value, quote=False)

    html_parts = [
        '<html><head><meta charset="utf-8"><style>',
        'body { font-family: Arial, sans-serif; line-height: 1.6; font-size: 12pt; }',
        'h1 { font-size: 16pt; margin-top: 30px; color: #2c3e50; border-bottom: 1px solid #ccc; }',
        'h2 { font-size: 13pt; margin-top: 20px; color: #2c3e50; }',
        'ul { margin: 0; padding-left: 20px; }',
        'li { margin-bottom: 4px; }',
        'p { margin: 5px 0; }',
        '</style></head><body>'
    ]

    for section in resume.sections:
        html_parts.append(f"<h1>{text(section.section_name)}</h1>")
        # Decided once per section: skills-like sections list details inline.
        inline_details = "skill" in section.section_name.lower()

        for item in section.items:
            if item.subtitle:
                html_parts.append(f"<h2>{text(item.subtitle)}</h2>")

            if item.title and item.title != item.subtitle:
                html_parts.append(f"<p><strong>{text(item.title)}</strong></p>")

            # Join only the dates that are present, so a lone start or end
            # date is not rendered with a dangling " – " separator.
            date_range = " – ".join(d for d in (item.start_date, item.end_date) if d)
            if date_range:
                html_parts.append(f"<p><em>{text(date_range)}</em></p>")

            if item.details:
                escaped_details = [text(detail) for detail in item.details]
                if inline_details:
                    html_parts.append(f"<p>{', '.join(escaped_details)}</p>")
                else:
                    html_parts.append("<ul>")
                    html_parts.extend(f"<li>{detail}</li>" for detail in escaped_details)
                    html_parts.append("</ul>")

    html_parts.append('</body></html>')
    return ''.join(html_parts)
# Generate and save PDF: build the HTML string from the validated resume,
# then have WeasyPrint write it to a PDF file whose relative path resolves
# against the current working directory.
html_content = resume_to_html(resume)
pdf_path = "generated_resume2.pdf"
HTML(string=html_content).write_pdf(pdf_path)

# Bare last expression so the notebook displays the output path.
pdf_path


'generated_resume2.pdf'

In [33]:
# Hand-built Resume instance (looks like a pasted model repr - TODO confirm its origin).
# It has five sections (Education, Technical Skills, Experience, Projects, Publications)
# and is the input passed to convert_to_reactive_resume in a later cell.
res=Resume(sections=[ResumeSection(section_name='Education', items=[ResumeItem(title='Ph.D. in Computer Science', subtitle='Old Dominion University, VA, USA', start_date='August 2019', end_date='Present', details=['GPA: 3.84/4.0', 'Research Interest: Natural Language Processing (NLP), Computer Vision (CV), Machine Learning (ML), Deep Learning (DL), Digital Libraries, and Scholarly Big Data'], extra=None), ResumeItem(title='Bachelor of Science in Computer Engineering', subtitle='Elizabethtown College, PA, USA', start_date='August 2014', end_date='May 2018', details=['GPA: 3.36/4.0', 'Minor: Information Systems'], extra=None)]), ResumeSection(section_name='Technical Skills', items=[ResumeItem(title='Languages & Database', subtitle=None, start_date=None, end_date=None, details=['Python', 'PHP', 'C', 'HTML', 'CSS', 'SQL', 'MySQL', 'AWS S3'], extra=None), ResumeItem(title='Technologies & APIs', subtitle=None, start_date=None, end_date=None, details=['Keras', 'Tensorflow', 'PyTorch', 'OpenCV', 'scikit-learn', 'NLP toolkit'], extra=None), ResumeItem(title='Development Tools', subtitle=None, start_date=None, end_date=None, details=['Anaconda', 'Jupyter Notebook', 'Google Colab', 'Visual Studio', 'SVN', 'Git', 'Docker', 'AWS'], extra=None), ResumeItem(title='Operating Systems', subtitle=None, start_date=None, end_date=None, details=['Linux', 'Mac OS X', 'Windows Server'], extra=None)]), ResumeSection(section_name='Experience', items=[ResumeItem(title='Graduate Research Assistant', subtitle='Old Dominion University', start_date='August 2019', end_date='Present', details=['Developing AI-based applications using ML and DL by leveraging NLP and CV.', 'Conducting research on Digital Libraries, Computational Reproducibility and Replicability.', 'Mentoring students, writing research papers, and presenting research work at top conferences and journals.'], extra={}), ResumeItem(title='Research Intern', subtitle='Los Alamos National Laboratory', start_date='June 2020', 
end_date='August 2020', details=['Conducted research and implemented a framework for offline handwritten mathematical equation recognition.', 'Preprocessed images, built ground truth, applied OpenCV for segmentation, blurring, and binary thresholding.', 'Employed deep neural networks such as LeNET5-CNN as a model backbone and achieved 89% model accuracy.'], extra=None), ResumeItem(title='Machine Learning Intern', subtitle='Bihrle Applied Research Inc', start_date='June 2021', end_date='August 2021', details=['Developed and enhanced algorithms for Train Detection used by Rail-Inspector – a cloud-based software that processes aerial imagery of railroad tracks using machine learning and deep learning.', 'Built ground truth by labeling images of trains, employed deep learning model such as FCN for segmentation.', 'Trained the model, solved overfitting problems, optimized the result, and achieved 96% accuracy.'], extra=None)]), ResumeSection(section_name='Projects', items=[ResumeItem(title='AutoMeta', subtitle=None, start_date='August 2019', end_date='January 2021', details=['A metadata extractor application to extract metadata fields from scanned book-length documents such as electronic theses and dissertations (ETDs) by leveraging NLP techniques.', 'It uses ML-based methods such as Conditional Random Field (CRF), which incorporates text and visual features.', 'The model was trained and evaluated using AutoMeta-ETD500, and achieved F1 score of 83% – 96%.'], extra={}), ResumeItem(title='ETDPC', subtitle=None, start_date='March 2021', end_date='August 2023', details=['A two-stream novel multi-modal classification model with cross-attention that uses vision encoder (ResNet50v2) and text encoder (BERT with Talking-Heads Attention) to classify ETD pages into 13 categories.', 'The model was trained and evaluated using ETDPC-ETD500, and achieved F1 score of 84% – 96%.'], extra={}), ResumeItem(title='MetaEnhance', subtitle=None, start_date='May 2022', end_date='December 2022', 
details=['An application to improve the metadata quality of ETDs by filling out the missing values, correcting the incorrect values and misspellings, and canonicalizing the surface values by leveraging the SOTA ML and DL models.', 'The framework was evaluated against MetaEnhance-ETDQual500 and achieved nearly perfect F1-scores in detecting errors and F1-scores ranging from 85% – 100% for correcting five of seven key metadata fields.'], extra={})]), ResumeSection(section_name='Publications', items=[ResumeItem(title='MetaEnhance: Metadata Quality Improvement for ETDs of University Libraries.', subtitle='JCDL 2023', start_date=None, end_date=None, details=['Link (Best Paper Award)'], extra=None), ResumeItem(title='A Study on Reproducibility and Replicability of Table Structure Recognition Methods.', subtitle='ICDAR 2023', start_date=None, end_date=None, details=['Link'], extra=None), ResumeItem(title='A Study of Computational Reproducibility using URLs Linking to Open Access Datasets and Software.', subtitle='WWW 2022', start_date=None, end_date=None, details=['Link'], extra=None), ResumeItem(title='Segmenting Technical Drawing Figures in US Patents.', subtitle='SDU@AAAI 2022', start_date=None, end_date=None, details=['Link'], extra=None), ResumeItem(title='Automatic Metadata Extraction Incorporating Visual Features from Scanned ETDs.', subtitle='JCDL 2021', start_date=None, end_date=None, details=['Link'], extra=None), ResumeItem(title='A Heuristic Baseline Method for Metadata Extraction from Scanned ETDs.', subtitle='JCDL 2020', start_date=None, end_date=None, details=['Link (Best Poster Award)'], extra=None)])])

In [34]:
def convert_to_reactive_resume(resume: Resume) -> Dict[str, Any]:
    """Convert a Resume into a flat dict keyed by section name.

    The result starts with a "basics" entry holding empty contact
    placeholders, followed by one key per whitespace-stripped section name in
    order of first appearance. Each of those keys maps to a list with one
    entry dict per item; sections that share a name are merged into the same
    list, and a section without items maps to an empty list.

    Args:
        resume: The resume to convert.

    Returns:
        ``{"basics": {...}, "<section name>": [entry, ...], ...}`` where every
        entry has the keys ``name``, ``position``, ``startDate``, ``endDate``,
        ``highlights``, ``summary``, ``url`` and ``keywords``. ``highlights``
        is a fresh list, so editing the result never mutates ``resume``.

    Raises:
        ValueError: If a section's stripped name is "basics"; it would
            otherwise silently replace the contact placeholder.
    """
    reserved_key = "basics"
    reactive: Dict[str, Any] = {
        reserved_key: {
            "name": "",
            "email": "",
            "phone": "",
            "location": {"city": "", "region": "", "country": ""},
            "url": "",
            "profiles": [],
        },
    }

    for section in resume.sections:
        section_name = section.section_name.strip()
        if section_name == reserved_key:
            raise ValueError(
                f"section name {reserved_key!r} collides with the reserved contact-info key"
            )

        # setdefault registers the key even for sections with no items and
        # merges repeated section names into one list.
        entries = reactive.setdefault(section_name, [])
        for item in section.items:
            entries.append({
                "name": item.title,
                "position": item.subtitle or "",
                "startDate": item.start_date or "",
                "endDate": item.end_date or "",
                # Copy so the output does not alias the model's own list.
                "highlights": list(item.details or []),
                "summary": "",
                "url": "",
                "keywords": [],
            })
    return reactive

In [35]:
# Convert the hand-built resume; as the cell's last expression, the resulting dict is displayed.
convert_to_reactive_resume(res)

{'basics': {'name': '',
  'email': '',
  'phone': '',
  'location': {'city': '', 'region': '', 'country': ''},
  'url': '',
  'profiles': []},
 'Education': [{'name': 'Ph.D. in Computer Science',
   'position': 'Old Dominion University, VA, USA',
   'startDate': 'August 2019',
   'endDate': 'Present',
   'highlights': ['GPA: 3.84/4.0',
    'Research Interest: Natural Language Processing (NLP), Computer Vision (CV), Machine Learning (ML), Deep Learning (DL), Digital Libraries, and Scholarly Big Data'],
   'summary': '',
   'url': '',
   'keywords': []},
  {'name': 'Bachelor of Science in Computer Engineering',
   'position': 'Elizabethtown College, PA, USA',
   'startDate': 'August 2014',
   'endDate': 'May 2018',
   'highlights': ['GPA: 3.36/4.0', 'Minor: Information Systems'],
   'summary': '',
   'url': '',
   'keywords': []}],
 'Technical Skills': [{'name': 'Languages & Database',
   'position': '',
   'startDate': '',
   'endDate': '',
   'highlights': ['Python',
    'PHP',
    'C'