In [3]:
from docx import Document
import json

In [36]:
# Sample résumé payload consumed by ResumeGenerator / fill_resume.
# - "exp_company" is the employer and "exp_role" the job title; the experience
#   heading is rendered as "<exp_role> | <exp_company>".
# - All dates use the YYYY-MM-DD input format.
resume_dict = {
    "user_name": "Shashank Shashishekhar Reddy",
    "user_email": "shashankshashishekharreddy@gmail.com",
    "user_phone": "(510) 892-7191",
    "user_city": "San Jose, California",
    "user_linkedin_link": "https://www.linkedin.com/in/snkreddy",
    "user_summary": (
        "Results-driven Data Engineer with extensive experience in administering and enhancing data infrastructure, developing ETL pipelines, and supporting statistical data analysis for manufacturing environments. "
        "Skilled in creating efficient data models and facilitating communication across engineering teams. "
        "Proficient in Python, SQL, and data visualization tools to deliver actionable insights that improve production performance and drive decision-making."
    ),
    "user_skills": (
        "Certificates: AWS Cloud Practitioner | Validation Code: QD8E074C82EQ1WCE "
        "Programming Languages: Python, C++, Java, R, SQL, Bash, JavaScript "
        "Cloud Services: AWS (EC2, S3, Lambda, Glue, Amazon Redshift), GCP, Azure "
        "Databases: MySQL, MongoDB, PostgreSQL, RDS, Elasticsearch. "
        "Tools: Hadoop, Spark, Airflow, Tableau, Pandas, Scikit-Learn, Keras, Docker, Terraform. "
        "Statistical Analysis: JMP, Origin."
    ),
    "experiences": [
        {
            "exp_company": "Kantar",
            "exp_role": "Data Engineer",
            "exp_start_date": "2024-12-03",
            "exp_end_date": "2024-12-04",
            "exp_description": (
                "Administered and optimized existing data applications to improve production statistics including throughput and yield. "
                "Developed ETL pipelines using Apache Airflow and Spark, supporting data analysis for customer-oriented inquiries. "
                "Documented application functionality and provided training for end-users, facilitating a smooth user experience."
            ),
        },
        {
            "exp_company": "The Sparks Foundation",
            "exp_role": "Data Engineer",
            "exp_start_date": "2024-12-11",
            "exp_end_date": "2024-12-12",
            # The embedded line break is written as an explicit escape so the
            # literal stays valid Python.
            "exp_description": (
                "Engineered robust ETL processes to manage and process vast volumes of data from multiple sources, significantly enhancing data integration. \n"
                "Collaborated with engineering teams to support data inquiries related to product specifications and production processes, reducing data retrieval times by 30%."
            ),
        },
        {
            "exp_company": "The Sparks Foundation",
            "exp_role": "Data Engineer Intern",
            "exp_start_date": "2024-12-04",
            "exp_end_date": "2024-12-12",
            "exp_description": (
                "Supported the development of data pipelines and conducted thorough analysis to identify trends that inform decision-making. "
                "Assisted in optimizing SQL queries and automating data cleaning processes to ensure high data quality and reliability."
            ),
        },
    ],
    "projects": [
        {
            "proj_name": "Your Own Cabs Analysis",
            "proj_start_date": "2024-12-04",
            "proj_end_date": "2024-12-05",
            "proj_description": (
                "Developed data pipelines leveraging Kafka, Spark, and Hadoop to analyze booking and clickstream data. "
                "Implemented data models that improved data throughput by 15% and provided critical insights for decision-making, enhancing operational efficiency."
            ),
        },
        {
            "proj_name": "Spar Nord Bank ETL",
            "proj_start_date": "2024-12-04",
            "proj_end_date": "2024-12-12",
            "proj_description": (
                "Managed the ETL process for transactional data from MySQL RDS to Amazon Redshift. "
                "Conducted thorough data analysis to identify patterns in usage, optimizing ATM refueling processes and achieving a 20% reduction in unnecessary refills."
            ),
        },
    ],
    "education": [
        {
            "edu_course": "Master of Science in Data Analytics",
            "edu_institution": "San Jose State University",
            "edu_start_date": "2024-12-02",
            "edu_end_date": "2024-12-11",
        },
        {
            "edu_course": "PG Diploma in Data Science Specialization in Data Engineering",
            "edu_institution": "Visvesvaraya Technological University",
            "edu_start_date": "2024-12-03",
            "edu_end_date": "2024-12-04",
        },
        {
            "edu_course": "Bachelor’s in Computer Science",
            "edu_institution": "Indian Institute of Information Technology",
            "edu_start_date": "2024-12-11",
            "edu_end_date": "2024-12-12",
        },
    ],
}

In [37]:
# Echo the dictionary so the notebook shows its parsed contents.
# NOTE(review): the rendered output includes personal contact details (email, phone) — clear it before sharing.
resume_dict

{'user_name': 'Shashank Shashishekhar Reddy',
 'user_email': 'shashankshashishekharreddy@gmail.com',
 'user_phone': '(510) 892-7191',
 'user_city': 'San Jose, California',
 'user_linkedin_link': 'https://www.linkedin.com/in/snkreddy',
 'user_summary': 'Results-driven Data Engineer with extensive experience in administering and enhancing data infrastructure, developing ETL pipelines, and supporting statistical data analysis for manufacturing environments. Skilled in creating efficient data models and facilitating communication across engineering teams. Proficient in Python, SQL, and data visualization tools to deliver actionable insights that improve production performance and drive decision-making.',
 'user_skills': 'Certificates: AWS Cloud Practitioner | Validation Code: QD8E074C82EQ1WCE Programming Languages: Python, C++, Java, R, SQL, Bash, JavaScript Cloud Services: AWS (EC2, S3, Lambda, Glue, Amazon Redshift), GCP, Azure Databases: MySQL, MongoDB, PostgreSQL, RDS, Elasticsearch. T

In [271]:
import re
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Callable, Dict, List

from docx import Document
from docx.enum.table import WD_ALIGN_VERTICAL
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_BREAK
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls
from docx.shared import Pt, Inches

@dataclass
class Section:
    """A named résumé section paired with the callable that renders it."""

    # Label reported in the error message when rendering this section fails.
    title: str
    # Zero-argument callable that writes the section into the document when invoked.
    content_generator: Callable[[], None]
class ResumeGenerator:
    """Render a résumé dictionary into a .docx file using python-docx.

    The input dictionary is expected to provide the keys read by the section
    writers: ``user_name``, ``user_city``, ``user_phone``, ``user_email``,
    ``user_summary``, ``user_skills``, optional ``user_linkedin_link``, and the
    lists ``experiences``, ``projects`` and ``education``.
    """

    MARGIN = 0.5                      # page margin on every side, in inches
    DATE_INPUT_FORMAT = '%Y-%m-%d'    # format of the dates in the input data
    DATE_OUTPUT_FORMAT = '%b %Y'      # format of the dates written to the document
    NAME_FONT_SIZE = 14
    DEFAULT_FONT_SIZE = 11
    FONT_NAME = "Times New Roman"

    # Children of <w:pPr> that must come AFTER <w:pBdr> in the WordprocessingML
    # schema sequence; used to insert the border element at a valid position.
    _PPR_AFTER_PBDR = (
        'w:shd', 'w:tabs', 'w:suppressAutoHyphens', 'w:kinsoku', 'w:wordWrap',
        'w:overflowPunct', 'w:topLinePunct', 'w:autoSpaceDE', 'w:autoSpaceDN',
        'w:bidi', 'w:adjustRightInd', 'w:snapToGrid', 'w:spacing', 'w:ind',
        'w:contextualSpacing', 'w:mirrorIndents', 'w:suppressOverlap', 'w:jc',
        'w:textDirection', 'w:textAlignment', 'w:textboxTightWrap',
        'w:outlineLvl', 'w:divId', 'w:cnfStyle', 'w:rPr', 'w:sectPr',
        'w:pPrChange',
    )

    def __init__(self, template_path: str):
        """Open the template and start a fresh output document.

        Args:
            template_path: Path of a .docx file. It is opened (so it must be
                readable) and kept on ``self.template``; the generated content
                is written to a separate blank document.
        """
        self.template = Document(template_path)
        self.document = Document()
        self._setup_margins()

    def _setup_margins(self) -> None:
        """Apply ``MARGIN`` inches to all four margins of every document section."""
        for section in self.document.sections:
            for margin in ['top_margin', 'bottom_margin', 'left_margin', 'right_margin']:
                setattr(section, margin, Inches(self.MARGIN))

    def _format_date(self, date_str: str) -> str:
        """Convert a ``YYYY-MM-DD`` string into ``Mon YYYY`` (e.g. ``Dec 2024``).

        Raises:
            ValueError: If ``date_str`` is not a string in the expected format.
        """
        try:
            date_obj = datetime.strptime(date_str, self.DATE_INPUT_FORMAT)
        except (ValueError, TypeError) as e:
            # TypeError covers non-string input such as None; report both uniformly.
            raise ValueError(f"Invalid date format. Expected YYYY-MM-DD, got {date_str}") from e
        return date_obj.strftime(self.DATE_OUTPUT_FORMAT).capitalize()

    @staticmethod
    def _split_sentences(text: str) -> List[str]:
        """Split free text into sentences without their terminating period.

        Only a period followed by whitespace or the end of the text ends a
        sentence, so values such as ``2.5%`` or ``Node.js`` stay intact.
        Empty fragments are dropped; ``None`` or ``""`` yields ``[]``.
        """
        fragments = re.split(r'\.(?=\s|$)', text or "")
        return [fragment.strip() for fragment in fragments if fragment.strip()]

    @staticmethod
    def _skill_lines(skills: str) -> List[str]:
        """Return the non-empty lines of ``skills``, each ending in exactly one period."""
        lines = []
        for raw_line in (skills or "").splitlines():
            # Drop any trailing periods first so "Terraform.." does not become "Terraform...".
            cleaned = raw_line.strip().rstrip('.').rstrip()
            if cleaned:
                lines.append(f"{cleaned}.")
        return lines

    @staticmethod
    def _abbreviate_institution(institution: str) -> str:
        """Build an acronym from the non-lowercase-initial words of an institution name.

        ``"San Jose State University"`` becomes ``"SJSU"``. A single-word name
        (for example an acronym such as ``"SJSU"``) is returned unchanged, and
        repeated spaces are tolerated.
        """
        words = (institution or "").split()
        if len(words) <= 1:
            return (institution or "").strip()
        acronym = "".join(word[0] for word in words if not word[0].islower())
        # Fall back to the full name if every word started with a lowercase letter.
        return acronym or " ".join(words)

    def _style_run(self, run, bold: bool, font_size: int) -> None:
        """Apply the shared font name plus the given weight and size to ``run``."""
        run.bold = bold
        run.font.size = Pt(font_size)
        run.font.name = self.FONT_NAME

    def _add_bottom_border(self, paragraph) -> None:
        """Draw a thin single line under ``paragraph`` (used for section headings)."""
        pPr = paragraph._p.get_or_add_pPr()
        bottom = parse_xml(f'<w:pBdr {nsdecls("w")}><w:bottom w:val="single" w:sz="4" w:space="1" w:color="auto"/></w:pBdr>')
        # Insert before any later-ordered sibling (spacing, jc, ...) instead of
        # appending, so the paragraph properties keep the schema-defined order.
        pPr.insert_element_before(bottom, *self._PPR_AFTER_PBDR)

    def _add_paragraph(self, text: str = "", bold: bool = False, spacing: int = 1,
                       font_size: int = DEFAULT_FONT_SIZE, align_center: bool = False,
                       add_border: bool = False):
        """Append a paragraph to the document and return it.

        Args:
            text: Paragraph text; no run is created when empty.
            bold: Whether the text run is bold.
            spacing: Space before and after the paragraph, in points.
            font_size: Font size of the text run, in points.
            align_center: Center the paragraph when true.
            add_border: Add a bottom border line when true.

        Returns:
            The newly created python-docx paragraph object.
        """
        paragraph = self.document.add_paragraph()
        if text:
            self._style_run(paragraph.add_run(text), bold, font_size)
        if align_center:
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        paragraph.paragraph_format.space_before = Pt(spacing)
        paragraph.paragraph_format.space_after = Pt(spacing)
        if add_border:
            self._add_bottom_border(paragraph)
        return paragraph

    def _add_two_column_table(self, left_text: str, right_text: str, bold: bool = False) -> None:
        """Append a one-row, two-cell table: ``left_text`` left-aligned, ``right_text`` right-aligned.

        Args:
            left_text: Text of the left cell.
            right_text: Text of the right cell.
            bold: Whether both cell texts are rendered bold.
        """
        table = self.document.add_table(rows=1, cols=2)
        table.autofit = True
        # NOTE(review): `allow_autofit` does not look like a python-docx Table
        # property; kept as in the original — confirm whether it has any effect.
        table.allow_autofit = True
        cells = (
            (table.cell(0, 0), left_text, WD_ALIGN_PARAGRAPH.LEFT),
            (table.cell(0, 1), right_text, WD_ALIGN_PARAGRAPH.RIGHT),
        )
        for cell, text, alignment in cells:
            paragraph = cell.paragraphs[0]
            self._style_run(paragraph.add_run(text), bold, self.DEFAULT_FONT_SIZE)
            paragraph.alignment = alignment
            paragraph.paragraph_format.space_before = Pt(0)
            paragraph.paragraph_format.space_after = Pt(0)

    def _add_bullets(self, description: str) -> None:
        """Write each sentence of ``description`` as its own bullet paragraph."""
        for sentence in self._split_sentences(description):
            self._add_paragraph(f"• {sentence}", spacing=1)

    def _add_header_section(self, data: Dict[str, Any]) -> None:
        """Write the name, the contact line and the summary block."""
        self._add_paragraph(data['user_name'], bold=True, font_size=self.NAME_FONT_SIZE, align_center=True)
        contact_info = f"{data['user_city']} | {data['user_phone']} | {data['user_email']}"
        # The LinkedIn link is optional: tolerate a missing key as well as an empty value.
        linkedin = data.get('user_linkedin_link')
        if linkedin:
            contact_info += f" | {linkedin}"
        self._add_paragraph(contact_info, align_center=True)
        self._add_paragraph("Summary", bold=True, add_border=True)
        self._add_paragraph(data['user_summary'])

    def _add_experience_section(self, experiences: List[Dict[str, Any]]) -> None:
        """Write the "Work Experience" heading followed by one entry per experience."""
        self._add_paragraph("Work Experience", bold=True, add_border=True)
        for exp in experiences:
            left_text = f"{exp['exp_role']} | {exp['exp_company']}"
            right_text = f"{self._format_date(exp['exp_start_date'])} - {self._format_date(exp['exp_end_date'])}"
            self._add_two_column_table(left_text, right_text, bold=True)
            self._add_bullets(exp['exp_description'])

    def _add_skills_section(self, skills: str) -> None:
        """Write the "Skills" heading followed by one paragraph per line of ``skills``."""
        self._add_paragraph("Skills", bold=True, add_border=True)
        for line in self._skill_lines(skills):
            self._add_paragraph(line, spacing=1)

    def _add_projects_section(self, projects: List[Dict[str, Any]]) -> None:
        """Write the "Academic Projects" heading followed by one entry per project."""
        self._add_paragraph("Academic Projects", bold=True, add_border=True)
        for project in projects:
            self._add_two_column_table(
                project['proj_name'],
                f"{self._format_date(project['proj_start_date'])} - {self._format_date(project['proj_end_date'])}",
                bold=True
            )
            self._add_bullets(project['proj_description'])

    def _add_education_section(self, education: List[Dict[str, Any]]) -> None:
        """Write the "Education" heading followed by one "<course> | <institution acronym>" row per entry."""
        self._add_paragraph("Education", bold=True, add_border=True)
        for edu in education:
            edu_institution_abbr = self._abbreviate_institution(edu['edu_institution'])
            self._add_two_column_table(
                f"{edu['edu_course']} | {edu_institution_abbr}",
                f"{self._format_date(edu['edu_start_date'])} - {self._format_date(edu['edu_end_date'])}",
                bold=True
            )

    def generate(self, data: Dict[str, Any], output_path: str) -> None:
        """Render every section from ``data`` and save the document to ``output_path``.

        Raises:
            ValueError: If any section fails to render; the message names the
                section and the original exception is attached as the cause.
        """
        # Document-wide defaults: font, size and single line spacing.
        normal_style = self.document.styles['Normal']
        normal_style.font.name = self.FONT_NAME
        normal_style.font.size = Pt(self.DEFAULT_FONT_SIZE)
        normal_style.paragraph_format.line_spacing = 1

        sections = [
            Section("Header", lambda: self._add_header_section(data)),
            Section("Experience", lambda: self._add_experience_section(data['experiences'])),
            Section("Skills", lambda: self._add_skills_section(data['user_skills'])),
            Section("Projects", lambda: self._add_projects_section(data['projects'])),
            Section("Education", lambda: self._add_education_section(data['education']))
        ]
        for section in sections:
            try:
                section.content_generator()
            except Exception as e:
                raise ValueError(f"Error generating {section.title} section: {str(e)}") from e
        self.document.save(output_path)

def fill_resume(sourcepath: str, destpath: str, data: Dict[str, Any]) -> None:
    """Generate a résumé document from ``data`` and save it to ``destpath``.

    Args:
        sourcepath: Path of the template .docx passed to ``ResumeGenerator``.
        destpath: Path the generated .docx is written to.
        data: Résumé dictionary forwarded to ``ResumeGenerator.generate``.

    Raises:
        Exception: If construction or generation fails; the message is prefixed
            with "Failed to generate resume:" and the original exception is
            attached as ``__cause__``.
    """
    try:
        generator = ResumeGenerator(sourcepath)
        generator.generate(data, destpath)
    except Exception as e:
        # Chain explicitly so the underlying failure stays reachable for callers.
        raise Exception(f"Failed to generate resume: {str(e)}") from e


In [272]:
# Root of the local FastApply checkout; both paths below are derived from it.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
PROJECT_DIR = "/Users/reddy/Documents/GitHub/FastApply/FastApply"
source = f"{PROJECT_DIR}/inputs/template.docx"   # template handed to fill_resume
dest = f"{PROJECT_DIR}/outputs/test.docx"        # where the generated résumé is saved

In [273]:
# Render resume_dict into a .docx saved at `dest`; `source` is the template path opened by ResumeGenerator.
fill_resume(source, dest, resume_dict)

In [282]:
# Scratch cell: inspect how a flat skills string breaks apart when split on single spaces.
input_string = "Certificates: AWS Cloud Practitioner | Validation Code: QD8E074C82EQ1WCE Programming Languages: Python, Java, C++, SQL, R, Bash Cloud Services: AWS (EC2, S3, Lambda, Glue, Amazon RDS, Amazon Redshift), GCP, Azure Databases: MySQL, PostgreSQL, MongoDB, RDS, Elasticsearch. Tools: Hadoop, Spark, PySpark, Airflow, Kafka, Pandas, Scikit-Learn, Keras, Tableau, Docker, Terraform.."

# Stays empty in this cell: the code that would fill it is commented out below.
data_dict = {}

# Split on single spaces (NOT on "key: value" pairs) and print one token per line.
items = input_string.split(' ')
for item in items:
    print(item)
    # Disabled attempt at key/value parsing; an individual token never contains ': ',
    # so this branch would not fire on space-split tokens.
    # if ': ' in item:
    #     key, value = item.split(': ', 1)
    #     data_dict[key.strip()] = value.strip()

# print(data_dict)

Certificates:
AWS
Cloud
Practitioner
|
Validation
Code:
QD8E074C82EQ1WCE
Programming
Languages:
Python,
Java,
C++,
SQL,
R,
Bash
Cloud
Services:
AWS
(EC2,
S3,
Lambda,
Glue,
Amazon
RDS,
Amazon
Redshift),
GCP,
Azure
Databases:
MySQL,
PostgreSQL,
MongoDB,
RDS,
Elasticsearch.
Tools:
Hadoop,
Spark,
PySpark,
Airflow,
Kafka,
Pandas,
Scikit-Learn,
Keras,
Tableau,
Docker,
Terraform..


In [280]:
# Show the type currently bound to data_dict in the kernel.
type(data_dict)

dict

In [274]:
# Second sample résumé payload, same schema as consumed by ResumeGenerator / fill_resume.
# Institutions are given as acronyms here; all dates use the YYYY-MM-DD input format.
new_resume_dict = {
    "user_name": "Shashank Shashishekhar Reddy",
    "user_email": "shashankshashishekharreddy@gmail.com",
    "user_phone": "(510) 892-7191",
    "user_city": "San Jose, California",
    "user_linkedin_link": "https://www.linkedin.com/in/snkreddy",
    "user_summary": (
        "Data Engineer with extensive experience in designing, deploying, and managing data applications and infrastructures to support high-throughput manufacturing environments. "
        "Proficient in developing complex data retrieval solutions and conducting statistical analyses to drive data-driven insights for production optimization. "
        "Highly skilled in collaborating with cross-functional teams to adapt data solutions to evolving business needs, ensuring scalability and efficiency in manufacturing processes."
    ),
    "user_skills": (
        "Certificates: AWS Cloud Practitioner | Validation Code: QD8E074C82EQ1WCE "
        "Programming Languages: Python, Java, C, C++, R, SQL, Bash, JavaScript "
        "Cloud Services: AWS (EC2, S3, Lambda, Glue), GCP, Azure "
        "Databases: MySQL, MongoDB, PostgreSQL, RDS, Elasticsearch. "
        "Tools: Hadoop, Spark, Sqoop, Hive, PySpark, Kafka, Docker, Terraform, JMP."
    ),
    "experiences": [
        {
            "exp_company": "Kantar",
            "exp_role": "Data Engineer",
            "exp_start_date": "2024-12-03",
            "exp_end_date": "2024-12-04",
            "exp_description": (
                "Administered and enhanced data applications using Apache Spark and Airflow to monitor and optimize production processes for ads and sales data. "
                "Developed models for key statistics, ensuring sound documentation for functionalities and procedures. "
                "Facilitated training sessions for end-users, improving understanding of data applications which resulted in better data utilization across teams."
            ),
        },
        {
            "exp_company": "The Sparks Foundation",
            "exp_role": "Data Engineer",
            "exp_start_date": "2024-12-11",
            "exp_end_date": "2024-12-12",
            # The embedded line break is written as an explicit escape so the
            # literal stays valid Python.
            "exp_description": (
                "Engineered and maintained ETL pipelines to consolidate various data sources, yielding a 25% improvement in data integration. \n"
                "Collaborated closely with engineering teams to provide data analysis supporting production processes, achieving a 30% reduction in retrieval times through optimized querying and indexing strategies. "
                "Generated dashboards for stakeholder insights through real-time data visualization."
            ),
        },
        {
            "exp_company": "The Sparks Foundation",
            "exp_role": "Data Engineer Intern",
            "exp_start_date": "2024-12-04",
            "exp_end_date": "2024-12-12",
            "exp_description": (
                "Supported the administration of data applications and contributed to the seamless integration of new data sources. "
                "Assisted in monitoring production data analytics to inform engineering decisions. "
                "Enhanced data processing efficiency by automating data cleaning processes, thus improving the accuracy of data insights."
            ),
        },
    ],
    "projects": [
        {
            "proj_name": "Your Own Cabs Analysis",
            "proj_start_date": "2024-12-04",
            "proj_end_date": "2024-12-05",
            "proj_description": (
                "Designed and implemented data pipelines for booking and clickstream analysis using Kafka, Spark, and Hadoop. "
                "Enhanced data ingestion processes, processing clickstream data efficiently into AWS components, and generating actionable insights to optimize customer engagement and service quality."
            ),
        },
        {
            "proj_name": "Spar Nord Bank ETL",
            "proj_start_date": "2024-12-04",
            "proj_end_date": "2024-12-12",
            "proj_description": (
                "Managed ETL processes for Spar Nord Bank’s transactional data, delivering optimized data flow into Amazon Redshift. "
                "Conducted comprehensive analyses to highlight usage patterns, influencing operational strategies for ATM refilling and enhancing overall efficiency."
            ),
        },
    ],
    "education": [
        {
            "edu_course": "Master of Science in Data Analytics",
            "edu_institution": "SJSU",
            "edu_start_date": "2024-12-02",
            "edu_end_date": "2024-12-11",
        },
        {
            "edu_course": "PG Diploma in Data Science Specialization in Data Engineering",
            "edu_institution": "VTU",
            "edu_start_date": "2024-12-03",
            "edu_end_date": "2024-12-04",
        },
        {
            "edu_course": "Bachelor’s in Computer Science",
            "edu_institution": "IIIT",
            "edu_start_date": "2024-12-11",
            "edu_end_date": "2024-12-12",
        },
    ],
}

In [275]:
# Echo the dictionary so the notebook shows its parsed contents.
# NOTE(review): the rendered output includes personal contact details (email, phone) — clear it before sharing.
new_resume_dict

{'user_name': 'Shashank Shashishekhar Reddy',
 'user_email': 'shashankshashishekharreddy@gmail.com',
 'user_phone': '(510) 892-7191',
 'user_city': 'San Jose, California',
 'user_linkedin_link': 'https://www.linkedin.com/in/snkreddy',
 'user_summary': 'Data Engineer with extensive experience in designing, deploying, and managing data applications and infrastructures to support high-throughput manufacturing environments. Proficient in developing complex data retrieval solutions and conducting statistical analyses to drive data-driven insights for production optimization. Highly skilled in collaborating with cross-functional teams to adapt data solutions to evolving business needs, ensuring scalability and efficiency in manufacturing processes.',
 'user_skills': 'Certificates: AWS Cloud Practitioner | Validation Code: QD8E074C82EQ1WCE Programming Languages: Python, Java, C, C++, R, SQL, Bash, JavaScript Cloud Services: AWS (EC2, S3, Lambda, Glue), GCP, Azure Databases: MySQL, MongoDB, Po