In [1]:
from langchain_community.document_loaders import Docx2txtLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_vertexai import ChatVertexAI

In [25]:
# Prompt used to generate a new HR policy from the text of an existing DOCX
# template. Placeholders filled at invoke time: template_text, company_name,
# company_size, tone, company_category, country.
#
# Python joins adjacent string literals with no separator, so every fragment
# below ends with its own "\n"; without it sentences and rule bullets fuse
# together in the rendered prompt. The template text is fenced with "---" on
# both sides so the model can tell where the reference ends and the
# instructions resume.
policy_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "you are an HR policy writer creating policies.\n"
     "you will receive an HR POLICY Template which is extracted from a DOCX file.\n"
     "Generate a New policy that follows same structure/headings style, but write ORIGINAL content.\n"
     "Rules:\n"
     "- Do not copy long phrases verbatim from the template\n"
     "- Generate a concise and well-structured policy"),
    ("user",
     "TEMPLATE (reference): \n---\n{template_text}\n---\n"
     "Generate policy:\n"
     "- company: {company_name}\n"
     "- company size: {company_size}\n"
     "- tone: {tone} \n"
     "- company category: {company_category}\n"
     "- country context: {country}\n"
     "Return only Markdown"),
])

In [28]:
# Read one policy template from disk and flatten the loaded documents into a
# single string, separating documents with a blank line.
loader = Docx2txtLoader(
    "../docs/Policy/CompanyPolicies/AttendancePolicy.docx"
)
docs = loader.load()
template_text = "\n\n".join([d.page_content for d in docs])

In [31]:
# Show the raw text extracted from the DOCX template.
print(template_text)

Attendance Policy



1. OVERVIEW



Each employee at [Company Name] is responsible for punctual and consistent attendance. Employees should arrive on time, be prepared to work, and on schedule. Employees are also expected to stay at work for the whole of their shift. It is inconvenient to arrive late, leave early, or miss other scheduled hours, and it must be avoided.



This policy does not apply to FMLA-covered (FMLA - Family and Medical Leave Act) absences or leave taken as a reason

able accommodation under the Americans with Disabilities Act (ADA). They have their own policies that cover these exceptions.



2. OBJECTIVE

The goal of this policy is to lay out [Company Names] policies and processes for dealing with employee absences and tardiness in order to increase the company's efficiency and reduce unscheduled absences.



3. ATTENDANCE INFRACTIONS CALCULATION



Absent with calls - 1 Point 

Absent with no calls - 2 Points 

Tardy - ½ Point

Early Departure - ½ Point

Returnin

In [None]:
# NOTE(review): this repeats the earlier print(template_text) cell and has no
# execution count (In [None]) — looks like a leftover; consider removing it.
print(template_text)

In [32]:
# Chat model shared by every generation cell below.
# Temperature is kept low (0.2) — presumably so policy wording stays consistent
# between runs; output is capped at 4096 tokens.
llm_settings = dict(
    model_name="gemini-2.5-flash",
    temperature=0.2,
    max_output_tokens=4096,
)
llm = ChatVertexAI(**llm_settings)

  llm = ChatVertexAI(


In [33]:
# Pipe the prompt into the model, then generate one policy from the template
# text loaded above using a fixed example company profile.
chain = policy_prompt | llm

policy_inputs = {
    "template_text": template_text,
    "company_name": "Acme Corp",
    "company_size": "10000 employees",
    "tone": "professional",
    "company_category": "Information Technology",
    "country": "India",
}
response = chain.invoke(policy_inputs)

In [34]:
# Print the model's reply using the message's own pretty_print formatting.
response.pretty_print()


# Attendance Policy

## 1. OVERVIEW

At Acme Corp, consistent and punctual attendance is fundamental to our operational efficiency and team collaboration. Every employee is expected to arrive at their workstation on time, prepared to commence work, and remain for their entire scheduled shift. Unscheduled absences, tardiness, or early departures disrupt workflow and impact team productivity, and as such, must be minimized.

This policy outlines the company's expectations regarding attendance and punctuality. It does not supersede or apply to absences covered under specific statutory leaves (e.g., Maternity Benefit Act, Rights of Persons with Disabilities Act) or other company-approved leaves, which are governed by their respective policies.

## 2. OBJECTIVE

The primary objective of this policy is to establish clear guidelines and procedures for managing employee attendance and addressing instances of tardiness or absence. This aims to foster a culture of accountability, ensure optimal

In [35]:
import os

def combine_dir_with_markdown(dir_path, docx_path):
    """Build the Markdown output path that corresponds to a DOCX file.

    Only the file name of ``docx_path`` is used: its extension is dropped,
    spaces in the remaining name become underscores, and ``.md`` is appended.

    Args:
        dir_path: Directory the Markdown file should live in.
        docx_path: Path (or bare file name) of the source document.

    Returns:
        ``dir_path`` joined with the derived Markdown file name.
    """
    stem, _extension = os.path.splitext(os.path.basename(docx_path))
    return os.path.join(dir_path, f"{stem.replace(' ', '_')}.md")

In [37]:
from langchain_community.document_loaders import DirectoryLoader
# Generate one Markdown policy per DOCX template found in the policies folder.
chain = policy_prompt | llm

directory_loader = DirectoryLoader(
    "../docs/Policy/CompanyPolicies",
    glob="*.docx",
    loader_cls=Docx2txtLoader)

# Make sure the destination exists so the first write does not fail with
# FileNotFoundError on a fresh checkout.
output_dir = "../generated_data"
os.makedirs(output_dir, exist_ok=True)

docs = directory_loader.load()
for doc in docs:
    # Use only the current document as the template. Joining every loaded
    # document here would feed all templates into each request and produce
    # the same blended policy for every output file.
    template_text = doc.page_content
    path = combine_dir_with_markdown(output_dir, doc.metadata['source'])
    response = chain.invoke(
        {
            "template_text": template_text,
            "company_name": "Acme Corp",
            "company_size": "10000 employees",
            "tone": "professional",
            "company_category": "Information Technology",
            "country": "India"
        })
    # Explicit UTF-8: policy text can contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to handle them.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(response.content)