In [None]:
import os
import time
from docx import Document
from pdfminer.high_level import extract_text  
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the T5 tokenizer and conditional-generation model from the
# "google/flan-t5-large" checkpoint. Both are module-level globals that the
# helper functions below read directly.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")

In [None]:




def count_tokens(text: str) -> int:
    """Return the number of token ids the module-level ``tokenizer`` yields for *text*.

    The text is encoded with ``add_special_tokens=False``, so special tokens
    are not included in the count.
    """
    return len(tokenizer.encode(text, add_special_tokens=False))

def extract_text_from_docx(docx_path):
    """Return the paragraph text of a .docx file as one newline-joined string.

    Args:
        docx_path: Path of the document, passed to ``Document``.

    Returns:
        The ``text`` of every entry in the document's ``paragraphs``, joined
        with ``"\\n"``.
    """
    paragraphs = Document(docx_path).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)

def extract_text_from_pdf_pdfminer(pdf_path):
    """Return whatever pdfminer's ``extract_text`` produces for *pdf_path*."""
    return extract_text(pdf_path)
def process_resume(text, question):
    """Ask the model a question about a resume and return the decoded answer.

    The resume and the question are embedded in a fixed prompt, encoded with
    the module-level ``tokenizer`` and passed to ``model.generate``. Sampling
    is enabled (``do_sample=True``), so the answer may differ between calls.

    Args:
        text: Plain-text contents of the resume.
        question: Question to be answered from the resume.

    Returns:
        The first generated sequence decoded to a string with special tokens
        skipped.
    """
    # Build the prompt in a single f-string pass. Interpolated values are not
    # re-parsed, so curly braces in the resume or the question stay literal.
    # (Running ``str.format`` over an already-interpolated question would
    # treat its braces as replacement fields and raise.)
    input_text = f"""You are a resume parser. Extract the correct response from the resume for the following question:
                            Based on the following resume, answer the question:
                            
                            Resume:
                            {text}
                            
                            Question: {question}
                            Answer:
                            """

    input_ids = tokenizer.encode(input_text, return_tensors="pt")

    sample_outputs = model.generate(
        input_ids,
        do_sample=True,
        max_length=500,
        top_k=50,
        top_p=0.95,
        temperature=0.3,
        num_return_sequences=1,
    )

    # Only one sequence is requested, so decode the first (and only) result.
    return tokenizer.decode(sample_outputs[0], skip_special_tokens=True)

def extract_details_from_resume(resume_text):
    """Return the model's answer describing the resume's personal information.

    Sends a fixed question (name, phone number, email ID and location) together
    with *resume_text* to ``process_resume``.

    Args:
        resume_text: Plain-text contents of the resume.

    Returns:
        The string returned by ``process_resume`` for the fixed question.
    """
    question = "What are the personal information details from the given resume ? (Include Name, Phone Number, Email ID, and Location)"
    # Timing is left to the caller; the previous start/end timestamps taken
    # here were never read.
    return process_resume(resume_text, question)

# Folder (relative to the current working directory) containing the resumes.
resumes_folder = 'Resumes'

# Process every .pdf / .docx entry in the folder, printing the extracted
# personal information, the resume's token count and the per-file timing.
overall_start_time = time.time()
for filename in os.listdir(resumes_folder):
    file_path = os.path.join(resumes_folder, filename)
    # Match extensions case-insensitively so e.g. "CV.PDF" is handled as well.
    if filename.lower().endswith('.pdf'):
        resume_text = extract_text_from_pdf_pdfminer(file_path)
    elif filename.lower().endswith('.docx'):
        resume_text = extract_text_from_docx(file_path)
    else:
        print(f"Unsupported file format for {filename}")
        # Skip this entry: without this, the code below would re-process the
        # previous file's text (or raise NameError on the very first entry).
        continue

    start_time = time.time()
    details = extract_details_from_resume(resume_text)
    end_time = time.time()
    print(f"Resume: {filename}")

    print("Total Tokens: ", count_tokens(resume_text))
    print("Personal Information:", details)
    print(f"Time taken to generate response: {end_time - start_time:.4f} seconds")
    print("\n" + "=" * 50 + "\n")

overall_end_time = time.time()
print("-----------------")
print(f"Overall Time taken to generate response: {overall_end_time - overall_start_time:.4f} seconds")
print("-----------------")
