In [7]:
from googleapiclient.discovery import build
from google.oauth2 import service_account
import io
from googleapiclient.http import MediaIoBaseDownload
import pdfplumber
import docx

# 🔹 Load credentials from your Google Service Account
SERVICE_ACCOUNT_FILE = "tensile-axiom-425220-d2-8cb1f313d613.json"
SCOPES = ["https://www.googleapis.com/auth/drive"]

creds = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_FILE, scopes=SCOPES
)

# 🔹 Initialize the Google Drive API service
drive_service = build("drive", "v3", credentials=creds)

# 🔹 Define the folder ID where resumes are stored
FOLDER_ID = "1oTBOho6yIrxqdk5RCe6QPuEhxhEOYNJ2"  # Replace with your actual folder ID

# 🔹 Function to recursively fetch all files from a folder (including subfolders)
def get_all_files_from_folder(folder_id):
    all_files = []

    # Step 1: Fetch files inside the folder
    query = f"'{folder_id}' in parents and (mimeType='application/pdf' or mimeType='application/vnd.openxmlformats-officedocument.wordprocessingml.document')"
    results = drive_service.files().list(q=query, fields="files(id, name, mimeType)").execute()
    files = results.get("files", [])

    all_files.extend(files)  # Add found files

    # Step 2: Fetch subfolders
    subfolder_query = f"'{folder_id}' in parents and mimeType='application/vnd.google-apps.folder'"
    subfolder_results = drive_service.files().list(q=subfolder_query, fields="files(id, name)").execute()
    subfolders = subfolder_results.get("files", [])

    # Step 3: Recursively fetch files from subfolders
    for subfolder in subfolders:
        subfolder_id = subfolder["id"]
        subfolder_name = subfolder["name"]
        print(f"📁 Searching in subfolder: {subfolder_name}")
        all_files.extend(get_all_files_from_folder(subfolder_id))  # Recursive call

    return all_files

# 🔹 Function to extract text from a Google Drive file (without downloading)
def extract_text_from_drive_file(file_id, mime_type):
    request = drive_service.files().get_media(fileId=file_id)
    file_stream = io.BytesIO()
    downloader = MediaIoBaseDownload(file_stream, request)
    
    done = False
    while not done:
        _, done = downloader.next_chunk()
    
    file_stream.seek(0)
    
    # Process PDF
    if mime_type == "application/pdf":
        with pdfplumber.open(file_stream) as pdf:
            text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
        return text
    
    # Process DOCX
    elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        doc = docx.Document(file_stream)
        return "\n".join([para.text for para in doc.paragraphs])

    return ""

# 🔹 Fetch all resumes & extract text directly from Google Drive (including subfolders)
resume_files = get_all_files_from_folder(FOLDER_ID)
resume_texts = {}

print(f"✅ Found {len(resume_files)} resumes including subfolders!")

for file in resume_files:
    text = extract_text_from_drive_file(file["id"], file["mimeType"])
    if text:
        resume_texts[file["name"]] = text
        print(f"✅ Processed: {file['name']}")

# 🔹 Print a summary
print(f"\n✅ Extracted text from {len(resume_texts)} resumes!")

# (Optional) Save extracted text to a file
with open("cleaned_resumes.txt", "w", encoding="utf-8") as f:
    for filename, text in resume_texts.items():
        f.write(f"### {filename} ###\n{text}\n\n")

print("✅ Cleaned resumes saved!")


📁 Searching in subfolder: PM_resumes&portfolio
📁 Searching in subfolder: Bhavishya Agarwal
📁 Searching in subfolder:  Portfolio
📁 Searching in subfolder: Resume
📁 Searching in subfolder: Vandhana_Comandur_PM
📁 Searching in subfolder: Utkarsh_Bindal_APM
📁 Searching in subfolder: Sumeet Karwa APM
📁 Searching in subfolder: Sumeet APM Portfolio
📁 Searching in subfolder: Raghuraman_Rajendran_PM
📁 Searching in subfolder: Aditya_Sisodia_PM
📁 Searching in subfolder: Himanshi_APM
📁 Searching in subfolder: Pratik Ailani_ APM
📁 Searching in subfolder: Aradhya_Mishra_APM
📁 Searching in subfolder: Aradhya_Mishra_APM_resume
📁 Searching in subfolder: Pavneet_Kaur_PM_Portfolio
📁 Searching in subfolder: Meghana_PM
📁 Searching in subfolder: Aniruddh_Solanki_APM
📁 Searching in subfolder: Riti S_PM
📁 Searching in subfolder: Vaibhav_Mehta_PM_Resume
📁 Searching in subfolder: Aryan_Pasricha_APM
📁 Searching in subfolder: Wesley_APM_Resume
📁 Searching in subfolder: AnkitJha_APM
📁 Searching in subfolder: Mukesh

CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: a AnmolAgrawalPM.pdf
✅ Processed: Bhavan_PGP24039_V3.docx
✅ Processed: CV_Rachit.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Prayansh_Srivastava_PM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Kanika_Behl_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Hemant_kushwah_APM_resume.pdf
✅ Processed: Pamli_Product Manager(2+ yr exp).pdf
✅ Processed: Avi_Gupta_PM_resume.docx
✅ Processed: Ayushi_Dantulwar_PM_Resume.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Siddhant Malani- Resume- Product Roles.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Aryan_Pasricha_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Prachi_Singh_APM_Resume.pdf
✅ Processed: Ramya_Jayaraman_APM_resume.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Jitender_Jagarwal_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: CV_Apurva_Deshpande.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Sumeet_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Kavita_Adhikari_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: . AnkitJha_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Sharvani Jadhav resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rahul_Sharma_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Harshit Garg(2).pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nitin_Thakur_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: DEEPIKA_GUGULOTHU_PM_RESUME.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Avi_Gupta_PM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Ankit_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rahul_Sharma_PM_Portfolio.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Shaino_Sajimon_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Ayushi_Dantulwar_PM_Resume.pdf
✅ Processed: Aditya_Sisodia_Resume_PM.docx
✅ Processed: Anurag Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Hetal_Verma_PM_Resume.pdf
✅ Processed: Pulkit_Prakash_PM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Mohammed Istiyaq_PM_Resume & Portfolio Link.pdf
✅ Processed: Anmol_Rattan_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nihar_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Sohini_Chakraborty_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Aadesh_Mittal_PM_Resume.pdf
✅ Processed: AntrikshKhandegar-IITBombay.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Arpit Jain_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Swapnil_Bukshete_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Zeeshan_Anjum_PM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Ashish Kumar Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Monica_Yeluri_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Prakhar_Prashant_PM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Aiushe_Mishra_PM_Resume.pdf
✅ Processed: Tejas_Nagpure_PM_resume.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Aditya_Sisodia_Resume_PM.pdf
✅ Processed: Swapnil_Kale_PM_Resume.docx
✅ Processed: Swati_Mishra_PM_Resume.docx
✅ Processed: Manish_PM_resume.pdf
✅ Processed: Joshan_Babu_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Prapti_Jain_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Riya_Diwan_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Vaibhav_ Gupta_PM_resume.pdf
✅ Processed: Prithviraj Singh.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Prashant_Dhiman_PM_Resume.pdf
✅ Processed: Prashansa_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: SatishBabuSanjamala_Sr Engineering Manger.pdf
✅ Processed: Arinjit_Resume_Oct2023_C.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Vanshdeep_Madan_PM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Pratik_Wankhede_PM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Titiksha_Ganjoo_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Krishna_Chaitanya_PM_ Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: PM _Palak Jain.pdf
✅ Processed: Prasad_Tambekar_PM_Resume.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: CV - Tamanna Yadav.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rucha_Sonje_PM_Resume.pdf
✅ Processed: Navneet_Singh_PM_Resume.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Sakshi Chandhok_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Divya Rani Lakra_PM_Resume.pdf
✅ Processed: Shubham_PM_Resume.docx
✅ Processed: Akshita Singh Bais_PM_Resume.docx
✅ Processed: Rajkumar_PM_Resume.pdf
✅ Processed: Product CV Komal.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Daksh_Dudhat_PM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rahul_Sikder-PM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Abhishek_Singh_PM_Resume.pdf
✅ Processed: Roshan_Borkar_PM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Anuj Goyal Resume for PM _ SPM Profiles.pdf
✅ Processed: Marshal Tavakar Product Manager Resume-9680018108.docx
✅ Processed: Sashank_Yadavilli_PM.pdf
✅ Processed: Akshita Singh Bais_PM_Resume.pdf
✅ Processed: Rashikant_Raj_Product_Manager_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Ranajna_kumtekar_PM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Shashwat_Saxena_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Anish_Kapoor_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nikhil_Mahadik_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Shubhi_Arora_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Priyanka_Thakran_PM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Tejas_Nagpure_PM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: MisalAvinashCV.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Shubham_PM_Resume.pdf
✅ Processed: Aniruddha_Rao_PM_Resume.pdf
✅ Processed: Shikha Jain - PM Resume.pdf
✅ Processed: EatClub.pdf
✅ Processed: Book My Show.pdf
✅ Processed: Quick Ride.pdf
✅ Processed: SWIGGY.pdf
✅ Processed: Bhavishya Agarwal-Monetization Features on Paytm Insider.pdf
✅ Processed: bhavishya.pdf
✅ Processed: Vandhana_Comandur_PM_Resume.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Vandhana_Comandur_PM_Resume.pdf
✅ Processed: Vandhana_Comandur_PM_Portfolio.pdf
✅ Processed: Product Portfolio Link.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Resume_Utkarsh.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Sumeet APM Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: IITKGP Flipkart APM [Shortlisted].pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Using VR to Solve Trade Challenges for DPWorld @BigTechProject.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: First attempt at building_ WayOutCareers.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Product Case_ Building loyalty programs for Indian SMBs.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Building for the future of logistics as a VR Product Management Intern @DPWorld.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Product Case_ Consumer problems in the Indian consumer finance space_ family neo-banking.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Updated_ Links to all work_portfolio.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Raghu_resume.pdf
✅ Processed: Aditya_Sisodia _Portfolio_PM.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Aditya_Sisodia _Portfolio_PM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Aditya_Sisodia_Resume_PM.pdf
✅ Processed: HIMANSHI _cv - nov 2023.pdf
✅ Processed: Gokul Badrinarayanan Aug 2023.pdf
✅ Processed: PM Portfolio- Pratik Ailani.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: PM Portfolio- Pratik Ailani.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Pratik ailani resume.docx (2).pdf
✅ Processed: Aradhya_Mishra_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def

✅ Processed: GoodReads_PMSchool_Pavneet.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: 2111478_Pavneet Kaur.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Pavneet Kaur_IIMB_Paytm.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: 2111478_Pavneet Kaur (1).pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: AppDynamics_Pavneet Kaur_2111478.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Pavneet Kaur_ChatGPT_MS.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Meghana_ Portfolio.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Meghana_ Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Aniruddh_Solanki_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Program Manager & Business Analyst with 10+ Work Exp.docx (1).pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Vaibhav Mehta Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Aryan_Pasricha_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Wesley_APM_Portfolio.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Wesley - Product Management.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: AnkitJha_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def

✅ Processed: Homesfy_Assignment_Ankit_Jha.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def

✅ Processed: MyFavouriteProduct.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def

✅ Processed: SwiggyMini_CaseStudy.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Mukesh_PM_Portfolio_Link.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Mukesh_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Abhinand_PM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: _Sudheer_Gurram_PM_Portfolio.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: _Sudheer_Gurram_PM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Hitesh_Gawhade_Resume_Updated.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Hemant_kushwah_APM_resume.pdf.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Hemendra_Pratap_Singh_SPM_Resume.pdf
✅ Processed: Rajeshwari_Kini_SPM.docx
✅ Processed: Prateek_Gahlowt_SPM_Resume.docx
✅ Processed: SnehalDamale_SPM.docx
✅ Processed: Rahul_Bareja_SPM_resume.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Avi_Gupta_SPM_resume.pdf
✅ Processed: Avi_Gupta_SPM_resume.docx
✅ Processed: IshanGupta_SPM_Resume.docx
✅ Processed: Hariharan V Product Management Resume.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Prateek_Gahlowt_SPM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rajeshwari_Kini_SPM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Sharvani Jadhav resume.pdf
✅ Processed: Rahul_Bareja_SPM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nitin_Thakur_SPM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nitin Bhat Resume_Product Manager_1.0.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rahul_SPM_Resume.pdf
✅ Processed: Subhrajyoti_Saha_SPM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Kalpana_Sharma_SPM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Zeeshan_Anjum_SPM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Prabu_Venkatraman_Resume_SPM_Roles_US_Remote.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Prakhar_Prashant_SPM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Vaibhav_ Gupta_SPM_resume.pdf
✅ Processed: Satyendra_Talreja_SPM_resume .pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Mandar Chiplunkar_IITB_Resume_N.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Mihir_Patel_SPM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Vanshdeep_Madan_SPM_resume.pdf
✅ Processed: Sr Product_Manager.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nupur_Tyagi_SPM_Resume.pdf
✅ Processed: Raina_Singh_SPM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: IshanGupta_SPM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Samiksha_Seth_SPM_Resume.pdf
✅ Processed: Product CV Komal.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Binit Kumar_SPM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Kamal_Chelani_SPM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: SnehalDamale_SPM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Anuj_Garg_SPM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rohit SPM Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Hariharan V Product Management Resume.pdf
✅ Processed: Resume_ChiragSaini_SPM.pdf
✅ Processed: Resume - Siddhant Sahni 31.08.2023.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nishant_Raut_-_Senior_Technical_Product_Manager_Oracle.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Vishal_Ponugoti_SPM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Raj_Roy_SPM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rishabh_Singh_SPM_resume.pdf
✅ Processed: Vandhana_Comandur_SPM_Portfolio.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Vandhana_Comandur_SPM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Shaino_Sajimon_APM_Resume.pdf
✅ Processed: Sunidhi_Kashyap_APM_Portfolio.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Zaid_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Zaid_APM_Portfolio.pdf
✅ Processed: Pulkit_Prakash_APM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nitin_Kumar_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Prashant_Katiyar_APM_Resume.pdf
✅ Processed: Shubham_APM_Resume.docx


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Ramesh_Kumar_Saragadam_APM_portfolio.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Ramesh_Kumar_Saragadam_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Monalisa_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Sunidhi_Kashyap_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nancy_Pewal_APM.pdf
✅ Processed: NISHI_MALL_CV.pdf
✅ Processed: NISHI_MALL_RESUME.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nishi_Mall_Resume(1).pdf
✅ Processed: Nishi_Mall_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rahul_Sikder-APM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Resume -Sankalp Singhal.pdf
✅ Processed: Sashank_Yadavilli_APM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rishabh_Malhotra_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nikhil_Mahadik_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Nikhil_Mahadik_APM_Resume(1).pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Santhosh_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Shivam_Pandey_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Tejas_Nagpure_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Shubham_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: ParulShrivastava_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Sneha_Singh_APM_resume.pdf
✅ Processed: Yash_Sharma_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Santosh @CV( NIT Jaipur).pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Resume_Aniket Gaonkar_APM.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: VivekSingh_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Navya Sharma_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Pooja_TumkurRavishankar_APM_resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, def

✅ Processed: Rama krishna Thanneru RESUME.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Riddhish_Patel_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: SAhuja_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Rishikesh Parik Resume 2023.docx.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Mohan_Shivaprakash_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Venkat Pasumarthi_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Prateek_APM_Resume.pdf


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


✅ Processed: Suman_Sahu_APM_resume.pdf

✅ Extracted text from 239 resumes!
✅ Cleaned resumes saved!


In [8]:
pip install pdfplumber

Note: you may need to restart the kernel to use updated packages.


In [4]:
!pip uninstall python-docx
!pip install python-docx



Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting lxml>=3.1.0 (from python-docx)
  Downloading lxml-5.3.1-cp313-cp313-win_amd64.whl.metadata (3.8 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
Downloading lxml-5.3.1-cp313-cp313-win_amd64.whl (3.8 MB)
   ---------------------------------------- 0.0/3.8 MB ? eta -:--:--
   ---------- ----------------------------- 1.0/3.8 MB 5.6 MB/s eta 0:00:01
   ------------------------ --------------- 2.4/3.8 MB 6.1 MB/s eta 0:00:01
   ----------------------------------- ---- 3.4/3.8 MB 5.7 MB/s eta 0:00:01
   ----------------------------------- ---- 3.4/3.8 MB 5.7 MB/s eta 0:00:01
   ---------------------------------------- 3.8/3.8 MB 4.1 MB/s eta 0:00:00
Installing collected packages: lxml, python-docx
Successfully installed lxml-5.3.1 python-docx-1.1.2


In [9]:
import os
import pdfplumber
from docx import Document


# 📌 Step 1: Define Resume Folder
RESUME_FOLDER = "/content/drive/Othercomputers/My Laptop/Desktop/Placements/CV/PM resumes & portfolios-20240210T075719Z-001"

# 📌 Step 2: Recursively Find All Resumes (PDF & DOCX)
all_files = []
for root, _, files in os.walk(RESUME_FOLDER):
    for file in files:
        if file.endswith((".pdf", ".docx")):
            all_files.append(os.path.join(root, file))

print(f"✅ Found {len(all_files)} resumes!")

# 📌 Step 3: Define Functions to Extract Text
def extract_text_from_pdf(pdf_path):
    text = ""
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            extracted = page.extract_text()
            if extracted:
                text += extracted + "\n"
    return text

def extract_text_from_docx(docx_path):
    doc = Document(docx_path)
    return "\n".join([para.text for para in doc.paragraphs])

# 📌 Step 4: Extract Text from All Resumes
resume_texts = []
for file_path in all_files:
    if file_path.endswith(".pdf"):
        extracted_text = extract_text_from_pdf(file_path)
    elif file_path.endswith(".docx"):
        extracted_text = extract_text_from_docx(file_path)
    else:
        continue  # Skip files that are not PDFs or DOCX

    if extracted_text.strip():  # Ensure it's not empty
        resume_texts.append(extracted_text)

print(f"✅ Extracted text from {len(resume_texts)} resumes!")

# 📌 Step 5: (Optional) Save All Extracted Text into a Single File
all_resumes_combined = "\n\n".join(resume_texts)

with open("all_resumes.txt", "w", encoding="utf-8") as f:
    f.write(all_resumes_combined)

print("✅ Saved all extracted text into 'all_resumes.txt'!")


✅ Found 0 resumes!
✅ Extracted text from 0 resumes!
✅ Saved all extracted text into 'all_resumes.txt'!


In [6]:
python -c "import os, sys; print(os.path.dirname(sys.executable))"

SyntaxError: invalid syntax (861653967.py, line 1)

In [None]:
import re
import spacy

# 📌 Step 6: Read Extracted Resumes from File
input_file = "all_resumes.txt"
output_file = "cleaned_resumes.txt"

with open(input_file, "r", encoding="utf-8") as f:
    resume_texts = f.read().split("\n\n")  # Assuming resumes are separated by double newlines

print(f"✅ Loaded {len(resume_texts)} resumes from '{input_file}'!")

# 📌 Step 7: Load spaCy NLP Model for Name Detection
nlp = spacy.load("en_core_web_sm")  # Uses a pre-trained English NER model

# 📌 Step 8: List of Common Indian Names (to enhance detection)
common_indian_names = set([
    "Amit", "Ravi", "Ananya", "Rahul", "Neha", "Rajesh", "Priya", "Vikram",
    "Deepak", "Anil", "Kiran", "Pooja", "Kumar", "Singh", "Sharma", "Iyer",
    "Varun", "Suresh", "Harish", "Sunita", "Jaya", "Alok", "Manoj", "Santosh"
])

# 📌 Step 9: Function to Remove PII (Emails, Phones, and Names)
def remove_pii(text):
    # Remove emails
    text = re.sub(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", "[EMAIL_REMOVED]", text)

    # Remove phone numbers (common formats: +91 XXXXX XXXXX, (123) 456-7890, etc.)
    text = re.sub(r"\+?\d{1,3}[\s-]?\(?\d{2,4}\)?[\s-]?\d{3,4}[\s-]?\d{3,4}", "[PHONE_REMOVED]", text)

    # Apply NLP for name detection
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ == "PERSON" or ent.text in common_indian_names:
            text = text.replace(ent.text, "[NAME_REMOVED]")

    return text

# 📌 Step 10: Apply PII Removal to All Resumes
cleaned_resume_texts = [remove_pii(resume) for resume in resume_texts if resume.strip()]

print(f"✅ Removed PII from {len(cleaned_resume_texts)} resumes!")

# 📌 Step 11: Save Cleaned Resumes to a File
with open(output_file, "w", encoding="utf-8") as f:
    f.write("\n\n".join(cleaned_resume_texts))

print(f"✅ Saved cleaned resumes into '{output_file}'!")


✅ Loaded 381 resumes from 'all_resumes.txt'!
✅ Removed PII from 371 resumes!
✅ Saved cleaned resumes into 'cleaned_resumes.txt'!


In [None]:
import re

# Reload cleaned resumes
with open("cleaned_resumes.txt", "r", encoding="utf-8") as f:
    cleaned_text = f.read()

# Define regex patterns
email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
phone_pattern = r"\+?\d{1,3}[\s-]?\(?\d{2,4}\)?[\s-]?\d{3,4}[\s-]?\d{3,4}"

# Search for emails and phone numbers
emails_found = re.findall(email_pattern, cleaned_text)
phones_found = re.findall(phone_pattern, cleaned_text)

# Display results
if emails_found:
    print(f"⚠️ Found {len(emails_found)} remaining emails! Example: {emails_found[:3]}")
else:
    print("✅ No emails found!")

if phones_found:
    print(f"⚠️ Found {len(phones_found)} remaining phone numbers! Example: {phones_found[:3]}")
else:
    print("✅ No phone numbers found!")


✅ No emails found!
✅ No phone numbers found!


In [None]:
!pip install huggingface_hub



In [None]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
The token `xxx` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `xxx`


In [None]:
from transformers import AutoTokenizer

# ✅ Step 1: Load Tokenizer (with authentication)
tokenizer = AutoTokenizer.from_pretrained(
    "deepseek-ai/deepseek-llm-67b-base", # Changed to a public model
    token=True  # Uses your HF token from `huggingface-cli login`
)

# ✅ Step 2: Read Cleaned Resumes
with open("cleaned_resumes.txt", "r", encoding="utf-8") as f:
    cleaned_resume_texts = f.read().split("\n\n")

# ✅ Step 3: Tokenize (with truncation & padding)
tokenized_resumes = [
    tokenizer(
        resume,
        truncation=True,
        padding="max_length",
        max_length=512,
        return_tensors="pt"  # Returns PyTorch tensors (optional)
    )
    for resume in cleaned_resume_texts
]

print(f"✅ Tokenized {len(tokenized_resumes)} resumes successfully!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

✅ Tokenized 371 resumes successfully!


In [None]:
!pip install faiss-cpu sentence-transformers


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

In [None]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer


In [None]:
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Read again if not already in memory
with open("cleaned_resumes.txt", "r", encoding="utf-8") as f:
    cleaned_resume_texts = f.read().split("\n\n")

# Generate embeddings
embeddings = embedding_model.encode(cleaned_resume_texts, convert_to_numpy=True)
print(f"✅ Generated embeddings shape: {embeddings.shape}")


✅ Generated embeddings shape: (371, 384)


In [None]:
# Define dimensionality from embeddings
dimension = embeddings.shape[1]

# Create FAISS index
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

print(f"✅ Stored {index.ntotal} vectors in FAISS index")


✅ Stored 371 vectors in FAISS index


In [None]:
faiss.write_index(index, "resume_faiss.index")
index = faiss.read_index("resume_faiss.index")


In [None]:
query = "Consultant"
query_embedding = embedding_model.encode([query], convert_to_numpy=True)

# Search top 5 matches
D, I = index.search(query_embedding, k=5)

print("Top 5 most similar resumes:")
for idx in I[0]:
    print(f"\nResume {idx+1}:\n{cleaned_resume_texts[idx][:10000]}...")


Top 5 most similar resumes:

Resume 254:
Accenture (2011-2014)
Business Technology Group (Business Consultant-Business analyst-Project Management)...

Resume 106:

[NAME_REMOVED] [EMAIL_REMOVED]
[PHONE_REMOVED]
Technology Strategy Consultant, Strategy & Analytics
[NAME_REMOVED]
Software developer turned Tech Strategy Consultant with demonstrated experience in Architecture Transformation, User Experience Enhancement
and Tech Expenditure Rationalization. Assisted clients in the Retail and Technology sectors to transform their technology platforms to improve
operational metrics and product KPIs. Looking for product roles to envision, build and deliver innovative products in the Retail domain
PORTFOLIO WORK EXPERIENCE
Re-imagining Uber for Senior Consultant
Indian Parents Deloitte USI
Enhancing Restaurant 06/2022 - Present,
Discovery on [NAME_REMOVED] teams to enable Fortune 500 organizations architect technology strategy roadmaps to improve user experience and
development velocity, and re

In [None]:
user_input = input("Enter your CV points or describe your job profile: ")
query_embedding = embedding_model.encode([user_input], convert_to_numpy=True)
D, I = index.search(query_embedding, k=5)  # Retrieve top 10 matches

retrieved_cvs = [cleaned_resume_texts[idx] for idx in I[0]]

context_text = "\n\n".join(retrieved_cvs)
import requests
import json

from openai import OpenAI

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key="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"
)

# Step 1: Extract and improve CV points
completion_1 = client.chat.completions.create(
    extra_headers={
        "HTTP-Referer": "<YOUR_SITE_URL>",
        "X-Title": "<YOUR_SITE_NAME>",
    },
    extra_body={},
    model="deepseek/deepseek-r1-zero:free",
    messages=[
        {"role": "system", "content": "You are a resume expert. Extract relevant CV points related to work experience, projects, and internships from the provided resumes. Display them as is, but mask numbers with 'xx' and company names with [Company] before displaying the final output. Do not display anything else like designation or duration—just the CV points' text."},
        {"role": "user", "content": f"Here are resumes for reference:\n\n{context_text}"}
    ]
)

improved_cv_points = completion_1.choices[0].message.content

# Step 2: Re-check for PII, company names, and absolute numbers
completion_2 = client.chat.completions.create(
    extra_headers={
        "HTTP-Referer": "<YOUR_SITE_URL>",
        "X-Title": "<YOUR_SITE_NAME>",
    },
    extra_body={},
    model="deepseek/deepseek-r1-zero:free",
    messages=[
        {"role": "system", "content": "You are a security expert. Review the following CV points and ensure all personal identifiable information (PII), company names, and absolute numbers are masked. Use 'xx' for numbers and replace company names with [Company]. Ensure complete redaction of sensitive data."},
        {"role": "user", "content": f"Here are the extracted CV points:\n\n{improved_cv_points}"}
    ]
)

final_cv_points = completion_2.choices[0].message.content

print("\n✅ Final CV Points (Sanitized):\n", final_cv_points)


Enter your CV points or describe your job profile: Product Intern, CEO Office | RagaAI | Bengaluru FEB 2024 – JULY 2024 Product Execution • Orchestrated the development of a RAG LLM evaluation platform (0-1) in just 2 weeks using Agile methodologies. • Led consumer research and created 10+ wireframes, collaborating with Engineering and Data Science teams to finalize the MVP. • Drafted and refined 30+ PRDs while owning the EASA AI Compliance check platform right from the research on regulations to launch. • Delivered 15+ new features on time by developing and managing a detailed product roadmap, ensuring 100% Quality Assurance. User Growth and Documentation • Drove 23%+ increase in user engagement by enhancing the public sandbox of RagaAI with 6+ industry-relevant ready-to-use cases • Authored a 20-page detailed whitepaper on RAG LLM evaluation techniques establishing key metrics for assessment

✅ Final CV Points (Sanitized):
 \boxed{```json
[
  {
    "Candidate Name": "Kandukuri [NAME_

In [None]:
pip install fastapi uvicorn


Collecting fastapi
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.1-py3-none-any.whl.metadata (6.2 kB)
Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading starlette-0.46.1-py3-none-any.whl (71 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn, starlette, fastapi
Successfully installed fastapi-0.115.12 starlette-0.46.1 uvicorn-0.34.0
