In [None]:
import re
import json
import PyPDF2

def parse_resume(resume_text):
    resume_data = {
        "Name": "",
        "Contact Information": {
            "Email": "",
            "Phone": "",
            "LinkedIn": "",
            "GitHub": ""
        },
        "Education": [],
        "Experience": [],
        "Projects": [],
        "Skills": {
            "Languages": [],
            "Developer Tools": [],
            "Libraries": []
        }
    }

    # Extract Name (Assuming it's the first line)
    lines = resume_text.split('\n')
    resume_data["Name"] = lines[0].strip()

    # Extract Contact Information
    contact_info = re.findall(r'[\w\.-]+@[\w\.-]+', resume_text)
    if contact_info:
        resume_data["Contact Information"]["Email"] = contact_info[0]

    phone_number = re.findall(r'\+?\d[\d -]{8,12}\d', resume_text)
    if phone_number:
        resume_data["Contact Information"]["Phone"] = phone_number[0]

    linkedin = re.findall(r'(https?://[^\s]+linkedin[^\s]+)', resume_text)
    if linkedin:
        resume_data["Contact Information"]["LinkedIn"] = linkedin[0]

    github = re.findall(r'(https?://[^\s]+github[^\s]+)', resume_text)
    if github:
        resume_data["Contact Information"]["GitHub"] = github[0]

    # Extract Sections: Education, Experience, Projects, Skills
    education_section = re.search(r'Education\n(.*?)\n\n', resume_text, re.DOTALL)
    if education_section:
        education_text = education_section.group(1)
        education_entries = education_text.split('\n\n')
        for entry in education_entries:
            degree = re.search(r'(.*?),', entry)
            institution = re.search(r'at (.*)', entry)
            location = re.search(r'\((.*?)\)', entry)
            date_range = re.findall(r'\d{4}', entry)
            if degree and institution and date_range:
                resume_data["Education"].append({
                    "Degree": degree.group(1).strip(),
                    "Institution": institution.group(1).strip(),
                    "Location": location.group(1).strip() if location else "",
                    "Start Date": date_range[0].strip() if len(date_range) > 0 else "",
                    "End Date": date_range[1].strip() if len(date_range) > 1 else ""
                })

    experience_section = re.search(r'Experience\n(.*?)\n\n', resume_text, re.DOTALL)
    if experience_section:
        experience_text = experience_section.group(1)
        experience_entries = experience_text.split('\n\n')
        for entry in experience_entries:
            job_title = re.search(r'(.*?),', entry)
            company = re.search(r'at (.*)', entry)
            location = re.search(r'\((.*?)\)', entry)
            date_range = re.findall(r'\d{4}', entry)
            responsibilities = entry.split('•')[1:]
            if job_title and company and date_range:
                resume_data["Experience"].append({
                    "Job Title": job_title.group(1).strip(),
                    "Company": company.group(1).strip(),
                    "Location": location.group(1).strip() if location else "",
                    "Start Date": date_range[0].strip() if len(date_range) > 0 else "",
                    "End Date": date_range[1].strip() if len(date_range) > 1 else "",
                    "Responsibilities": [resp.strip() for resp in responsibilities]
                })

    project_section = re.search(r'Projects\n(.*?)\n\n', resume_text, re.DOTALL)
    if project_section:
        project_text = project_section.group(1)
        project_entries = project_text.split('\n\n')
        for entry in project_entries:
            project_name = re.search(r'(.*?)\|', entry)
            description = entry.split('•')[1:]
            if project_name:
                resume_data["Projects"].append({
                    "Project Name": project_name.group(1).strip(),
                    "Description": ' '.join(description).strip(),
                    "Technologies Used": [],
                    "Date": ""
                })

    skills_section = re.search(r'Skills\n(.*?)\n\n', resume_text, re.DOTALL)
    if skills_section:
        skills_text = skills_section.group(1)
        languages = re.search(r'Languages:\s*(.*)', skills_text)
        developer_tools = re.search(r'Developer Tools:\s*(.*)', skills_text)
        libraries = re.search(r'Libraries:\s*(.*)', skills_text)
        if languages:
            resume_data["Skills"]["Languages"] = [lang.strip() for lang in languages.group(1).split(',')]
        if developer_tools:
            resume_data["Skills"]["Developer Tools"] = [tool.strip() for tool in developer_tools.group(1).split(',')]
        if libraries:
            resume_data["Skills"]["Libraries"] = [lib.strip() for lib in libraries.group(1).split(',')]

    return json.dumps(resume_data, indent=4)

# Example usage
resume_text = """
SARTHAK AGARWAL
+91 7906557429 | sarthakaggarwal584@gmail.com | linkedin.com/in/sarthak | github.com/sarthak

Education
SRM Institute Of Science and Technology at Chennai, Tamil Nadu (2021 – 2025)
Bachelor of Technology, Computer Science

Dewan Public School at Meerut, Uttar Pradesh (2019 – 2021)
Higher secondary

Experience
Machine Learning Intern at Bornbhukkad, Hyderabad, Telangana (April 2024 – Present)
• Specialized in Selenium and WebDriver for data acquisition, improving data gathering efficiency by 40%.
• Developed models and conducted Exploratory Data Analysis (EDA) to uncover valuable insights, resulting in a 25% increase in decision-making accuracy.
• Implemented advanced automation methods, boosting data gathering accuracy by 30%.

Undergraduate Research Assistant at National University of Singapore (NUS), Kent Ridge, Singapore (Nov 2023 – Feb 2024)
• Collaborated with Haiyue Zhu in Unity development, creating immersive interactive experiences and increasing user engagement by 20%.
• Integrated ROS for advanced robotic intelligence, enhancing communication and performance by 35%.
• Optimized robotic processes with RoboDK, achieving 15% greater precision and efficiency.

Network Analyst at Audio Bridge, Noida, Uttar Pradesh (June 2023 – July 2023)
• Monitored and troubleshot network issues, ensuring 99.9% system uptime.
• Configured and maintained network systems, improving operational performance by 20%.
• Documented and reported on network operations, facilitating streamlined processes and informed decision-making.

Projects
KYC Online Verification | Flask, Bootstrap, Deepface, OpenCV, pytesseract (Mar 2024)
• Engineered a Flask backend for efficient and secure KYC online verification processes, reducing verification time by 40% and enhancing security measures by 30%.
• Designed a responsive user interface using Bootstrap, enhancing user experience by 25%.
• Leveraged Supabase for scalable database storage, ensuring seamless data management and improving data retrieval efficiency by 30%.

Stock Price Prediction | Python, TensorFlow, Keras, Scikit-learn, Jupyter Notebook (Jan 2024)
• Developed 'StockPredictor' using LSTM for stock price forecasting with a 10% error rate.
• Integrated TensorFlow and Scikit-learn algorithms, achieving a 95% prediction accuracy.
• Used Jupyter Notebook for model testing and performance visualization, enhancing analysis clarity and reducing debugging time by 25%.

Restaurant Sentiment Analysis | Python, Random Forest Model, NLP (Nov 2023)
• Utilized Random Forest algorithm for effective sentiment classification, achieving 90% accuracy.
• Applied NLP techniques to extract features from textual data, resulting in a 20% enhancement of customer insights.

Skills
Languages: Python, C/C++, SQL, HTML, CSS
Developer Tools: Git, Docker, VS Code, Ubuntu, Visual Studio, Jupyter Notebook, Anaconda
Libraries: PyTorch, TensorFlow, Keras, scikit-learn, NumPy, pandas, OpenCV, Selenium, BeautifulSoup, Scrapy
"""

parsed_resume = parse_resume(resume_text)
print(parsed_resume)


{
    "Name": "",
    "Contact Information": {
        "Email": "sarthakaggarwal584@gmail.com",
        "Phone": "+91 7906557429",
        "LinkedIn": "",
        "GitHub": ""
    },
    "Education": [
        {
            "Degree": "SRM Institute Of Science and Technology at Chennai",
            "Institution": "Chennai, Tamil Nadu (2021 \u2013 2025)",
            "Location": "2021 \u2013 2025",
            "Start Date": "2021",
            "End Date": "2025"
        }
    ],
    "Experience": [
        {
            "Job Title": "Machine Learning Intern at Bornbhukkad",
            "Company": "Bornbhukkad, Hyderabad, Telangana (April 2024 \u2013 Present)",
            "Location": "April 2024 \u2013 Present",
            "Start Date": "2024",
            "End Date": "",
            "Responsibilities": [
                "Specialized in Selenium and WebDriver for data acquisition, improving data gathering efficiency by 40%.",
                "Developed models and conducted Exploratory 