In [54]:
import json
from utils import *
import re
import numpy as np

In [72]:
def create_resume(resume_json):
    """Re-construct the textual resume from the provided JSON data."""
    
    def flatten_and_convert(data):
        """Flatten and convert data to string."""
        if isinstance(data, list):
            return ' '.join(map(str, data))
        return str(data)

    resume_text = []

    # Basic Info
    basic_info = resume_json.get('Basic_Info', {})
    title = flatten_and_convert(basic_info.get('Current_Title', ''))
    company = flatten_and_convert(basic_info.get('Current_Company', ''))
    resume_text.append(title + " at " + company)
    resume_text.append(flatten_and_convert(basic_info.get('Location', '')))
    bio = flatten_and_convert(basic_info.get('Bio', ''))
    if bio:
        resume_text.append(bio)
    resume_text.append("\nExperience:")

    # Experience
    i = 0
    for exp in resume_json.get('Experience', []):
        job_title = flatten_and_convert(exp.get('Job_Title', ''))
        company = flatten_and_convert(exp.get('Company', ''))
        start_date = flatten_and_convert(exp.get('Start_Date', ''))
        end_date = flatten_and_convert(exp.get('End_Date', ''))
        responsibilities = flatten_and_convert(exp.get('Responsibilities', ''))
        resume_text.append(job_title + " at " + company + " (" + start_date + " - " + end_date + ")")
        resume_text.append(responsibilities)


    # Education
    resume_text.append("\nEducation:")
    for edu in resume_json.get('Education', []):
        degree = flatten_and_convert(edu.get('Degree', ''))
        field = flatten_and_convert(edu.get('Field', ''))
        institution = flatten_and_convert(edu.get('Institution', ''))
        resume_text.append(degree + " in " + field + " from " + institution)

    # Skills
    skills = flatten_and_convert(resume_json.get('Skills', []))
    if skills:
        resume_text.append("\nSkills:")
        resume_text.append(skills)

    # Links
    links = flatten_and_convert(resume_json.get('Links', []))
    if links:
        resume_text.append("\nLinks:")
        resume_text.append(links)
        
    # certifications
    certifications = flatten_and_convert(resume_json.get('Certifications', []))
    if certifications:
        
        resume_text.append("\nCertifications:")
        resume_text.append(certifications)
    # Additional Information
    additional_info = flatten_and_convert(resume_json.get('Additional_Information', ''))
    
    if additional_info:
        resume_text.append("\nAdditional Information:")
        resume_text.append(additional_info)

    # Joining all the parts to form a single string
    resume_formatted = "\n".join(resume_text)
    
    return resume_formatted

def printRes(resName, raw = False, to = -1):
    if "txt" not in resName:
        resName += ".txt"
    res = getCleanResume(resName).replace("?", "").replace("-", "").replace("  ", " ")[:to]
    # get rid of any non-aplhanumeric characters
    res = re.sub(r"[^a-zA-Z0-9]+", ' ', res) if not raw else res
    print(res)
    print()
    if "skills" in res.lower():
        print("Skills")
        print()
        # find last occurence of skills
        print(res[res.lower().find("skills"):])
        #print(res[res.lower().find("skill"):])
    else:
        print("No skills")
        
    if "education" in res.lower():
        print("Education")
        print()
        print(res[res.lower().find("education"):])
    else: 
        print("No education")

In [None]:
[llama2[keys[k]][EXPERIENCE][i][RESPONSIBILITIES] for k in range(len(keys)) for i in range(len(llama2[keys[k]][EXPERIENCE])) if RESPONSIBILITIES in llama2[keys[k]][EXPERIENCE][i] ]

In [20]:
create_resume(gpt4['20088.txt'])

'Business Intelligence/.NET Developer at Electronic Recyclers International\nHanford, CA\nBusiness Intelligence/.NET Developer Software Engineer authorized to work in the US for any employer.\n\nExperience:\nBusiness Intelligence/.NET Developer at Electronic Recyclers International (April 2019 - Present)\nI utilize Tableau, Tableau Online, Microsoft Azure Data Factory, SQL and SSIS to create and maintain data pipelines that help our executive staff make informed business decisions. In addition to my duties as a .NET developer, I was picked to take over duties of the BI Developer role as well. Every day I work directly with the executive staff to improve our business model.\nApplication Developer at Electronic Recyclers International (November 2018 - Present)\nAs a Developer at ERI, my responsibilities include contributing to an extremely diverse code-base, participating in daily standup meetings, improving business practices through software, providing support to employees and more. Ev

In [21]:
# with open("gpt4_parsing.json", "r") as f:
#     gpt4 = json.load(f)
    
# with open("new_baseline_comparison.json", "r") as f:
#     new_baseline = json.load(f)
    
keys = list(gpt4.keys())
#raw_resumes = [getCleanResume(gpt4[key]) for key in keys]

for key in keys:
    #print(key)
    joint_res = new_baseline['baseline'][key]
    res = getCleanResume(key)
    is_same = True
    length_diff = len(re.sub(r"[^a-zA-Z0-9]+", " ", create_resume(joint_res)).split()) - len(re.sub(r"[^a-zA-Z0-9]+", " ", res).split())
    for exp in joint_res['Experience']:
        res_gpt = exp['Responsibilities']
        res_gpt = re.sub(r"[^a-zA-Z0-9]+", '', res_gpt)

        is_same = is_same and (res_gpt in re.sub(r"[^a-zA-Z0-9]+", '', res)) 
    bio = joint_res['Basic_Info']['Bio']
    is_same = is_same and (re.sub(r"[^a-zA-Z0-9]+", '', bio) in re.sub(r"[^a-zA-Z0-9]+", '', res))
    
    # new_baseline['baseline'][key] = gpt4[key]
    # if not is_same:   
    #     print(key, is_same)
    if abs(length_diff) > 100:
        print(key, length_diff)
        
    # if abs(length_diff) > 100 and not is_same:
    #     print(key, length_diff)
    
    

13015.txt -117
21699.txt -306
04981.txt -141
20088.txt -102
07455.txt -103
16515.txt -143
22041.txt -104
26438.txt -289
11685.txt -115
03114.txt -162
02466.txt -295


In [36]:
len(new_baseline['baseline'])

118

In [76]:
# with open("new_baseline_comparison.json", "w") as f:
#     json.dump(new_baseline, f, indent=4)

In [31]:
re.sub(r"[^a-zA-Z0-9]+", " ", create_resume(joint_res))

' NET Web Developer at DataConcepts Richmond VA Authorized to work in the US for any employer Experience NET Web Developer at DataConcepts October 2017 Present Designed and developed an Online Open Enrollment System for the State of Virginia Department of Human Resources Created AngularJS SPA in an agile environment using Team Foundation Server QA as well as modifying UI to comply with ADA standards C and Javascript debugging and enhancing HTML5 CSS3 Bootstrap Rest API Services Javascript JQuery AngularJS ASP NET MVC C SQL 2014 Selenium EF6 Sr Front End Developer at Tahzoo LLC December 2016 July 2017 Assisted with POC modifications and an internal employee mapping web site within an Agile environment Trained with MarkLogic Data Hub and Cloud Service Achieving MarkLogic certification Entity Framework 6 Model First Javascript RAZOR modifications Front End Developer at Shockoe LLC July 2016 August 2016 Team member in training to develop a responsive web application for Capital One utilizi

In [30]:
re.sub(r"[^a-zA-Z0-9]+", " ", re.sub(r"\([^)]*\)", "", res))

' NET Web Developer NET Web Developer NET Web Developer DataConcepts Richmond VA Authorized to work in the US for any employer Work Experience NET Web Developer DataConcepts Richmond VA October 2017 to Present Designed and developed an Online Open Enrollment System for the State of Virginia Department of Human Resources Created AngularJS SPA in an agile environment using Team Foundation Server QA as well as modifying UI to comply with ADA standards C and Javascript debugging and enhancing HTML5 CSS3 Bootstrap Rest API Services Javascript JQuery AngularJS ASP NET MVC C SQL 2014 Selenium EF6 Sr Front End Developer Tahzoo LLC Richmond VA December 2016 to July 2017 Assisted with POC modifications and an internal employee mapping web site within an Agile environment Trained with MarkLogic Data Hub and Cloud Service Achieving MarkLogic certification Entity Framework 6 Javascript RAZOR modifications Front End Developer Shockoe LLC Richmond VA July 2016 to August 2016 Team member in training t


04981.txt -141
20088.txt -102
07455.txt -103
16515.txt -143
22041.txt -104
11685.txt -115
03114.txt -162

In [178]:
len(re.sub(r"[^a-zA-Z0-9]+", " ", create_resume(joint_res)).split())

387

In [32]:
res = getCleanResume("11685.txt")
joint_res = gpt4["11685.txt"]
bio = joint_res['Basic_Info']['Bio']
print(f'length difference: {len(re.sub(r"[^a-zA-Z0-9]+", " ", create_resume(joint_res)).split()) - len(re.sub(r"[^a-zA-Z0-9]+", " ", res).split())}')
print("Bio")
if not re.sub(r"[^a-zA-Z0-9]+", '', bio) in re.sub(r"[^a-zA-Z0-9]+", '', res):
    print(bio)
    print()
    
print("Experience")
for exp in joint_res['Experience']:
    res_gpt = exp['Responsibilities']
    #res_gpt = re.sub(r"[^a-zA-Z0-9]+", ' ', res_gpt)
    if not re.sub(r"[^a-zA-Z0-9]+", '', res_gpt) in re.sub(r"[^a-zA-Z0-9]+", '', res):
        print(res_gpt)
        print()
        


length difference: -115
Bio
Experience


In [100]:
# choose 30 random keys from new_baseline without replacement
chosen = iter(np.random.choice(list(new_baseline['baseline'].keys())[20:], 30, replace=False))

In [102]:
printRes(next(chosen), raw=True, to=400)

Current Student Current Student Orange, CA Work Experience Current Student Myself Orange County, CA December 2015 to Present Currently (and since December 2015, combined with my last job) I find myself in the selftraining to be a frontend developer. Performing courses and training in Ionic, Angular, Nodejs, MongoDB, HTML5, CSS3, WordPress, JavaScript, TypeScript, etc. Computer Technician Computado

No skills
No education


In [42]:
print(chosen[3])

TypeError: 'list_iterator' object is not subscriptable