# Read occupation CSV

# Extract competences from "job_description" using an LLM

# Augment the CSV with a new column called "competences_llm" with a list of the competences.

In [7]:
import os
import pandas as pd
import openai
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain


In [38]:
csv_file = "infocomm_jsearch_jobposts/Machine%20Learning%20engineer.csv"
occ_df = pd.read_csv(csv_file)

In [39]:
occ_df.head()

Unnamed: 0,job_id,employer_name,employer_logo,employer_website,employer_company_type,job_publisher,job_employment_type,job_title,job_apply_link,job_apply_is_direct,...,job_salary_currency,job_salary_period,job_highlights,job_job_title,job_posting_language,job_onet_soc,job_onet_job_zone,job_naics_code,job_naics_name,job_occupational_categories
0,KaFxMMyEQpfuwHn1AAAAAA==,Nubank,https://upload.wikimedia.org/wikipedia/commons...,http://nubank.com.br,Finance,Greenhouse,FULLTIME,Machine Learning Engineer - Mexico,https://boards.greenhouse.io/nubank/jobs/5607752,False,...,,,{},Learning engineer,en,15111100,5,522291.0,Consumer Lending,
1,XkFWbAU9j1BjLWYmAAAAAA==,Autodesk,https://brand.autodesk.com/app/uploads/2021/04...,https://www.autodesk.com,Information,Autodesk Jobs,FULLTIME,"Machine Learning Engineer, Productivity Services",https://autodesk.dejobs.org/mexico-city-mex/ma...,False,...,,,{},Learning engineer,en,15111100,5,51121.0,Software Publishers,
2,IZsR7jXKEtLWKCgZAAAAAA==,Cognizant Technology Solutions,https://news.cognizant.com/images/COG-Logo.svg,http://www.cognizant.com,Computer Services,Cognizant,FULLTIME,Machine Learning Engineer,https://careers.cognizant.com/professionals/gl...,False,...,,,{},Learning engineer,en,15111100,5,541511.0,Custom Computer Programming Services,['Technology & Engineering']
3,XcThS2EI7tOFhVbdAAAAAA==,Chubb,https://cdn.cookielaw.org/logos/90614a50-71bc-...,,Finance,Indeed,FULLTIME,Machine Learning Engineer,https://mx.indeed.com/viewjob?jk=8824d287e7650981,False,...,,,{},Learning engineer,en,15111100,5,524126.0,Direct Property and Casualty Insurance Carriers,
4,BDPcoj-BfiIkLkoWAAAAAA==,ALTEN MÉXICO,https://www.alten.com/wp-content/uploads/2023/...,https://www.alten.fr,Computer Services,Jobs By Workable,FULLTIME,Data Science & Machine Learning Engineer - SR,https://apply.workable.com/alten-mexico-1/j/6D...,True,...,,,{},Learning engineer,en,15111100,5,541512.0,Computer Systems Design Services,


In [40]:
occ_df.shape

(65, 41)

In [8]:
#OPEN AI KEY

os.environ["OPENAI_API_KEY"] = "use-your-openai-api-key"


In [9]:
OPENAI_API_KEY: str = os.environ.get("OPENAI_API_KEY")

In [42]:
competences = []

llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature = 0)
# TODO: Try other LLMS

# RAG
#Add output key to it

template = """ From the given document: {document}
Question: {question}"""
prompt = PromptTemplate(template=template, input_variables=["question", "document"])
llm_chain = LLMChain(prompt=prompt, llm=llm)

for x in occ_df["job_description"]:
    if len(str(x)) > 3:
        question = "List all the competences, skills, abilities and knowledge found in the text."
        #clean x
        clean_x = x.strip()
        output_competences = llm_chain.run(question = question, document = clean_x)
        competences.append(output_competences)    
    else:
        print("Job description not found")
        competences.append("Job description not found")

BadRequestError: Error code: 400 - {'error': {'message': "Sorry! We've encountered an issue with repetitive patterns in your prompt. Please try again with a different prompt.", 'type': 'invalid_request_error', 'param': 'prompt', 'code': 'invalid_prompt'}}

In [30]:
competences

['\n\n1. Mobile software engineering\n2. Kotlin\n3. Swift\n4. Scripting\n5. Programming\n6. Agile team environment\n7. Android and iOS native app development\n8. Software Development Lifecycle\n9. Test-driven development\n10. Computer Science\n11. Engineering\n12. Mobile standards and protocols\n13. Commercial software development best practices\n14. Inclusiveness\n15. Diversity\n16. Equity\n17. Inclusion\n18. Diversity, Equity, and Inclusion (DEI)\n19. Customer representation\n20. Community building\n21. Communication skills\n22. Collaboration\n23. Problem-solving\n24. Time management\n25. Adaptability\n26. Attention to detail\n27. Creativity\n28. Critical thinking\n29. Teamwork\n30. Professionalism\n31. Project management\n32. Leadership\n33. Interpersonal skills\n34. Cultural sensitivity\n35. Global impact\n36. Personal and professional growth\n37. Test automation\n38. Unit testing\n39. UI testing\n40. Bug fixing\n41. Client-side application development\n42. Familiarity with mobile 

In [32]:
occ_df['competences_llm'] = competences

In [33]:
occ_df.head()

Unnamed: 0,job_id,employer_name,employer_logo,employer_website,employer_company_type,job_publisher,job_employment_type,job_title,job_apply_link,job_apply_is_direct,...,job_salary_period,job_highlights,job_job_title,job_posting_language,job_onet_soc,job_onet_job_zone,job_naics_code,job_naics_name,job_occupational_categories,competences_llm
0,994AHznEilU8R-Z7AAAAAA==,Trimble,https://investor.trimble.com/files/design/svg/...,http://www.trimble.com,Manufacturing,Jobs At Trimble,FULLTIME,Mobile Software Engineering Intern,https://trimblecareers.trimble.com/careers/job...,False,...,,{},Software engineering,en,15113200,4,33429,Other Communications Equipment Manufacturing,,\n\n1. Mobile software engineering\n2. Kotlin\...


In [34]:
occ_df.shape

(1, 42)

In [35]:
occ_df['competences_llm']

0    \n\n1. Mobile software engineering\n2. Kotlin\...
Name: competences_llm, dtype: object

In [36]:
occ_df.to_csv("infocomm_jsearch_jobposts/Machine%20Learning%20engineer_competences_llm.csv")