In [1]:
!pip install requests pandas beautifulsoup4 openai



In [2]:
# Import relevant packages
import random
import re
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

from OpenAI.openai_handler import OpenAIHandler

In [3]:
# Load the cleaned LinkedIn listings and print a plain-text preview of the first five rows
file_name = "LinkedIn_Jobs_Data_Scientist_Monterrey_2024-09-04_clean.csv"
df_jobs = pd.read_csv(file_name)
print(df_jobs.iloc[:5])

    Location                                        Title           Company  \
0  Monterrey                           Jr. Data Scientist  Arca Continental   
1  Monterrey  ML Engineer (Engineer Software Development)            NEORIS   
2  Monterrey                                  AI Engineer            NEORIS   
3  Monterrey                               Data Scientist             Chubb   
4  Monterrey               AI/ML and MLOps Field Engineer         Canonical   

                                                 Url       JobID      Category  
0  https://mx.linkedin.com/jobs/view/jr-data-scie...  4002846143  Data Science  
1  https://mx.linkedin.com/jobs/view/ml-engineer-...  4002146229  Data Science  
2  https://mx.linkedin.com/jobs/view/ai-engineer-...  3984233060         AI/ML  
3  https://mx.linkedin.com/jobs/view/data-scienti...  3987318831  Data Science  
4  https://mx.linkedin.com/jobs/view/ai-ml-and-ml...  4013780012  Data Science  


In [4]:
# Summarize the loaded listings: column names, non-null counts and dtypes
df_jobs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119 entries, 0 to 118
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Location  119 non-null    object
 1   Title     119 non-null    object
 2   Company   119 non-null    object
 3   Url       119 non-null    object
 4   JobID     119 non-null    int64 
 5   Category  119 non-null    object
dtypes: int64(1), object(5)
memory usage: 5.7+ KB


In [5]:
def get_random_user_agent():
    """Pick request headers carrying a randomly chosen User-Agent string.

    Returns:
        dict: A single-entry mapping ``{'User-Agent': <string>}`` drawn
        uniformly from the five-entry pool below.
    """
    # Two of the strings appear twice on purpose of fidelity: the pool keeps
    # the same five slots, so each string is drawn with the same weight.
    user_agent_pool = (
        'Mozilla/5.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Mobile Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
    )

    return {'User-Agent': random.choice(user_agent_pool)}

In [6]:
def fetch_jobs_until_success(url, max_retries=None, timeout=30, base_delay=1.0, max_delay=60.0):
    """GET ``url`` repeatedly until the server answers with HTTP 200.

    Every attempt sends a freshly chosen User-Agent header. Failed attempts
    are separated by an exponentially growing pause, so a throttled or
    temporarily unavailable endpoint is not hit in a tight loop.

    Args:
        url: Address to request.
        max_retries: Maximum number of attempts. ``None`` (the default)
            keeps retrying indefinitely, which is what callers that pass
            only ``url`` have always relied on.
        timeout: Seconds to wait for a single request before it is counted
            as a failed attempt.
        base_delay: Pause in seconds after the first failed attempt; it is
            doubled after each further failure.
        max_delay: Upper bound in seconds for the pause between attempts.

    Returns:
        requests.Response: The first response whose status code is 200.

    Raises:
        RuntimeError: If ``max_retries`` is set and that many attempts fail.
        requests.exceptions.RequestException: Errors other than timeouts
            propagate unchanged.
    """
    attempt = 0
    delay = base_delay
    while True:
        attempt += 1
        try:
            response = requests.get(url, headers=get_random_user_agent(), timeout=timeout)
        except requests.exceptions.Timeout as exc:
            # Timeouts only exist because of the timeout set above; treat
            # them like any other unsuccessful attempt.
            failure = repr(exc)
        else:
            if response.status_code == 200:
                return response
            failure = f"HTTP {response.status_code}"

        if max_retries is not None and attempt >= max_retries:
            raise RuntimeError(
                f"Giving up on {url} after {attempt} attempts (last failure: {failure})"
            )

        time.sleep(delay)
        delay = min(delay * 2, max_delay)

In [7]:
def get_jobid_information(jobid):
    """Build the LinkedIn guest-API URL for one job posting.

    Args:
        jobid: Job identifier as a string or an integer (for example a
            value taken straight from the ``JobID`` column).

    Returns:
        str: The job-posting endpoint followed by the identifier.
    """
    # Base URL for LinkedIn job search
    base_url = 'https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/'

    # Formatting instead of ``+`` so integer ids no longer raise TypeError.
    return f"{base_url}{jobid}"

In [9]:
# Sanity check: build the guest-API URL for the posting at index label 0 and display it
jobID = str(df_jobs.loc[0, 'JobID'])
target_url = get_jobid_information(jobID)
target_url

'https://www.linkedin.com/jobs-guest/jobs/api/jobPosting/4002846143'

In [10]:
# Columns produced by the scrape, in the order they are appended to df_jobs.
_EXTRACTED_COLUMNS = [
    'SeniorityLevel', 'EmploymentType', 'JobFunction', 'Industries',
    'PostedTime', 'NumApplicants', 'Description',
]

# Label text shown in the page's criteria list -> output column.
_CRITERIA_COLUMNS = {
    'Seniority level': 'SeniorityLevel',
    'Employment type': 'EmploymentType',
    'Job function': 'JobFunction',
    'Industries': 'Industries',
}


def _text_or_na(tag, **get_text_kwargs):
    """Return ``tag.get_text(**get_text_kwargs)``, or 'N/A' when the tag was not found."""
    return tag.get_text(**get_text_kwargs) if tag else 'N/A'


def _parse_job_posting(soup):
    """Collect the fields of interest from one parsed job-posting page.

    Args:
        soup: BeautifulSoup document of the posting.

    Returns:
        dict: One value per name in ``_EXTRACTED_COLUMNS``; a field that is
        not present on the page is reported as 'N/A'.
    """
    record = dict.fromkeys(_EXTRACTED_COLUMNS, 'N/A')

    # Find the criteria list first
    criteria_list = soup.find('ul', class_='description__job-criteria-list')
    if criteria_list:
        for item in criteria_list.find_all('li', class_='description__job-criteria-item'):
            item_text = item.get_text()
            # First matching label wins, mirroring an if/elif chain.
            column = next(
                (col for label, col in _CRITERIA_COLUMNS.items() if label in item_text),
                None,
            )
            if column is None:
                continue
            value_tag = item.find('span', class_='description__job-criteria-text')
            # An item without its value span keeps the 'N/A' default instead
            # of raising AttributeError and aborting the whole scrape.
            if value_tag:
                record[column] = value_tag.get_text(strip=True)

    # Extract number of applicants (the caption is rendered with either markup)
    applicants_tag = (
        soup.find('figcaption', class_='num-applicants__caption')
        or soup.find('span', class_='num-applicants__caption topcard__flavor--metadata topcard__flavor--bullet')
    )
    record['NumApplicants'] = _text_or_na(applicants_tag, strip=True)

    # Extract posted time
    record['PostedTime'] = _text_or_na(
        soup.find('span', class_='posted-time-ago__text'), strip=True
    )

    # Extract job description text
    record['Description'] = _text_or_na(
        soup.find('div', class_='show-more-less-html__markup'), separator=' ', strip=True
    )
    return record


print(F"Get job description for {df_jobs.shape[0]} postings")
extracted_data = []
for jobID in df_jobs['JobID'].astype(str):
    target_url = get_jobid_information(jobID)
    response = fetch_jobs_until_success(target_url)
    soup = BeautifulSoup(response.content, 'html.parser')
    extracted_data.append(_parse_job_posting(soup))

# Convert the extracted data into a DataFrame. Reusing df_jobs' index makes
# the column-wise concat below line up row by row whatever that index is.
extracted_df = pd.DataFrame(extracted_data, index=df_jobs.index, columns=_EXTRACTED_COLUMNS)

# Combine with the original dataframe. Columns left over from an earlier run
# of this cell are dropped first so a re-run does not create duplicates.
df_jobs = pd.concat(
    [df_jobs.drop(columns=_EXTRACTED_COLUMNS, errors='ignore'), extracted_df],
    axis=1,
)
print("Done!")

Get job description for 119 postings
Done!


In [12]:
# Store 'Category' as a categorical limited to the four known labels, then
# expose it under the name 'JobCategory'
categories = ['AI/ML', 'Data Science', 'Data Engineering', 'Data Analysis']
job_category_dtype = pd.CategoricalDtype(categories=categories)
df_jobs['Category'] = df_jobs['Category'].astype(job_category_dtype)
df_jobs.rename(columns={'Category': 'JobCategory'}, inplace=True)

In [14]:
# Fold any "Not Applicable" seniority into the 'N/A' placeholder, then store
# the column as a categorical over the known levels
categories = ['Entry level', 'Mid-Senior level', 'Executive', 'N/A', 'Associate',
       'Internship']
seniority_values = [
    'N/A' if 'Not Applicable' in level else level
    for level in df_jobs['SeniorityLevel']
]
df_jobs['SeniorityLevel'] = pd.Categorical(seniority_values, categories=categories)

In [15]:
# Store EmploymentType as a categorical whose categories are the values seen in the data
employment_type_dtype = pd.CategoricalDtype(categories=df_jobs['EmploymentType'].unique())
df_jobs['EmploymentType'] = df_jobs['EmploymentType'].astype(employment_type_dtype)

In [16]:
# Clean and standardize the number of applicants
def extract_num_applicants(text):
    """Parse the applicant count out of an applicants caption.

    The first number in the caption is returned, so
    "Be among the first 25 applicants" gives 25 and "Over 200 applicants"
    gives 200. Thousands separators are understood ("1,234 applicants"
    gives 1234).

    Args:
        text: Caption string. An integer is returned unchanged, which keeps
            the cleaning cell re-runnable on an already converted column.
            Any other non-string value is treated as missing.

    Returns:
        int | str: The count, or 'N/A' when no number is present.
    """
    if isinstance(text, int) and not isinstance(text, bool):
        return text
    if not isinstance(text, str):
        return 'N/A'

    # Try the comma-grouped form first so "1,234" is not cut off at "1".
    match = re.search(r'\d{1,3}(?:,\d{3})+|\d+', text)
    if match is None:
        return 'N/A'
    return int(match.group().replace(',', ''))

# Replace each applicants caption with the value returned by extract_num_applicants
df_jobs['NumApplicants'] = df_jobs['NumApplicants'].map(extract_num_applicants)

In [17]:
# Shorten two verbose job-function labels (whole-value replacement)
job_function_aliases = {
    'Research and Design': 'R&D',
    'Design and Product Management': 'Product Management',
}
df_jobs['JobFunction'] = df_jobs['JobFunction'].replace(job_function_aliases)

# Standardize job functions
def standardize_job_function(text):
    """Rewrite a job-function string as at most three ', '-separated entries.

    Entries may be separated by commas, by the word "and", or by both
    ("A, B, and C"). Splitting on all three forms at once avoids the stray
    comma that replacing " and " with ", " leaves on the preceding entry of
    an Oxford-comma list.

    Args:
        text: Raw job-function string, e.g.
            "Engineering and Information Technology".

    Returns:
        str: The first three entries joined with ', '. A string with no
        separator (such as 'N/A') is returned unchanged.
    """
    # ", " / ", and " / " and " are all treated as one separator.
    pieces = re.split(r',\s*(?:and\s+)?|\s+and\s+', text)
    job_functions = [piece.strip() for piece in pieces if piece.strip()]

    # Keep only the first three functions
    return ', '.join(job_functions[:3])

# Normalize every JobFunction value with standardize_job_function
df_jobs['JobFunction'] = df_jobs['JobFunction'].map(standardize_job_function)

In [18]:
def split_job_functions(text):
    """Spread a ', '-separated job-function string over exactly three slots.

    Args:
        text: String such as "Engineering, Information Technology".

    Returns:
        pandas.Series: Three entries holding the first three functions in
        order; slots with no corresponding function are None.
    """
    slots = text.split(', ')[:3]
    # Pad on the right so the result always has three positions.
    slots.extend([None] * (3 - len(slots)))
    return pd.Series(slots)

# Split every JobFunction string and store the three slots as separate columns
job_function_slots = df_jobs['JobFunction'].apply(split_job_functions)
df_jobs[['JobFunction1', 'JobFunction2', 'JobFunction3']] = job_function_slots

In [19]:
job_function_columns = ['JobFunction1', 'JobFunction2', 'JobFunction3']

# Slots that had no job function hold a missing value; label them 'N/A' so
# every row has an explicit value in each of the three columns.
for column in job_function_columns:
    df_jobs[column] = df_jobs[column].fillna('N/A')

# One category list shared by the three columns (it includes 'N/A').
# Sorting makes the category order identical on every run; iterating a bare
# set of strings does not guarantee a stable order across sessions.
job_function_categories = sorted(
    set().union(*(df_jobs[column].unique() for column in job_function_columns))
)

# Convert the three columns to categoricals over the shared category list
for column in job_function_columns:
    df_jobs[column] = pd.Categorical(df_jobs[column], categories=job_function_categories)

# The combined 'JobFunction' column is no longer needed; errors='ignore'
# keeps this cell re-runnable once the column is already gone.
df_jobs.drop(columns=['JobFunction'], inplace=True, errors='ignore')

In [20]:
def convert_posted_time(text):
    """Convert a relative posting age such as "3 days ago" into days.

    The unit is detected by substring, checked in the order hour, day,
    week, month, minute, second, year. The amount is the first number in
    the text, defaulting to 1 when there is none. Ages below one day count
    as 0; a month counts as 30 days and a year as 365.

    Args:
        text: Posting-age string. Non-string values are returned unchanged
            so an already converted column can be passed through again.

    Returns:
        int | object: Number of days, or ``text`` itself when no known
        unit is found (for example 'N/A').
    """
    if not isinstance(text, str):
        return text

    # Search once; every unit branch uses the same leading number.
    amount_match = re.search(r'\d+', text)
    amount = int(amount_match.group()) if amount_match else 1  # Default to 1 if no number

    # (unit, days per unit). hour/day/week/month keep their original
    # precedence; minute, second and year previously fell through and
    # returned the raw string, leaving the column with mixed types.
    days_per_unit = (
        ('hour', 0),
        ('day', 1),
        ('week', 7),
        ('month', 30),
        ('minute', 0),
        ('second', 0),
        ('year', 365),
    )
    for unit, factor in days_per_unit:
        if unit in text:
            return amount * factor

    return text

# Convert the posting age with convert_posted_time, then rename the column
# to reflect its new meaning
posted_days = df_jobs['PostedTime'].map(convert_posted_time)
df_jobs['PostedTime'] = posted_days

df_jobs.rename(columns={'PostedTime': 'DaysSincePosted'}, inplace=True)

In [21]:
# Final column layout, assembled from logical groups
identity_columns = ['Title', 'Company', 'Location', 'JobID', 'JobCategory']
attribute_columns = ['SeniorityLevel', 'EmploymentType', 'Industries']
posting_stat_columns = ['DaysSincePosted', 'NumApplicants']
job_function_slot_columns = ['JobFunction1', 'JobFunction2', 'JobFunction3']
text_and_link_columns = ['Description', 'Url']

new_column_order = (
    identity_columns
    + attribute_columns
    + posting_stat_columns
    + job_function_slot_columns
    + text_and_link_columns
)
df_jobs = df_jobs[new_column_order]

In [22]:
# Inspect dtypes and non-null counts after cleaning and reordering
df_jobs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119 entries, 0 to 118
Data columns (total 15 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   Title            119 non-null    object  
 1   Company          119 non-null    object  
 2   Location         119 non-null    object  
 3   JobID            119 non-null    int64   
 4   JobCategory      119 non-null    category
 5   SeniorityLevel   119 non-null    category
 6   EmploymentType   119 non-null    category
 7   Industries       119 non-null    object  
 8   DaysSincePosted  119 non-null    int64   
 9   NumApplicants    119 non-null    int64   
 10  JobFunction1     119 non-null    category
 11  JobFunction2     119 non-null    category
 12  JobFunction3     119 non-null    category
 13  Description      119 non-null    object  
 14  Url              119 non-null    object  
dtypes: category(6), int64(3), object(6)
memory usage: 12.0+ KB


In [24]:
# Columns added from the model's answer, in insertion order.
_ENRICHMENT_COLUMNS = [
    'Workscheme', 'ShortDescription', 'TechStack', 'YoE', 'MinLevelStudies', 'English',
]


def _enrichment_record(response):
    """Flatten one parsed chat-completion answer into the enrichment columns.

    Args:
        response: Object returned by ``generate_chat_completion``; it is
            read with ``.get`` only. Presumably a dict parsed from the
            model's JSON answer - confirm against OpenAIHandler.

    Returns:
        dict: One value per name in ``_ENRICHMENT_COLUMNS``. Missing keys
        become 'N/A'; a missing or empty tech stack becomes ''.
    """
    tech_stack = response.get('TechStack') or []
    if isinstance(tech_stack, str):
        # A bare string would otherwise be joined character by character.
        tech_stack = [tech_stack]

    return {
        'Workscheme': response.get('Workscheme', 'N/A'),
        'ShortDescription': response.get('Description', 'N/A'),
        'TechStack': ', '.join(str(item) for item in tech_stack),
        'YoE': response.get('YoE', 'N/A'),
        'MinLevelStudies': response.get('MinLevelStudies', 'N/A'),
        'English': response.get('English', 'N/A'),
    }


openai_handler = OpenAIHandler()
enrichment_records = []
for description in df_jobs['Description']:
    messages = openai_handler.create_messages(description)
    response = openai_handler.generate_chat_completion(messages)
    enrichment_records.append(_enrichment_record(response))

# Build the new columns in one step instead of growing the frame cell by
# cell; sharing df_jobs' index keeps each record on its own row.
enrichment = pd.DataFrame(enrichment_records, index=df_jobs.index, columns=_ENRICHMENT_COLUMNS)
for column in _ENRICHMENT_COLUMNS:
    df_jobs[column] = enrichment[column]

Initializing OpenAI Handler
Configuring OpenAI Client
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTT

In [25]:
# Canonical work scheme -> free-text variants that should collapse into it
workscheme_mapping = {
    'On-site': ['On-site', 'Onsite'],
    'Remote': ['Remote', 'Fully Remote', 'Fully remote', 'Full-time remote', 'Remote with travel 2 to 4 weeks for events', 
               'Remote with global travel', 'Flexible/Remote', 'part-time, flexible work-from-home', 
               'Part-time, work from home', 'Flexible'],
    'Hybrid': ['Hybrid', 'Hybrid / Remote', 'Full-time, hybrid', 'Hybrid (Mon-Thur onsite, Fri remote)', 
               'Hybrid', 'In-person or hybrid', 'In-office (Tuesdays and Wednesdays)']
}
reverse_mapping = {value: key for key, values in workscheme_mapping.items() for value in values}

# Same lookup keyed by case-folded, stripped text, so variants that differ
# only in casing or surrounding whitespace (e.g. 'remote', ' Hybrid ') map
# to their canonical value instead of falling through to 'N/A'.
_normalized_workscheme_lookup = {
    variant.strip().casefold(): canonical
    for variant, canonical in reverse_mapping.items()
}


def _canonical_workscheme(raw_value):
    """Return 'On-site', 'Remote' or 'Hybrid' for a known variant, else 'N/A'."""
    if not isinstance(raw_value, str):
        return 'N/A'
    return _normalized_workscheme_lookup.get(raw_value.strip().casefold(), 'N/A')


df_jobs['Workscheme'] = df_jobs['Workscheme'].map(_canonical_workscheme)

In [26]:
def extract_min_years(experience_str):
    """Return the smallest number mentioned in an experience requirement.

    Args:
        experience_str: Requirement text or number, e.g. "3-5 years",
            "4+ years in Development, 2+ years in storage", 3 or 2.0.
            The value is converted with ``str`` before parsing.

    Returns:
        int | float | str: The minimum number found, as an int when it is
        a whole number and as a float otherwise; 'N/A' when the text
        contains 'N/A', the fixed "Professional software development
        experience required" phrase, or no number at all.
    """
    # Ensure experience_str is a string
    experience_str = str(experience_str)

    # Handle 'N/A' and non-numeric cases
    if 'N/A' in experience_str or 'Professional software development experience required' in experience_str:
        return 'N/A'

    # Keep decimals together: matching bare digit runs would read "2.0" as
    # the two numbers 2 and 0 and report a minimum of 0.
    numbers = re.findall(r'\d+(?:\.\d+)?', experience_str)

    # If no numbers found, return 'N/A'
    if not numbers:
        return 'N/A'

    smallest = min(map(float, numbers))
    return int(smallest) if smallest.is_integer() else smallest

# Derive 'MinYoE' from 'YoE' with extract_min_years
df_jobs['MinYoE'] = df_jobs['YoE'].map(extract_min_years)

In [27]:
# (label, pattern) pairs checked in order, lowest level first, against the
# lower-cased text. Abbreviations are anchored on word boundaries so that
# "bs" / "ms" no longer match inside words such as "jobs" or "systems".
_STUDY_LEVEL_PATTERNS = (
    ("Undergraduate Student", re.compile(r"student|undergraduate")),
    ("Bachelor", re.compile(r"bachelor|\bb\.?s\.?c?\b")),
    ("Masters", re.compile(r"master|\bm\.?s\.?c?\b")),
    ("PhD", re.compile(r"\bph\.?\s?d|doctora")),
)


def categorize_studies(level):
    """Map a free-text education requirement onto a fixed set of labels.

    Args:
        level: Requirement text such as "Bachelor's degree in CS" or
            "MS in Statistics". Non-string values are treated as unknown.

    Returns:
        str: "Undergraduate Student", "Bachelor", "Masters", "PhD" or
        "N/A". When several levels are mentioned, the first one in that
        order wins.
    """
    if not isinstance(level, str):
        return "N/A"

    level = level.lower()
    for label, pattern in _STUDY_LEVEL_PATTERNS:
        if pattern.search(level):
            return label
    return "N/A"
# Replace each free-text study requirement with its categorize_studies label
df_jobs['MinLevelStudies'] = df_jobs['MinLevelStudies'].map(categorize_studies)

In [28]:
# Keyword lists per technology category. The categorization cell further down
# creates one 0/1 column per key and flags a posting when any keyword occurs
# as a case-sensitive substring of one of its comma-separated TechStack
# entries. Several keywords are listed under more than one category
# (e.g. 'Data Engineering', 'Data Quality'), so one entry can set several flags.
tech_stack_categories = {
    'Agile Methodologies': [
        'Scrum', 'SAFe', 'Agile', 'Agile SDLC', 'Kanban', 'Agile'
    ],
    'Back-End Development': [
        'Node.js', 'ASP.NET', 'Spring Boot', 'Django', 'Flask', 'Ruby on Rails', 
        '.NET Core', 'FastAPI', 'Golang', 'C#'
    ],
    'Big Data Tools': [
        'Hadoop', 'Spark', 'Hive', 'Databricks', 'Airflow', 'BigQuery', 
        'Teradata', 'ClickHouse', 'AWS Glue', 'Big Data', 'Big Data Stack', 
        'SnapLogic', 'DataDog', 'Alteryx', 'Talend ETL'
    ],
    'Cloud Platforms': [
        'AWS', 'Azure', 'GCP', 'Google Cloud Platform (GCP)', 
        'Cloud Computing', 'Microsoft Azure', 'AWS Redshift', 
        'AWS S3', 'Azure SQL Databases', 'Google BigQuery', 'Azure Data Factory',
        'Azure Synapse', 'Azure API App Services', 'Azure Data Bricks', 
        'Azure Data Lake', 'Azure ADLS Gen2', 'AWS Lambda', 'Google Cloud', 
        'Azure Data Lake Storage', 'Amazon Web Services', 'Cloud Infrastructure'
    ],
    'Containerization and Orchestration': [
        'Docker', 'Kubernetes', 'Containerization', 'ECS', 'LXD'
    ],
    'Data Analysis': [
        'Data Analysis', 'Statistical Modeling', 'Statistical Analysis', 
        'Data Analytics', 'Data Mining', 'Data Quality', 'Data Cleansing', 
        'Data Normalization', 'Data Sanitization', 'Statistical Techniques', 
        'Statistical Methods', 'Analytical Tools', 'Data Management', 
        'Data Science', 'Data Science Tools', 'Data Modeling', 'Data Queries', 
        'Data Flows', 'Data Manipulation', 'Data Platforms', 'Data Warehousing', 
        'Data Engineering', 'Data Visualization', 'Data Visualization Tools'
    ],
    'Data Engineering': [
        'Data Engineering', 'Data Warehousing', 'ETL', 'Data Lakes', 
        'Data Flows', 'Data Migrations', 'Data Integration', 'Data Processing', 
        'Data Platforms', 'Data Quality', 'Data Management'
    ],
    'Data Modeling': [
        'Data Modeling', 'Data Architectures', 'Data Structures', 'Data Schema Design', 
        'Database Modeling', 'Conceptual Data Models', 'Logical Data Models', 
        'Physical Data Models'
    ],
    'Data Visualization': [
        'Power BI', 'Tableau', 'Qlik', 'Matplotlib', 'Plotly', 'D3.js', 
        'Excel', 'Looker', 'Apache Superset', 'Data Visualization Tools', 
        'BI Tools', 'Dashboard Development', 'Visualization Tools'
    ],
    'Database Management': [
        'SQL', 'NoSQL', 'MongoDB', 'SQL Server', 'CosmosDB', 'MySQL', 
        'PostgreSQL', 'Oracle', 'SAP HANA', 'SAP ECC', 'SAP S/4HANA', 
        'Non-SQL Databases', 'DB2', 'PL/SQL', 'Cassandra', 'Redis', 'Sybase', 
        'Data Lake', 'Database', 'Database Management', 'Data Warehouses', 
        'Database Schema Design', 'Stored Procedures', 'Data Migrations', 
        'SQL DW', 'PostgresSQL'
    ],
    'Front-End Development': [
        'React', 'Angular', 'Bootstrap', 'Vue.js', 'CSS', 'HTML', 'SwiftUI', 
        'Front End Development'
    ],
    'Infrastructure as Code (IaC) and Automation': [
        'Terraform', 'Ansible', 'Helm', 'OpenStack', 'Infrastructure-as-Code', 
        'ARM Templates', 'Automation', 'Git', 'CI/CD pipelines'
    ],
    'Machine Learning': [
        'Scikit-Learn', 'TensorFlow', 'PyTorch', 'Keras', 'MLFlow', 
        'Spark ML', 'XGBoost', 'LightGBM', 'Feature Engineering', 
        'A/B Testing', 'Machine Learning', 'Algorithms', 'Google AutoML', 
        'Hugging Face', 'Kubeflow', 'SciPy', 'ML Models'
    ],
    'Networking': [
        'WiFi', 'Networking', 'VPC', 'Network Security', 'Cloud Security'
    ],
    'Python': [
        'Python'
    ],
    'Testing and Quality Assurance': [
        'Unit Testing', 'Integration Testing', 'Feature Testing', 'Performance Tuning', 
        'Load Testing', 'Testing Tools', 'SOAP UI', 'Quality Assurance'
    ]
}

In [29]:
# Work on a copy so the tech-stack flag columns added below do not end up on df_jobs
df = df_jobs.copy()

In [30]:
# Function to categorize tech stack
def categorize_tech_stack(tech_stack):
    """Return the names of the flag columns that apply to one TechStack value.

    An entry matches a category when any keyword of that category occurs as
    a case-sensitive substring of the entry.

    Args:
        tech_stack: Comma-separated technology names. A non-string value
            (e.g. a missing value) is treated as having no entries.

    Returns:
        list[str]: Matching keys of ``tech_stack_categories`` in dictionary
        order, or ``['Other']`` when nothing matches.
    """
    if not isinstance(tech_stack, str):
        return ['Other']

    tech_stack_elements = [element.strip() for element in tech_stack.split(',')]
    matched_categories = [
        category
        for category, items in tech_stack_categories.items()
        if any(item in element for item in items for element in tech_stack_elements)
    ]
    return matched_categories or ['Other']


# Initialize one flag column per category, plus 'Other', with 0s
for category in list(tech_stack_categories) + ['Other']:
    df[category] = 0

# Apply categorization. The row label is taken from the loop itself, so the
# function does not depend on a global loop variable being named 'index'.
for index, tech_stack in df['TechStack'].items():
    for category in categorize_tech_stack(tech_stack):
        df.at[index, category] = 1

In [31]:
# Notebook display settings; None means no limit
display_options = {
    'display.max_columns': None,
    'display.max_rows': None,
    'display.max_colwidth': 50,  # Default value for max_colwidth
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [32]:
# Columns left out of the trimmed view: posting details first, then the
# tech-stack flag columns that are not kept
posting_detail_columns = [
    'JobID', 'Industries', 'DaysSincePosted', 'NumApplicants', 'Description',
    'JobFunction1', 'JobFunction2', 'JobFunction3',
]
unused_flag_columns = [
    'Agile Methodologies', 'Back-End Development', 'Containerization and Orchestration',
    'Front-End Development', 'Infrastructure as Code (IaC) and Automation',
    'Networking', 'Testing and Quality Assurance',
]
columns_to_drop = posting_detail_columns + unused_flag_columns
df_new = df.drop(labels=columns_to_drop, axis='columns')

In [33]:
# Show the first row of the trimmed frame to check which columns remain
df_new.head(1)

Unnamed: 0,Title,Company,Location,JobCategory,SeniorityLevel,EmploymentType,Url,Workscheme,ShortDescription,TechStack,YoE,MinLevelStudies,English,MinYoE,Big Data Tools,Cloud Platforms,Data Analysis,Data Engineering,Data Modeling,Data Visualization,Database Management,Machine Learning,Python,Other
0,Jr. Data Scientist,Arca Continental,Monterrey,Data Science,Associate,Full-time,https://mx.linkedin.com/jobs/view/jr-data-scie...,,We are looking for a Junior Data Scientist wit...,"Python, Machine Learning, Algorithms, Data Ana...",,Bachelor,True,,0,0,1,0,0,0,0,1,1,0


In [34]:
# Coerce MinYoE to numbers; values that cannot be parsed become NaN
df_new['MinYoE'] = pd.to_numeric(df['MinYoE'], errors='coerce')

# One named mask per criterion; a posting must satisfy all of them
experience_ok = df_new['MinYoE'].lt(5) | df_new['MinYoE'].isna()
bachelor_level = df_new['MinLevelStudies'].eq('Bachelor')
english_flagged = df_new['English'].eq(True)
full_time = df_new['EmploymentType'].eq('Full-time')
remote_or_unknown = df_new['Workscheme'].isin(['Remote', 'N/A'])

filtered_df = df_new.loc[
    experience_ok & bachelor_level & english_flagged & full_time & remote_or_unknown
].reset_index(drop=True)

In [35]:
# Preview the first five postings that passed the filter
filtered_df.head(5)

Unnamed: 0,Title,Company,Location,JobCategory,SeniorityLevel,EmploymentType,Url,Workscheme,ShortDescription,TechStack,YoE,MinLevelStudies,English,MinYoE,Big Data Tools,Cloud Platforms,Data Analysis,Data Engineering,Data Modeling,Data Visualization,Database Management,Machine Learning,Python,Other
0,Jr. Data Scientist,Arca Continental,Monterrey,Data Science,Associate,Full-time,https://mx.linkedin.com/jobs/view/jr-data-scie...,,We are looking for a Junior Data Scientist wit...,"Python, Machine Learning, Algorithms, Data Ana...",,Bachelor,True,,0,0,1,0,0,0,0,1,1,0
1,ML Engineer (Engineer Software Development),NEORIS,Monterrey,Data Science,Entry level,Full-time,https://mx.linkedin.com/jobs/view/ml-engineer-...,,We are looking for an ML Engineer to support t...,"Python, Snowflake, SQL, FastAPI, Django, Flask...","4+ years in Development, 4+ years in storage t...",Bachelor,True,2.0,0,1,0,0,0,0,1,0,1,0
2,AI Engineer,NEORIS,Monterrey,AI/ML,Entry level,Full-time,https://mx.linkedin.com/jobs/view/ai-engineer-...,,We are looking for an AI Engineer in Monterrey...,"Python, Scikit Learn, TensorFlow, PyTorch, Ker...",,Bachelor,True,,0,1,0,0,0,0,1,1,1,0
3,Data Scientist,Chubb,Monterrey,Data Science,Entry level,Full-time,https://mx.linkedin.com/jobs/view/data-scienti...,,We are seeking a talented and motivated Machin...,"Python, R, Java, TensorFlow, PyTorch, scikit-l...",3,Bachelor,True,3.0,1,1,0,0,0,0,1,1,1,0
4,AI Developer,SAP,San Pedro,AI/ML,,Full-time,https://mx.linkedin.com/jobs/view/ai-developer...,,"As an GCID SRRC AI Developer, you will contrib...","Python, Go, Cloud Computing, Big Data, Front E...",,Bachelor,True,,1,1,0,0,0,0,0,1,1,0


In [36]:
# Write the full enriched frame `df` (not the trimmed df_new or filtered_df) to CSV
# without the index column. NOTE(review): 'utf-8-sig' prepends a BOM, presumably
# so spreadsheet tools detect the encoding - confirm this is intended.
df.to_csv("LinkedIn_Jobs_Data_Scientist_Monterrey_2024-09-04_FullInfo_Stack.csv", index=False, encoding='utf-8-sig')