In [1]:
import json, glob, os
import random, datetime

import logging
import pandas as pd
import csv

from jobspy import scrape_jobs


In [2]:
"""

ids are unique, global static

Agent Fisher
    - run tru all users, their settings, and their keywords
    - save the results in their corresponding folders

Agent Perry
`   
    - add new users and jobs. check if both are valid

    - ELT the data - remove duplicates, clean, etc
    - save the (agg)results in their corresponding folders

    - Generate Reports and Stats
    - sent emails, text messages?

Agent Krieger
    - run tru all jobs and apply to them


    LOGGING

    - folder structure does a lot for logging
    - log all errors and warnings - and the files that triggered them
    - saving files (advace state) is ~ permisive as of now.

    -----------

    - Log all errors and warnings
    - Log all successful runs
    - Log all failed runs

     [RUN-LOGS-(package)-E-Mail]

     encypted zip - stats.csv, sigma_quo.txt, 

     - polymorphic/other dp, encap, graph inherit


"""


class Agent_Krieger():
    pass




In [21]:

class Agent_Perry():

    """
    
    Perry is the Brain of the operation. He is responsible for the ELT of the data, and the 
    generation of reports and stats.
    
    for each user,
        for each date
            one dataframe with the columns found in the kw csv (standard) as well as date and 
            config folder columns to identify the source of the data



        jobs = jobs.drop_duplicates(subset=["title", "company"], keep="first")
        print(f"Number of (unique) jobs found: {len(jobs)}")

    """

    def __init__(self):
          
          print(" / ***Agent Perry is online *** /")
          print("Instantiating Perry...")
          self.user_configs = self.get_user_configs()
          self.perrys_todo = self.get_perrys_todo()
    
    # using the config files you can determine the users
    # then determine the dates for each user
    # then determine the search configs for each user

    @staticmethod # redundant, given it is a static method chekc later
    def get_user_configs() -> list[dict]: 

        user_config_paths = glob.glob('users/**/*.json')
        print(f" ---> Config file paths found : {user_config_paths}")
        user_configs = []

        for user_config_path in user_config_paths:
            with open(user_config_path) as f:
                tmp_dict = json.load(f)
                user_configs.append(tmp_dict)
        
        return user_configs


    def get_perrys_todo(self) -> dict:

        """
        This function helps with the ELT of the data. It returns a dictionary, which contains is 
        constructed to easily iterate over the data and apply the ELT process to it.

        """
        print(" ---> Getting Perry's To-Do List...")
        perrys_todo = {}

        for user_config in self.user_configs:
            username = user_config['user']

            try:
                #each search config folder we find has to be in this list, otherwise it can't be right.
                search_settings_user = [search_settings['name'] for search_settings in user_config['search_settings']]
                perrys_todo[username] = {}

            except KeyError:
                print(f"User: {username} - No search settings found - Key Error")
                continue

            if not search_settings_user:
                print(f"User: {username} - No search settings found - Empty List")
                continue

            
            dates = glob.glob(f'users/{username}/*')
            dates = [os.path.basename(date) for date in dates if "." not in date]
                     #(date.endswith('.json') == False) and (date.endswith('.csv') == False)]  

            for date in dates:
                search_setting_folder_paths = glob.glob(f'users/{username}/{date}/*')
                search_setting_folder_paths = [folder for folder in search_setting_folder_paths if "." not in folder]
                perrys_todo[username][date] = search_setting_folder_paths


            # for each date in each user --->
            #   [username][date] = destination folder
            #   [username][date][search_setting_folderpath] = source folder(s)
            #   ... -> {'lucky': {'20240306': ['users/lucky/20240306/ger_py', 'users/lucky/20240306/lux_py']}}

            # Example:
            
            for user, scraper_runs in perrys_todo.items():
                
                print(f"\n --- Destination folders for {user}: ---")

                for date_entry, search_setting_folders in scraper_runs.items():
                    destination_folder = os.path.join('users', user, date_entry)
                    print(f"{destination_folder}")
                    
                    for search_setting_folder in search_setting_folders:
                        search_name = os.path.basename(search_setting_folder)

                        # check if search setting folder found by glob is actually in the user config
                        if search_name in search_settings_user:
                            print(f"    - {os.path.basename(search_setting_folder)} --- OK")
                        else:
                            print(f"    - {os.path.basename(search_setting_folder)} --- NOT FOUND")
                            return 0 # *** int has no attr items Error if this happens

        return perrys_todo

    def daily_aggregate(self) -> None:

        print(" ---> Creating Daily Aggregates...")

        for user, scraper_runs in self.perrys_todo.items():

            print(f"\n --- Aggregating data for {user}: ---")

            for date_entry, search_setting_folders in scraper_runs.items():
                destination_folder = os.path.join('users', user, date_entry)
                template = pd.DataFrame(columns=[ "job_url",
                            "site", "title", "company", "company_url", "location", "job_type",
                            "date_posted", "interval", "min_amount", "max_amount", "currency",
                            "is_remote", "num_urgent_words", "benefits", "emails", "description",
                            "user","date","SS","KW"])
                                
                for search_setting_folder in search_setting_folders:
                    files_to_concat = glob.glob(f"{search_setting_folder}/*.csv")

                    for file in files_to_concat:
                    
                        read_df = pd.read_csv(file)

                        if read_df.empty:
                            print(f"    - {os.path.basename(file)} --- EMPTY")
                            continue

                        read_df["user"] = user
                        read_df["date"] = date_entry
                        read_df["SS"] = os.path.basename(search_setting_folder)
                        read_df["KW"] = os.path.basename(file).replace(".csv", "")
                        template = pd.concat([template, read_df])
                
                # listing with the same title from the same company are considered duplicates, because
                # they are usually the same job offer, just posted multiple times for different locations
                template = template.drop_duplicates("job_url", keep="first")
                template = template.drop_duplicates(subset=["title", "company"], keep="first")

                template.to_csv(f"{destination_folder}/agg.csv", index=False)

            
    def agg_the_agg(self) -> pd.DataFrame:

        print(" ---> Aggregating the Aggregates...")

        for user in self.perrys_todo:

            agg_files = glob.glob(f"users/{user}/**/agg.csv")
            if not agg_files:
                print(f"    - No Aggregates found for {user}")
                continue

            print(f"\n --- Aggregating Aggregates for {user}: ---")
            agg_df = pd.DataFrame(columns=[ "job_url",
                            "site", "title", "company", "company_url", "location", "job_type",
                            "date_posted", "interval", "min_amount", "max_amount", "currency",
                            "is_remote", "num_urgent_words", "benefits", "emails", "description",
                            "user","date","SS","KW"])
            
            for agg_file in agg_files:
                read_df = pd.read_csv(agg_file)
                agg_df = pd.concat([agg_df, read_df])


            agg_df.drop(["interval", "min_amount", "max_amount", "currency",
                            "num_urgent_words", "benefits", "emails", "description"], axis=1, inplace=True)
        

            # sort by date, oldest first
            agg_df = agg_df.sort_values(by="date", ascending=True)

            # given that the table is now sorted by date (oldest first), we can drop duplicates
            #  --- scrapes on consecutive days will have the same job offers, so we keep the first
            # that also mean that the last date will only contain jobs that were posted on that day,
            # even if the scrape setting is set to 3 days!
            agg_df = agg_df.drop_duplicates("job_url", keep="first")
            agg_df = agg_df.drop_duplicates(subset=["title", "company"], keep="first")

            # all jobs ever found for this user (except duplicates)
            agg_df.to_csv(f"users/{user}/agg_agg.csv", index=False)
            
            # now we make a cut with only the final date

            final_date = agg_df["date"].max()
            agg_df_quo = agg_df[agg_df["date"] == final_date]
            agg_df_quo.to_csv(f"users/{user}/agg_agg_{final_date}.csv", index=False)
            
            # save all links of the job to a txt file - to list
            
            list_of_links = agg_df_quo["job_url"].tolist()

            print(len(list_of_links))

            with open(f"users/{user}/list_of_links.txt", "w") as f:

                count = 0
                for link in list_of_links:
                    f.write(f"{link}\n")
                    count += 1
                    if count % 25 == 0:
                        f.write(f"\n     --- {count} Links ---\n\n")
                
                if count % 25 != 0: f.write(f"\n     --- {count} Links ---\n\n")



    def send_logs() -> None:
        pass

    def send_email() -> None:
        pass


agent_perry = Agent_Perry()
agent_perry.daily_aggregate()
agent_perry.agg_the_agg()


 / ***Agent Perry is online *** /
Instantiating Perry...
 ---> Config file paths found : ['users/tdawg/tdawg_config.json', 'users/lucky/lucky_config.json']
 ---> Getting Perry's To-Do List...
User: tdawg - No search settings found - Key Error

 --- Destination folders for lucky: ---
users/lucky/20240306
    - ger_py --- OK
    - lux_py --- OK
users/lucky/20240307
    - ger_py --- OK
    - lux_py --- OK
 ---> Creating Daily Aggregates...

 --- Aggregating data for lucky: ---
    - signal processing.csv --- EMPTY
    - prompt engineer.csv --- EMPTY
    - generative model.csv --- EMPTY
    - openai.csv --- EMPTY
    - genai.csv --- EMPTY
    - prompt engineer.csv --- EMPTY
    - generative model.csv --- EMPTY
    - openai.csv --- EMPTY
    - künstliche intelligenz.csv --- EMPTY


  template = pd.concat([template, read_df])


    - ki.csv --- EMPTY
    - signal processing.csv --- EMPTY
    - prompt engineer.csv --- EMPTY
    - generative model.csv --- EMPTY
    - openai.csv --- EMPTY
    - genai.csv --- EMPTY


  template = pd.concat([template, read_df])


    - prompt engineer.csv --- EMPTY
    - openai.csv --- EMPTY
    - künstliche intelligenz.csv --- EMPTY
    - ki.csv --- EMPTY
 ---> Aggregating the Aggregates...

 --- Aggregating Aggregates for lucky: ---
67


  agg_df = pd.concat([agg_df, read_df])


In [3]:

class Agent_Fisher():
    def __init__(self):

        """
        Agent Fisher is the muscle of the operation. It is responsible for running the scrapes.

        get_user_configs - returns a list of dictionaries, each dictionary is a user config.
        get_proxy - returns a string of the proxy to be passed to the scraper as a string.
        get_date - returns a string of the date in the format YYYYMMDD.
        Date, user and search_settings(stored in user_configs - eg. IT jobs in london, 
        remote teaching jobs in Germany). 
        """
        print(" / ***Agent Fisher is online *** /")
        print("Instantiating Fisher...")
        self.date_run = self.get_date()
        self.proxy = self.get_proxy()
        self.user_configs = self.get_user_configs()

    # --- INIT Functions --- #

    @staticmethod
    def get_date() -> str:
        now = datetime.datetime.now()
        date = now.strftime("%Y%m%d")
        
        return date
    
    @staticmethod
    def get_user_configs() -> list[dict]: 

        user_config_paths = glob.glob('users/**/*.json')
        print(f" ---> Config file paths found : {user_config_paths}")
        user_configs = []

        for user_config_path in user_config_paths:
            with open(user_config_path) as f:
                tmp_dict = json.load(f)
                user_configs.append(tmp_dict)
        
        return user_configs

    @staticmethod # proxy path is hardcoded
    def get_proxy() -> str:
        
        proxy_path = os.path.join("results", "proxy.txt")
        with open(proxy_path, "r") as f:
            proxy = f.read().strip()

        return proxy


    # --- SEARCH --- #

    def update_keywords_left(self, kewords_for_this_search: list, ss_path: str) -> list[str]:

        """
        This function checks which keywords have already been run and determines the state of the run.
        If a keyword has already been run (clean, no exception thrown) it is removed from the list (state).
        Exceptions are there to handle the case where the run was not completed, incomplete or the proxy
        was blocked.

        ss_path: str - user/date/search_setting - path to the search setting folder

        Note - the order of the keywods run is picked randomly. To spice things up a bit ...
        """
        
        files = glob.glob(f"{ss_path}/*.csv") ### os path join
        files = [os.path.basename(file) for file in files]

        keywords_done = [keyword for keyword in kewords_for_this_search if f"{keyword}.csv" in files]
        keywords_left = [keyword for keyword in kewords_for_this_search if keyword not in keywords_done]

        print(f"--- Path: {ss_path}/*.csv ---")
        print(f"Keywords done: {len(keywords_done)} : {keywords_done}")
        print(f"Keywords left: {len(keywords_left)} : {keywords_left}")
        return keywords_left


    def run_search_setting(self, username: str, search_setting: dict, date: str) -> bool:
            
        empty_jobs = pd.DataFrame(columns=[ "job_url",
        "site", "title", "company", "company_url", "location", "job_type",
        "date_posted", "interval", "min_amount", "max_amount", "currency",
        "is_remote", "num_urgent_words", "benefits", "emails", "description"])

        keywords = search_setting['keywords']
        jobs = empty_jobs

        ss_path = os.path.join("users",username, date, search_setting['name'])
        keywords_left = self.update_keywords_left(keywords, ss_path)

        # ---- Iterate over keywords ---- #
        while keywords_left:

            keyword = random.choice(keywords_left)
            kw_path = os.path.join(ss_path, f"{keyword}.csv")

            print(f"Keyword: *** {keyword} *** -> Starting search...")

            try:
                jobs = scrape_jobs(
                    site_name=search_setting['site_name'],

                    search_term=keyword,
                    proxy=self.proxy,

                    hours_old=search_setting['hours_old'],
                    is_remote=search_setting['is_remote'],
                    results_wanted=search_setting['results_wanted'],
                    country_indeed=search_setting['country_indeed']  # only needed for indeed / glassdoor
                )

            except Exception as e:
                print(f"Error with keyword: {keyword}")
                print(f"{e}")
                if "Bad proxy" in str(e):
                    # if the proxy is not working, the program will never advance, so we stop it.
                    print("Bad proxy, stopping the program")

                    " TODO: Outbound email to notify the admin, maybe change proxy"
                    return False
                
                elif "Could not find any results for the search" in str(e):
                    # if this happens, we write an empty file and continue, because the prog does not advance
                    # NOTE : even tho this error is thrown, searching indeed for the kw often returns results
                    print(f"No jobs found for keyword: {keyword}. Writing empty file...")
                    empty_jobs.to_csv(kw_path, quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False)
                    keywords_left = self.update_keywords_left(keywords, ss_path)
                
                continue

                #2024-03-06 13:36:15,683 - JobSpy - ERROR - Indeed: failed to do request: Get "https://lu.indeed.com/m/jobs?q=openai&l=luxembourg&filter=0&start=70&sort=date&fromage=3": http: server gave HTTP response to HTTPS client
                #HTTPSConnectionPool(host='apis.indeed.com', port=443): Max retries exceeded with url: /graphql (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1007)')))
                #Error with keyword: openai Could not find any results for the search. /graphql (Caused by ProxyError('Unable to connect to proxy', 

                # Not all exceptions are critical, and for most we can try again
                # this is why we continue and not save the empty file
            
            print(f"Number of jobs found: {len(jobs)}")
            jobs = jobs.drop_duplicates(subset=["job_url"], keep="first")
            
            if jobs.empty:
                print(f"No jobs found for keyword: {keyword}. Writing empty file...")
                empty_jobs.to_csv(kw_path, quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False)
                
            else:
                print(f"Writing jobs to file: {kw_path}")
                jobs.to_csv(kw_path, quoting=csv.QUOTE_NONNUMERIC, escapechar="\\", index=False)
                jobs = empty_jobs

            keywords_left = self.update_keywords_left(keywords, ss_path)
        # --- End of while loop --- #
        
        return True

    # --- Overarching --- #

    def run_user(self, user_config: dict) -> None:
        
        username = user_config['user']
        print(f"Running user: {username}")
        
        successful_run = []

        try:
            search_settings = user_config['search_settings']
        except KeyError:
            print(f"User: {username} - No search settings found - Key Error")
            return

        if not search_settings:
            print(f"User: {username} - No search settings found - Empty List")
            return

        for search_setting in user_config['search_settings']:

            # try creqting the folder
            date_path = os.path.join("users", username, self.date_run)
            search_setting_path = os.path.join(date_path, search_setting['name'])

            #if folders already exist, we skip the creation. Take this into account for Perry
            if not os.path.exists(date_path):
                os.makedirs(date_path)
                print(f"Created folder: {date_path}")
            if not os.path.exists(search_setting_path):
                os.makedirs(search_setting_path)
                print(f"Created folder: {search_setting_path}")

            print(f"Running search setting: {search_setting['name']} for user: {username}")
            successful_run.append(self.run_search_setting(username, search_setting, self.date_run))

        
        if all(successful_run):
            print(f"User: {username} - All search settings ran successfully")
        else:
            print(f"User: {username} - Some search settings failed. Run is incomplete")


    def run_all_users(self):

        #date = self.date_run
        #user_configs = self.user_configs


        #print(f" ---> Date : {date}")
        pass

agent_fish = Agent_Fisher()
agent_fish.run_user(agent_fish.user_configs[1])





 / ***Agent Fisher is online *** /
Instantiating Fisher...
 ---> Config file paths found : ['users/tdawg/tdawg_config.json', 'users/lucky/lucky_config.json']
Running user: lucky
Created folder: users/lucky/20240307
Created folder: users/lucky/20240307/lux_py
Running search setting: lux_py for user: lucky
--- Path: users/lucky/20240307/lux_py/*.csv ---
Keywords done: 0 : []
Keywords left: 14 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'prompt engineer', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai', 'artificial intelligence']
Keyword: *** artificial intelligence *** -> Starting search...


2024-03-07 14:53:12,136 - JobSpy - ERROR - Indeed response status code 403


Number of jobs found: 3
Writing jobs to file: users/lucky/20240307/lux_py/artificial intelligence.csv
--- Path: users/lucky/20240307/lux_py/*.csv ---
Keywords done: 1 : ['artificial intelligence']
Keywords left: 13 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'prompt engineer', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai']
Keyword: *** genai *** -> Starting search...
Number of jobs found: 1
Writing jobs to file: users/lucky/20240307/lux_py/genai.csv
--- Path: users/lucky/20240307/lux_py/*.csv ---
Keywords done: 2 : ['genai', 'artificial intelligence']
Keywords left: 12 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'prompt engineer', 'openai', 'language model', 'generative model', 'ki', 'künstliche intelligenz', 'ai']
Keyword: *** language model *** -> Starting search...


2024-03-07 14:53:30,583 - JobSpy - ERROR - Indeed response status code 403


Number of jobs found: 23
Writing jobs to file: users/lucky/20240307/lux_py/language model.csv
--- Path: users/lucky/20240307/lux_py/*.csv ---
Keywords done: 3 : ['language model', 'genai', 'artificial intelligence']
Keywords left: 11 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'prompt engineer', 'openai', 'generative model', 'ki', 'künstliche intelligenz', 'ai']
Keyword: *** signal processing *** -> Starting search...
Number of jobs found: 2
Writing jobs to file: users/lucky/20240307/lux_py/signal processing.csv
--- Path: users/lucky/20240307/lux_py/*.csv ---
Keywords done: 4 : ['signal processing', 'language model', 'genai', 'artificial intelligence']
Keywords left: 10 : ['python', 'database', 'nlp', 'natural language', 'prompt engineer', 'openai', 'generative model', 'ki', 'künstliche intelligenz', 'ai']
Keyword: *** database *** -> Starting search...
Number of jobs found: 30
Writing jobs to file: users/lucky/20240307/lux_py/database.csv
--- Path: users/l

2024-03-07 14:55:20,202 - JobSpy - ERROR - Indeed response status code 403


Number of jobs found: 0
No jobs found for keyword: künstliche intelligenz. Writing empty file...
--- Path: users/lucky/20240307/lux_py/*.csv ---
Keywords done: 12 : ['database', 'nlp', 'natural language', 'signal processing', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai', 'artificial intelligence']
Keywords left: 2 : ['python', 'prompt engineer']
Keyword: *** python *** -> Starting search...


2024-03-07 14:55:58,293 - JobSpy - ERROR - Indeed: net/http: request canceled (Client.Timeout or context cancellation while reading body)


Number of jobs found: 32
Writing jobs to file: users/lucky/20240307/lux_py/python.csv
--- Path: users/lucky/20240307/lux_py/*.csv ---
Keywords done: 13 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai', 'artificial intelligence']
Keywords left: 1 : ['prompt engineer']
Keyword: *** prompt engineer *** -> Starting search...
Number of jobs found: 0
No jobs found for keyword: prompt engineer. Writing empty file...
--- Path: users/lucky/20240307/lux_py/*.csv ---
Keywords done: 14 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'prompt engineer', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai', 'artificial intelligence']
Keywords left: 0 : []
Created folder: users/lucky/20240307/ger_py
Running search setting: ger_py for user: lucky
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 0 : []
Keywor

  jobs_df = pd.concat(jobs_dfs, ignore_index=True)


Number of jobs found: 4
Writing jobs to file: users/lucky/20240307/ger_py/nlp.csv
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 1 : ['nlp']
Keywords left: 13 : ['python', 'database', 'natural language', 'signal processing', 'prompt engineer', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai', 'artificial intelligence']
Keyword: *** signal processing *** -> Starting search...
Error with keyword: signal processing
Could not find any results for the search
No jobs found for keyword: signal processing. Writing empty file...
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 2 : ['nlp', 'signal processing']
Keywords left: 12 : ['python', 'database', 'natural language', 'prompt engineer', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai', 'artificial intelligence']
Keyword: *** python *** -> Starting search...


  jobs_df = pd.concat(jobs_dfs, ignore_index=True)


Number of jobs found: 69
Writing jobs to file: users/lucky/20240307/ger_py/python.csv
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 3 : ['python', 'nlp', 'signal processing']
Keywords left: 11 : ['database', 'natural language', 'prompt engineer', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai', 'artificial intelligence']
Keyword: *** artificial intelligence *** -> Starting search...


2024-03-07 14:57:12,622 - JobSpy - ERROR - Indeed response status code 403


Number of jobs found: 4
Writing jobs to file: users/lucky/20240307/ger_py/artificial intelligence.csv
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 4 : ['python', 'nlp', 'signal processing', 'artificial intelligence']
Keywords left: 10 : ['database', 'natural language', 'prompt engineer', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai']
Keyword: *** künstliche intelligenz *** -> Starting search...


  jobs_df = pd.concat(jobs_dfs, ignore_index=True)


Number of jobs found: 150
Writing jobs to file: users/lucky/20240307/ger_py/künstliche intelligenz.csv
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 5 : ['python', 'nlp', 'signal processing', 'künstliche intelligenz', 'artificial intelligence']
Keywords left: 9 : ['database', 'natural language', 'prompt engineer', 'openai', 'language model', 'generative model', 'genai', 'ki', 'ai']
Keyword: *** database *** -> Starting search...


2024-03-07 14:57:43,732 - JobSpy - ERROR - Indeed response status code 403
  jobs_df = pd.concat(jobs_dfs, ignore_index=True)


Number of jobs found: 42
Writing jobs to file: users/lucky/20240307/ger_py/database.csv
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 6 : ['python', 'database', 'nlp', 'signal processing', 'künstliche intelligenz', 'artificial intelligence']
Keywords left: 8 : ['natural language', 'prompt engineer', 'openai', 'language model', 'generative model', 'genai', 'ki', 'ai']
Keyword: *** generative model *** -> Starting search...
Error with keyword: generative model
Could not find any results for the search
No jobs found for keyword: generative model. Writing empty file...
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 7 : ['python', 'database', 'nlp', 'signal processing', 'generative model', 'künstliche intelligenz', 'artificial intelligence']
Keywords left: 7 : ['natural language', 'prompt engineer', 'openai', 'language model', 'genai', 'ki', 'ai']
Keyword: *** natural language *** -> Starting search...


  jobs_df = pd.concat(jobs_dfs, ignore_index=True)


Number of jobs found: 8
Writing jobs to file: users/lucky/20240307/ger_py/natural language.csv
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 8 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'generative model', 'künstliche intelligenz', 'artificial intelligence']
Keywords left: 6 : ['prompt engineer', 'openai', 'language model', 'genai', 'ki', 'ai']
Keyword: *** language model *** -> Starting search...


2024-03-07 14:58:26,796 - JobSpy - ERROR - Indeed response status code 403
  jobs_df = pd.concat(jobs_dfs, ignore_index=True)


Number of jobs found: 39
Writing jobs to file: users/lucky/20240307/ger_py/language model.csv
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 9 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'language model', 'generative model', 'künstliche intelligenz', 'artificial intelligence']
Keywords left: 5 : ['prompt engineer', 'openai', 'genai', 'ki', 'ai']
Keyword: *** genai *** -> Starting search...
Error with keyword: genai
Could not find any results for the search
No jobs found for keyword: genai. Writing empty file...
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 10 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'language model', 'generative model', 'genai', 'künstliche intelligenz', 'artificial intelligence']
Keywords left: 4 : ['prompt engineer', 'openai', 'ki', 'ai']
Keyword: *** openai *** -> Starting search...
Error with keyword: openai
Could not find any results for the search
No jobs found for keywo

  jobs_df = pd.concat(jobs_dfs, ignore_index=True)


Number of jobs found: 196
Writing jobs to file: users/lucky/20240307/ger_py/ki.csv
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 12 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'artificial intelligence']
Keywords left: 2 : ['prompt engineer', 'ai']
Keyword: *** ai *** -> Starting search...


2024-03-07 14:59:41,701 - JobSpy - ERROR - Indeed: failed to do request: Get "https://de.indeed.com/m/jobs?q=ai&l=germany&filter=0&start=130&sort=date&fromage=3&sc=0kf%3Aattr%28DSQF7%29%3B": http: server gave HTTP response to HTTPS client


Error with keyword: ai
HTTPSConnectionPool(host='apis.indeed.com', port=443): Max retries exceeded with url: /graphql (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1007)')))
Keyword: *** ai *** -> Starting search...


  jobs_df = pd.concat(jobs_dfs, ignore_index=True)


Number of jobs found: 82
Writing jobs to file: users/lucky/20240307/ger_py/ai.csv
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 13 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai', 'artificial intelligence']
Keywords left: 1 : ['prompt engineer']
Keyword: *** prompt engineer *** -> Starting search...
Error with keyword: prompt engineer
Could not find any results for the search
No jobs found for keyword: prompt engineer. Writing empty file...
--- Path: users/lucky/20240307/ger_py/*.csv ---
Keywords done: 14 : ['python', 'database', 'nlp', 'natural language', 'signal processing', 'prompt engineer', 'openai', 'language model', 'generative model', 'genai', 'ki', 'künstliche intelligenz', 'ai', 'artificial intelligence']
Keywords left: 0 : []
User: lucky - All search settings ran successfully
