nicobrenner · nicobrenner · Jun 12, 2024 · Apr 11, 2024 · May 8, 2024 · May 15, 2024
diff --git a/config/requirements.txt b/config/requirements.txt
@@ -2,4 +2,4 @@ beautifulsoup4==4.9.3
 requests==2.25.1
 openai
 python-dotenv
-windows-curses
+windows-curses; sys_platform == 'win32'
diff --git a/config/sample.env b/config/sample.env
@@ -1,4 +1,4 @@
-OPENAI_API_KEY=
+OPENAI_API_KEY=your_openai_api_key_here
 OPENAI_GPT_MODEL=gpt-3.5-turbo
 BASE_RESUME_PATH=base_resume.txt
 HN_START_URL=https://news.ycombinator.com/item?id=39894820&p=1

diff --git a/src/work_startup_scraper.py b/src/work_startup_scraper.py
@@ -104,18 +104,35 @@ def scrape_jobs(self, stdscr, update_func=None, done_event=None, result_queue=No
         update_func(f"Scraping: {self.base_url}")
         try: 
             company_links = self.get_company_links()
-
+            count = 0
+            flag1 = False
+            flag2 = False
+            flag3 = False
             for company_link in company_links:
+                count += 1
                 job_links = self.get_job_links(company_link)
                 for job_link in job_links:
                     job_details = self.get_job_details(job_link)
                     if job_details:
                         jobs_list.append(job_details)
+                if update_func:
+                    update_func(f"Scraping: {company_link}")
+                # Report scraping progress as the 25% / 50% / 75% thresholds are crossed
+                if  count / len(company_links)>= 0.25 and not flag1:
+                    update_func("Scraping: 25% of companies completed")
+                    flag1 = True
+                elif count / len(company_links)>= 0.5 and not flag2:
+                    update_func("Scraping: 50% of companies completed")
+                    flag2 = True
+                elif count / len(company_links)>= 0.75:
+                    update_func("Scraping: 75% of companies completed")
+                    flag3 = True
 
             for job in jobs_list:
                 inserted= self.save_to_database(job['original_text'], job['original_html'], job['source'], job['external_id'])
                 if inserted:
                     self.new_entries_count += 1
+
                 if job==jobs_list[-1]:
                     if done_event:
                         result_queue.put(self.new_entries_count)