In [1]:
import pandas as pd
import numpy as np

from methods import sites, actions, scrape, urls
from schedule import tech_collect

#### Option A: gather data from script

In [None]:
# Run the collection script and keep its whole return value for inspection.
info = tech_collect.main()
# Later cells read the postings from `results`, taken from the "results" entry.
results = info["results"]

#### Option B: reload data

In [2]:
# Folder holding the saved tech postings; later cells reuse `path`.
path = "source/save/postings/tech/"

# index=-1 presumably selects the most recent file in the folder --
# confirm against actions.get_filename_from_dir.
latest_file = actions.get_filename_from_dir(path=path, index=-1)
results = actions.load_file_if_str(path + latest_file)

TODO: clean the all-time postings (update them to the new dict structure, filling missing fields with NaN, and recalculate points), or at least sort them into "good" and "old" folders.

### Analyze current data

In [3]:
# Keep only the postings whose description mentions at least one main keyword.
# A posting without a string description is skipped instead of raising, which
# matches the guard the historical analysis applies to the combined postings.
interesting = {
    post: value
    for post, value in results.items()
    if isinstance(value.get('description'), str)
    and any(keyword in value['description'].lower()
            for keyword in tech_collect.MAIN_DESCRIPTION_KEYWORDS)
}
# Highest-scoring postings first; a posting without 'points' is ranked as 0.
interesting = dict(sorted(interesting.items(), key=lambda item: item[1].get('points', 0), reverse=True))

In [4]:
# Compare the current postings with an earlier saved file and print the listed
# attributes for the reported items.
# index=-2 presumably selects the file just before the latest one -- confirm
# against actions.get_filename_from_dir.
file_name = actions.get_filename_from_dir(path, index=-2)
added, removed = actions.compare_postings(
    results,
    f'{path}{file_name}',
    print_attrs=["title", "company", "points", "salary_monthly_guessed", "snippet"],
    printed_text_max_length=200,
)

New items above points threshold: 

karriere.at7594694: 
	title: Senior Full Stack .NET Developer (m/w/x),
	company: BearingPoint GmbH,
	salary_monthly_guessed: 3843,
	points: 2.215,
	snippet: Deine Aufgaben: Entwicklung und Weiterentwicklung moderner Web- und Cloud-Anwendungen auf Basis von .NET, .NET Core, C# und Azure, Design, Konfiguration und Deployment von APIs, Services und zentralen...

karriere.at7594100: 
	title: Energiesystem Engineer (w/m/x),
	company: epunkt GmbH,
	salary_monthly_guessed: 4200,
	points: 2.115,
	snippet: Deine Aufgaben: Mitverantwortlich für den Aufbau, die Programmierung und Anwendung eines Energiesystemmodells auf Basis von PyPSA Entwicklung fundierter Energieszenarien zur Erreichung der Klimaneutra...

karriere.at7594679: 
	title: (Senior) Kundenbetreuer:in Kompetenzzentrum Immobilien (w/m/d),
	company: Raiffeisen Niederösterreich-Wien,
	salary_monthly_guessed: 4642,
	points: 1.698,
	snippet: (Senior) Kundenbetreuer:in Kompetenzzentrum Immobilien (w/m/d)

In [None]:
# Show the complete postings mapping currently held in `results`.
results

In [None]:
# Show the keyword-filtered postings, ordered by points (highest first).
interesting

In [None]:
# Show the first value returned by actions.compare_postings for the current results.
added

### Historical data analysis

In [5]:
# Load the postings gathered from the save folder (presumably every saved file
# merged into one mapping -- confirm against actions.combine_postings) and
# tabulate them with one row per posting.
postings_h = actions.combine_postings(folder_path=path)
df_h = pd.DataFrame.from_dict(postings_h, orient='index')
print("No. postings so far: ", len(postings_h))

# Historical postings whose description mentions at least one main keyword.
# The description check runs once per posting; entries with a missing or
# non-string description (e.g. NaN) are skipped.
interesting_postings_h = {
    post: value
    for post, value in postings_h.items()
    if isinstance(value.get('description'), str)
    and any(keyword in value['description'].lower()
            for keyword in tech_collect.MAIN_DESCRIPTION_KEYWORDS)
}
# Highest-scoring postings first; a posting without 'points' is ranked as 0.
interesting_postings_h = dict(sorted(interesting_postings_h.items(), key=lambda item: item[1].get('points', 0), reverse=True))

# Companies with at most 3 postings. The counts are computed once instead of
# re-filtering the frame for every company. Missing company values are absent
# from the counts, so they fall back to 0 and are treated as rare.
company_counts = df_h['company'].value_counts().to_dict()
rare_companies = [company for company in df_h['company'].unique() if company_counts.get(company, 0) <= 3]

# Postings of those rare companies, best-scored first, restricted to a fixed column order.
df_rare_companies = df_h[df_h['company'].isin(rare_companies)].sort_values(by='points', ascending=False)[['title', 'company', 'salary',
       'locations', 'url', 'snippet', 'date',
       'id', 'salary_monthly_guessed', 'description', 'points', 'keywords',
       'collected_on', 'role', 'requirements', 'nice_to_have', 'benefits']]

# Compare the current results with one fixed, older saved file and keep only the
# second returned value; the empty print_attrs presumably suppresses the
# per-posting printout -- confirm against actions.compare_postings.
file_name = 'postings_2025-01-19-00-33-53'
_, removed_h = actions.compare_postings(results, f'{path}{file_name}.json', print_attrs=[])

No. postings so far:  6169


In [None]:
# Show the keyword-filtered historical postings, ordered by points (highest first).
interesting_postings_h

In [None]:
# Show the full mapping returned by actions.combine_postings.
postings_h

In [None]:
# Show the postings from companies with at most 3 entries in df_h, highest points first.
df_rare_companies

In [None]:
# Show the second value returned by actions.compare_postings against the fixed 2025-01-19 file.
removed_h