In [None]:
import requests

# Connectivity smoke test: fetch a known page and show the HTTP status code.
# requests applies no timeout by default, so an unresponsive server would
# block the kernel indefinitely; pass an explicit timeout (seconds).
r = requests.get("https://example.com", timeout=10)
print(r.status_code)

# Install a package directly in the notebook: %pip install requests-html

In [None]:
import pandas as pd
from collections import Counter
import re

def most_frequent_terms(df, column, top_n=10):
    """
    Return the most frequent terms from a text column in a DataFrame.

    Missing values are skipped. The remaining values are converted to
    strings, lowercased and tokenized. A token is a run of the letters
    a-z, å, ä, ö delimited by word boundaries, so words that also contain
    digits, underscores or other letters (e.g. "python3") are skipped
    entirely rather than being split.

    Args:
        df (pd.DataFrame): Input dataframe.
        column (str): Column name containing text.
        top_n (int): Number of most frequent terms to return.

    Returns:
        pd.DataFrame: Columns "term" and "count", ordered from most to
        least frequent, with at most ``top_n`` rows. Empty (but with both
        columns) when no tokens are found.

    Raises:
        KeyError: If ``column`` is not a column of ``df``.
    """
    # Drop missing values first: astype(str) would otherwise turn NaN/None
    # into the literal strings "nan"/"None" and count them as terms.
    values = df[column].dropna().astype(str)

    # Join all text in the column into one big string; the space separator
    # keeps the last word of one row apart from the first word of the next.
    text = " ".join(values.tolist())

    # Tokenize the lowercased text: keep only a-z plus Swedish å/ä/ö words.
    tokens = re.findall(r"\b[a-zA-ZåäöÅÄÖ]+\b", text.lower())

    # Count terms and keep the top_n most common ones.
    most_common = Counter(tokens).most_common(top_n)
    return pd.DataFrame(most_common, columns=["term", "count"])



In [8]:
import pandas as pd

# Read the raw jobs CSV and preview its identifying columns.
# To narrow the preview to a single site, filter first, e.g.
# raw_data.loc[raw_data['site'] == 'Aliant'].
raw_data = pd.read_csv('../data/raw/jobs.csv')
raw_data.loc[:, ['site', 'job_title', 'ingestion_ts']]

Unnamed: 0,site,job_title,ingestion_ts
0,Afry,Automotive Senior konstruktör Oskarshamn,2025-09-12 16:33:57
1,Afry,Senior Konstruktör inom Gjutning,2025-09-12 16:33:57
2,Afry,Senior Mechanical Designer - Automotive,2025-09-12 16:33:57
3,Afry,Senior designer whole vehicle development,2025-09-12 16:33:57
4,Afry,Senior Scala/Java developer,2025-09-12 16:33:57
...,...,...,...
503,Upgraded,Kravanalytiker,2025-09-12 16:41:43
504,Upgraded,Säkerhetsarkitekt,2025-09-12 16:41:43
505,Upgraded,Lösningsarkitekt inom Apputveckling,2025-09-12 16:41:43
506,Upgraded,"Senior Data Engineer, Region Skåne/Skånetrafik...",2025-09-12 16:41:43


In [9]:
# Count the non-null values of every remaining column for each site.
raw_group = raw_data.groupby('site').count()
raw_group

Unnamed: 0_level_0,site_id,job_title,raw_payload,ingestion_ts
site,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A Society,95,95,95,95
Afry,85,85,85,85
Aliant,6,6,6,6
Combitech,24,24,24,24
Emagine,46,46,46,46
Ework,115,115,115,115
Nikita,17,17,17,17
Regent,24,24,24,24
Upgraded,96,96,96,96


In [14]:
# Compute the 100 most frequent job-title terms and save them as CSV in the
# current working directory.
df = most_frequent_terms(df=raw_data, column="job_title", top_n=100)
df.to_csv(path_or_buf='most_frequent.csv')

In [10]:
# Read the bronze jobs CSV and preview the columns of interest.
bronze_data = pd.read_csv('../data/bronze/jobs.csv')
bronze_data.loc[:, ['site', 'job_title', 'ingestion_ts', 'work_location', 'link']]


Unnamed: 0,site,job_title,ingestion_ts,work_location,link
0,Aliant,Elektronikkonstruktör (Altium Designer),2025-09-12 11:45:04,Stockholm,https://aliant.recman.page/job/449555
1,Aliant,Scala-utvecklare,2025-09-12 11:45:04,Göteborg,https://aliant.recman.page/job/449553
2,Aliant,Medior Project Manager,2025-09-12 11:45:04,Göteborg,https://aliant.recman.page/job/449546
3,Aliant,Computer Vision - Android Automotive,2025-09-12 11:45:04,Göteborg,https://aliant.recman.page/job/448256
4,Aliant,Utvecklare - MES-system,2025-09-12 11:45:04,Jönköping,https://aliant.recman.page/job/448170
...,...,...,...,...,...
406,Upgraded,Kravanalytiker,2025-09-12 16:23:01,Stockholm,https://upgraded.se/konsultuppdrag/122795-krav...
407,Upgraded,Säkerhetsarkitekt,2025-09-12 16:23:01,Stockholm,https://upgraded.se/konsultuppdrag/122711-sake...
408,Upgraded,Lösningsarkitekt inom Apputveckling,2025-09-12 16:23:01,Norrköping,https://upgraded.se/konsultuppdrag/122542-losn...
409,Upgraded,"Senior Data Engineer, Region Skåne/Skånetrafik...",2025-09-12 16:23:01,Malmö,https://upgraded.se/konsultuppdrag/122340-seni...


# Afry

In [1]:
import sys
import os

# Make the parent of the working directory importable so the `src` package
# resolves (presumably the project root — confirm). Guarded so that
# re-running the cell does not append the same entry to sys.path again.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.afry_scraper import AfryScraper

# Scrape -> raw CSV -> bronze CSV. The scraper internals live in src/ and are
# not visible here; step notes follow the log lines of the recorded run.
afry = AfryScraper()
response = afry.request_status()  # logs the response status
job_posts = afry.return_raw_job_posts_data(response)  # logs the number of scraped ads
raw_data = afry.parse_raw_data(job_posts)
afry.unload_data(file_path="../data/raw/jobs.csv", new_data=raw_data)  # logs how many new jobs were added
last_raw_data = afry.load_last_added_raw_data()
bronze_data = afry.parse_bronze_data(last_raw_data)
afry.unload_data(file_path="../data/bronze/jobs.csv", new_data=bronze_data)
last_raw_data

Afry > Response: 200
Afry > Nmr of scraped adds: 85
Afry > Unloading data to ../data/raw/jobs.csv. Nmr of new added jobs:85
Afry > Loading last scraped jobs, nr: 85
Afry > Parsing bronze data: 85
Afry > Unloading data to ../data/bronze/jobs.csv. Nmr of new added jobs:85


Unnamed: 0,site,site_id,job_title,raw_payload,ingestion_ts
0,Afry,23d6e07e-94c1-41ee-a3e5-7bb5c60e9c6b,Automotive Senior konstruktör Oskarshamn,"{'Id': '23d6e07e-94c1-41ee-a3e5-7bb5c60e9c6b',...",2025-09-12 16:33:57
1,Afry,587cd899-aff5-42b9-9cd8-302a09876a18,Senior Konstruktör inom Gjutning,"{'Id': '587cd899-aff5-42b9-9cd8-302a09876a18',...",2025-09-12 16:33:57
2,Afry,6429e1c5-ce4e-4af8-8398-3943ea32a520,Senior Mechanical Designer - Automotive,"{'Id': '6429e1c5-ce4e-4af8-8398-3943ea32a520',...",2025-09-12 16:33:57
3,Afry,49a8e978-7aaf-4ad7-b58a-e17324fa8090,Senior designer whole vehicle development,"{'Id': '49a8e978-7aaf-4ad7-b58a-e17324fa8090',...",2025-09-12 16:33:57
4,Afry,df648343-a1f5-4347-9d6c-f5b6495d91a2,Senior Scala/Java developer,"{'Id': 'df648343-a1f5-4347-9d6c-f5b6495d91a2',...",2025-09-12 16:33:57
...,...,...,...,...,...
80,Afry,cec74887-ac01-4b35-9fda-c79dd6f51679,Projektledare ledningsprojekt 4651 (OPP-0265773),"{'Id': 'cec74887-ac01-4b35-9fda-c79dd6f51679',...",2025-09-12 16:33:57
81,Afry,72ba7893-5606-4060-b44a-70bd5dc0bfd6,BPM008401 Vibrationsexpert (SYS4),"{'Id': '72ba7893-5606-4060-b44a-70bd5dc0bfd6',...",2025-09-12 16:33:57
82,Afry,0a4e8c31-054a-4e9f-90be-cf2ffbff5640,Scrum Master till HW-nära Mjukvaruteam - Försv...,"{'Id': '0a4e8c31-054a-4e9f-90be-cf2ffbff5640',...",2025-09-12 16:33:57
83,Afry,46d37366-7ae3-4740-b0a8-2e5e8fe01e03,"Project Manager, Engineering","{'Id': '46d37366-7ae3-4740-b0a8-2e5e8fe01e03',...",2025-09-12 16:33:57


# Aliant

In [2]:
import sys
import os
import pandas as pd

# Make the parent of the working directory importable so the `src` package
# resolves (presumably the project root — confirm). Guarded so that
# re-running the cell does not append the same entry to sys.path again.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.aliant_scraper import AliantScraper

# Scrape -> raw CSV -> bronze CSV. The scraper internals live in src/ and are
# not visible here; step notes follow the log lines of the recorded run.
aliant = AliantScraper()
response = aliant.request_status()  # logs the response status
job_posts = aliant.return_raw_job_posts_data(response)  # logs the number of scraped ads
raw_data = aliant.parse_raw_data(job_posts)
aliant.unload_data(file_path="../data/raw/jobs.csv", new_data=raw_data)  # logs how many new jobs were added
last_raw_data = aliant.load_last_added_raw_data()
bronze_data = aliant.parse_bronze_data(last_raw_data)
aliant.unload_data(file_path="../data/bronze/jobs.csv", new_data=bronze_data)
last_raw_data

# Debug aid: evaluate job_posts[0] to inspect a single raw ad.

Aliant > Response: 200
Aliant > Nmr of scraped adds: 6
Aliant > Unloading data to ../data/raw/jobs.csv. Nmr of new added jobs:6
Aliant > Loading last scraped jobs, nr: 6
Aliant > Parsing bronze data: 6
Aliant > Unloading data to ../data/bronze/jobs.csv. Nmr of new added jobs:6


Unnamed: 0,site,site_id,job_title,raw_payload,ingestion_ts
85,Aliant,449555,Elektronikkonstruktör (Altium Designer),"{'AdID': 449555, 'Positions': 1, 'AdLogo': 'ht...",2025-09-12 16:34:03
86,Aliant,449553,Scala-utvecklare,"{'AdID': 449553, 'Positions': 1, 'AdLogo': 'ht...",2025-09-12 16:34:03
87,Aliant,449546,Medior Project Manager,"{'AdID': 449546, 'Positions': 1, 'AdLogo': 'ht...",2025-09-12 16:34:03
88,Aliant,448256,Computer Vision - Android Automotive,"{'AdID': 448256, 'Positions': 1, 'AdLogo': 'ht...",2025-09-12 16:34:03
89,Aliant,448170,Utvecklare - MES-system,"{'AdID': 448170, 'Positions': 1, 'AdLogo': 'ht...",2025-09-12 16:34:03
90,Aliant,432534,Sitecore-utvecklare,"{'AdID': 432534, 'Positions': 1, 'AdLogo': 'ht...",2025-09-12 16:34:03


# A Society

In [1]:
import sys
import os

# Make the parent of the working directory importable so the `src` package
# resolves (presumably the project root — confirm). Guarded so that
# re-running the cell does not append the same entry to sys.path again.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.asociety_scraper import ASocietyScraper

# Scrape -> raw CSV -> bronze CSV. The scraper internals live in src/ and are
# not visible here; step notes follow the log lines of the recorded run.
# NOTE(review): the recorded run of this cell stopped with
# "SyntaxError: unmatched ')'" after the "Loading last scraped jobs" log line,
# and no bronze log lines were printed. The cause is inside src/ — investigate
# before relying on the bronze output for this site.
asociety = ASocietyScraper()
response = asociety.request_status()  # logs the response status
job_posts = asociety.return_raw_job_posts_data(response)  # logs the number of scraped ads
raw_data = asociety.parse_raw_data(job_posts)
asociety.unload_data(file_path="../data/raw/jobs.csv", new_data=raw_data)  # logs how many new jobs were added
last_raw_data = asociety.load_last_added_raw_data()
bronze_data = asociety.parse_bronze_data(last_raw_data)
asociety.unload_data(file_path="../data/bronze/jobs.csv", new_data=bronze_data)

A Society > Response: 200
A Society > Nmr of scraped adds: 95
A Society > Unloading data to ../data/raw/jobs.csv. Nmr of new added jobs:0
A Society > Loading last scraped jobs, nr: 95


SyntaxError: unmatched ')' (<unknown>, line 1)

# Combitech

In [2]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), ".."))) # lägg till training i path

from src.combitech_scraper import CombitechScraper
combitech = CombitechScraper()
response = await combitech.request_status()
job_posts = combitech.return_raw_job_posts_data(response)
raw_data = combitech.parse_raw_data(job_posts)
combitech.unload_data(file_path = "../data/raw/jobs.csv", new_data=raw_data)
last_raw_data = combitech.load_last_added_raw_data()
bronze_data = combitech.parse_bronze_data(last_raw_data)
combitech.unload_data(file_path="../data/bronze/jobs.csv", new_data=bronze_data)


Combitech > Status code: 200
Combitech > Nmr of scraped adds: 24
Combitech > Unloading data to ../data/raw/jobs.csv. Nmr of new added jobs:24
Combitech > Loading last scraped jobs, nr: 24
Combitech > Parsing bronze data: 24
Combitech > Unloading data to ../data/bronze/jobs.csv. Nmr of new added jobs:24


# Emagine

In [3]:
import sys
import os

# Make the parent of the working directory importable so the `src` package
# resolves (presumably the project root — confirm). Guarded so that
# re-running the cell does not append the same entry to sys.path again.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.emagine_scraper import EmagineScraper

# Scrape -> raw CSV -> bronze CSV. The scraper internals live in src/ and are
# not visible here; step notes follow the log lines of the recorded run.
emagine = EmagineScraper()
response = emagine.request_status()  # logs the response status
job_posts = emagine.return_raw_job_posts_data(response)  # logs the number of scraped ads
raw_data = emagine.parse_raw_data(job_posts)
emagine.unload_data(file_path="../data/raw/jobs.csv", new_data=raw_data)  # logs how many new jobs were added
last_raw_data = emagine.load_last_added_raw_data()
bronze_data = emagine.parse_bronze_data(last_raw_data)
emagine.unload_data(file_path="../data/bronze/jobs.csv", new_data=bronze_data)

Emagine > Response: 200
Emagine > Nmr of scraped adds: 46
Emagine > Unloading data to ../data/raw/jobs.csv. Nmr of new added jobs:46
Emagine > Loading last scraped jobs, nr: 46
Emagine > Parsing bronze data: 46
Emagine > Unloading data to ../data/bronze/jobs.csv. Nmr of new added jobs:46


# Ework

In [4]:
import sys
import os

# Make the parent of the working directory importable so the `src` package
# resolves (presumably the project root — confirm). Guarded so that
# re-running the cell does not append the same entry to sys.path again.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.ework_scraper import EworkScraper

# Scrape -> raw CSV -> bronze CSV. The scraper internals live in src/ and are
# not visible here; step notes follow the log lines of the recorded run.
ework = EworkScraper()
response = ework.request_status()  # logs the response status
job_posts = ework.return_raw_job_posts_data(response)  # logs the number of scraped ads
raw_data = ework.parse_raw_data(job_posts)
ework.unload_data(file_path="../data/raw/jobs.csv", new_data=raw_data)  # logs how many new jobs were added
last_raw_data = ework.load_last_added_raw_data()
bronze_data = ework.parse_bronze_data(last_raw_data)
ework.unload_data(file_path="../data/bronze/jobs.csv", new_data=bronze_data)

Ework > Response: 200
Ework > Nmr of scraped adds: 115
Ework > Unloading data to ../data/raw/jobs.csv. Nmr of new added jobs:115
Ework > Loading last scraped jobs, nr: 115
Ework > Parsing bronze data: 115
Ework > Unloading data to ../data/bronze/jobs.csv. Nmr of new added jobs:115


# Levigo 
(Inga uppdrag ute)

# Nikita 
Hemsida som renderar innehållet via JavaScript efter att själva HTML:en har laddats. I detta fall hittar man inte uppdragen i Fetch/XHR i nätverksinspektionen – själva jobblistan hämtas inte via ett öppet API som returnerar JSON, utan genereras dynamiskt i webbläsaren.

In [5]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), ".."))) # lägg till training i path

from src.nikita_scraper import NikitaScraper
nikita = NikitaScraper()
response = await nikita.request_status()
job_posts = nikita.return_raw_job_posts_data(response)
raw_data = nikita.parse_raw_data(job_posts)
nikita.unload_data(file_path = "../data/raw/jobs.csv", new_data=raw_data)
last_raw_data = nikita.load_last_added_raw_data()
bronze_data = nikita.parse_bronze_data(last_raw_data)
nikita.unload_data(file_path="../data/bronze/jobs.csv", new_data=bronze_data)

Nikita > Status code: 200
Nikita > Nmr of scraped adds: 17
Nikita > Unloading data to ../data/raw/jobs.csv. Nmr of new added jobs:17
Nikita > Loading last scraped jobs, nr: 17
Nikita > Parsing bronze data: 17
Nikita > Unloading data to ../data/bronze/jobs.csv. Nmr of new added jobs:17


# Regent

In [6]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), ".."))) # lägg till training i path

from src.regent_scraper import RegentScraper
regent = RegentScraper()
response = await regent.request_status()
job_posts = regent.return_raw_job_posts_data(response)
raw_data = regent.parse_raw_data(job_posts)
regent.unload_data(file_path = "../data/raw/jobs.csv", new_data=raw_data)
last_raw_data = regent.load_last_added_raw_data()
bronze_data = regent.parse_bronze_data(last_raw_data)
regent.unload_data(file_path="../data/bronze/jobs.csv", new_data=bronze_data)


Regent > Status code: 200
Regent > Nmr of scraped adds: 24
Regent > Unloading data to ../data/raw/jobs.csv. Nmr of new added jobs:24
Regent > Loading last scraped jobs, nr: 24
Regent > Parsing bronze data: 24
Regent > Unloading data to ../data/bronze/jobs.csv. Nmr of new added jobs:24


# Upgraded

In [7]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), ".."))) # lägg till training i path

from src.upgraded_scraper import UpgradedScraper
upgraded = UpgradedScraper()
response = await upgraded.request_status()
job_posts = upgraded.return_raw_job_posts_data(response)
raw_data = upgraded.parse_raw_data(job_posts)
upgraded.unload_data(file_path = "../data/raw/jobs.csv", new_data=raw_data)
last_raw_data = upgraded.load_last_added_raw_data()
bronze_data = upgraded.parse_bronze_data(last_raw_data)
upgraded.unload_data(file_path="../data/bronze/jobs.csv", new_data=bronze_data)

Upgraded > Status code: 200
Upgraded > Nmr of scraped adds: 96
Upgraded > Unloading data to ../data/raw/jobs.csv. Nmr of new added jobs:96
Upgraded > Loading last scraped jobs, nr: 96
Upgraded > Parsing bronze data: 96
Upgraded > Unloading data to ../data/bronze/jobs.csv. Nmr of new added jobs:96
