In [22]:
from bs4 import BeautifulSoup
import requests
from datetime import *
import pandas as pd
# Browser-like User-Agent header passed as `headers=` to the scraper's HTTP requests.
head = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.141 Safari/537.36 Edg/87.0.664.75'
    )
}

In [24]:
# Crawl the paginated job-category listing and collect one record per post.
# Each record holds "title" and "listing_url", plus "posted_date" when the post's
# <time> text parses as "%B %d, %Y". Crawling stops at the first post older than
# `cutoff_date`, on a page without <article> elements, on a 404, or at MAX_PAGES.
MAX_PAGES = 100  # safety cap so an unexpected site layout cannot cause an endless crawl
jobs = []
today = datetime.today().date()
cutoff_date = today - timedelta(days=30)
page_num = 1
stop_pagination = False
while not stop_pagination and page_num <= MAX_PAGES:
    page_url = f"https://freshersrecruitment.co.in/category/jobs/page/{page_num}/"
    print(f"Fetching: {page_url}") # Checking
    response = requests.get(page_url, headers=head, timeout=10)
    if response.status_code == 404:
        # Presumably the site answers 404 for a page past the last one; treat it
        # as the end of pagination rather than as a failure.
        break
    # Any other HTTP error (403, 5xx, ...) must fail loudly; parsing an error page
    # as a listing would silently truncate the results.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    articles = soup.find_all("article")
    if not articles:
        break
    for article in articles:
        job = {}
        title_tag = article.find("h2", class_="entry-title")
        if title_tag:
            a_tag = title_tag.find("a", href=True)
            if a_tag:
                job["title"] = a_tag.text.strip()
                job["listing_url"] = a_tag["href"]
        time_tag = article.find("time")
        if time_tag:
            raw_date = time_tag.text.strip()
            try:
                job_date = datetime.strptime(raw_date, "%B %d, %Y").date()
            except ValueError:
                # One oddly formatted date must not abort the whole crawl; the
                # post is kept without "posted_date", like a post with no <time>.
                print(f"Unparseable date {raw_date!r} on {page_url}")
            else:
                job["posted_date"] = job_date
                if job_date < cutoff_date:
                    # This post and everything after it is outside the window.
                    stop_pagination = True
                    break
        # Posts without a title link cannot be followed up, so they are dropped.
        if job.get("listing_url"):
            jobs.append(job)
    if stop_pagination:
        break
    page_num += 1
if page_num > MAX_PAGES:
    print(f"Stopped after {MAX_PAGES} pages without reaching the cutoff date {cutoff_date}")
for job in jobs:
    print(job)

Fetching: https://freshersrecruitment.co.in/category/jobs/page/1/
Fetching: https://freshersrecruitment.co.in/category/jobs/page/2/
Fetching: https://freshersrecruitment.co.in/category/jobs/page/3/
Fetching: https://freshersrecruitment.co.in/category/jobs/page/4/
Fetching: https://freshersrecruitment.co.in/category/jobs/page/5/
Fetching: https://freshersrecruitment.co.in/category/jobs/page/6/
Fetching: https://freshersrecruitment.co.in/category/jobs/page/7/
Fetching: https://freshersrecruitment.co.in/category/jobs/page/8/
{'title': 'Sutherland Walk-in Drive 2026 | Hiring for Freshers with Salary Rs 3 LPA', 'listing_url': 'https://freshersrecruitment.co.in/sutherland-walk-in-drive-2026/', 'posted_date': datetime.date(2026, 1, 26)}
{'title': 'Tech Mahindra Walk-in Interview 2026 | Hiring for Freshers | Salary Rs 4.75 LPA', 'listing_url': 'https://freshersrecruitment.co.in/tech-mahindra-walk-in-interview-2026/', 'posted_date': datetime.date(2026, 1, 26)}
{'title': 'Wipro Off Campus Hiring

In [3]:
import requests
from bs4 import BeautifulSoup
# Enrich every listing record in `jobs` with details scraped from its own page:
# "apply_urls", "location", "experience" and "description" (each only when found).
# A page that fails to download or parse is reported and left out of job_details.
DETAIL_TIMEOUT = 10  # seconds; without a timeout requests.get can block indefinitely
job_details = []
for job in jobs:
    job_info = job.copy()
    try:
        # Same browser-like headers and timeout as the listing-page requests.
        response = requests.get(job_info["listing_url"], headers=head, timeout=DETAIL_TIMEOUT)
        response.raise_for_status()
        job_page = BeautifulSoup(response.content, "html.parser")

        # Links whose anchor text looks like an "apply" call to action.
        # A dict is used as an ordered set: duplicates collapse, page order is kept,
        # so the resulting list is the same on every run.
        apply_urls = {}
        for a in job_page.find_all("a", href=True):
            anchor_text = a.get_text(strip=True).lower()
            if any(word in anchor_text for word in ("apply here", "click here", "apply now")):
                apply_urls[a["href"]] = None
        if apply_urls:
            job_info["apply_urls"] = list(apply_urls)

        # One pass over the bullet lists feeds both the labelled fields
        # ("<strong>Label</strong>: value") and the free-text description.
        descriptions = []
        for ul in job_page.find_all("ul", class_="wp-block-list"):
            for li in ul.find_all("li"):
                txt = li.get_text(strip=True)
                if txt:
                    descriptions.append(txt)
                strong = li.find("strong")
                if strong:
                    label_text = strong.get_text(strip=True)
                    label = label_text.lower()
                    # Remove only the first occurrence of the label so a value that
                    # happens to repeat the label text is not mangled.
                    value = txt.replace(label_text, "", 1).lstrip(": ").strip()
                    if "location" in label:
                        job_info["location"] = value
                    elif "experience" in label:
                        job_info["experience"] = value

        # Paragraphs written as manual bullets are added after the list items.
        for p in job_page.find_all("p"):
            txt = p.get_text(strip=True)
            if txt.startswith(("•", "-", "–", "*")):
                descriptions.append(txt.lstrip("•-–* ").strip())
        if descriptions:
            # dict.fromkeys drops repeated lines while keeping first-seen order.
            job_info["description"] = " ".join(dict.fromkeys(descriptions))
        job_details.append(job_info)
    except Exception as e:
        # Best effort: report the failing page and continue with the next job.
        print(f"Error: {job_info['listing_url']} → {e}")

In [4]:
# Remove apply links whose URL contains a blocked keyword, then flatten the
# records to one row per remaining link ("apply_url"). A job left with no link
# still yields a single row with apply_url=None.
BLOCKED_KEYWORDS = {'telegram', 'freshersrecruitment', 'whatsapp'}
normalized_jobs = []
for job in job_details:
    # dict.fromkeys de-duplicates while keeping the scraped order, so the row
    # order is reproducible from run to run (a set would not guarantee that).
    kept_urls = [
        url
        for url in dict.fromkeys(job.get('apply_urls', []))
        if not any(block in url.lower() for block in BLOCKED_KEYWORDS)
    ]
    # job_details keeps the filtered list, exactly as the original cell left it.
    job['apply_urls'] = kept_urls
    shared_fields = {key: value for key, value in job.items() if key != 'apply_urls'}
    # `or [None]` covers the no-link case without duplicating the row-building code.
    for url in kept_urls or [None]:
        normalized_jobs.append({**shared_fields, 'apply_url': url})
cleaned_jobs = normalized_jobs

In [5]:
import pandas as pd
# Tabular view of the normalized job records: one row per record in cleaned_jobs.
# Optional CSV snapshot, currently disabled:
# df.to_csv('jobs1.csv', index=False)
df = pd.DataFrame(data=cleaned_jobs)

In [6]:
# df1 = pd.read_csv('jobs.csv')
# df2 = pd.read_csv('jobs1.csv')
# df = pd.concat([df1,df2])

# Replace missing scraped fields with user-facing defaults and make sure every
# row has an apply link (falling back to the listing page itself).
FILL_DEFAULTS = {
    'location': 'Across India',
    'experience': 'Freshers',
    'description': 'No detailed description available. Please visit the apply link for more information.',
}
# A column only exists if at least one scraped record carried that key, so add
# any absent ones first instead of failing with AttributeError/KeyError below.
missing_columns = [
    column
    for column in ('listing_url', 'apply_url', *FILL_DEFAULTS)
    if column not in df.columns
]
df = df.reindex(columns=[*df.columns, *missing_columns])
for column, default in FILL_DEFAULTS.items():
    values = df[column]
    # Empty strings (a label with no value on the page) count as missing too.
    has_value = values.notna() & (values != '')
    df[column] = values.where(has_value, default)
df['apply_url'] = df['apply_url'].fillna(df['listing_url'])
df.head()

Unnamed: 0,title,listing_url,posted_date,experience,location,description,apply_url
0,Sutherland Walk-in Drive 2026 | Hiring for Fre...,https://freshersrecruitment.co.in/sutherland-w...,2026-01-26,0 – 2 Years,Chennai,Job Role:Technical Voice Support Category:Walk...,https://www.naukri.com/job-listings-technical-...
1,Tech Mahindra Walk-in Interview 2026 | Hiring ...,https://freshersrecruitment.co.in/tech-mahindr...,2026-01-26,0 – 3 Years,Noida,Job Role:International Voice & Chat Process Qu...,https://www.naukri.com/job-listings-wak-in-dri...
2,Wipro Off Campus Hiring 2026 | Recruitment for...,https://freshersrecruitment.co.in/wipro-off-ca...,2026-01-25,Freshers,Pan India,Company Website:www.wipro.com Job Role:School ...,https://app.joinsuperset.com/join/#/signup/stu...
3,American Express Off Campus Drive 2026 | Mass ...,https://freshersrecruitment.co.in/american-exp...,2026-01-25,Freshers,Bangalore,Job Role:Analyst – Data Science Job Category:I...,https://aexp.eightfold.ai/careers/job/39656450...
4,L&T Off Campus Drive 2026 | Hiring M.Tech Fres...,https://freshersrecruitment.co.in/lt-off-campu...,2026-01-24,Freshers,Across India,Company:Larsen & Toubro Job Role:Fresher Job T...,https://app.joinsuperset.com/join/#/signup/stu...


In [20]:
from sqlalchemy import create_engine
import pandas as pd

import os
from getpass import getpass
from urllib.parse import quote_plus

# Connection settings come from the environment so that no secret is stored in
# the notebook. The defaults reproduce the previous local setup; the password
# has no default and is prompted for when JOBS_DB_PASSWORD is not set.
# NOTE(review): the password that used to be hardcoded here should be rotated.
db_user = os.environ.get("JOBS_DB_USER", "postgres")
db_password = os.environ.get("JOBS_DB_PASSWORD") or getpass("PostgreSQL password: ")
db_host = os.environ.get("JOBS_DB_HOST", "localhost")
db_port = os.environ.get("JOBS_DB_PORT", "35432")
db_name = os.environ.get("JOBS_DB_NAME", "pgvector")

# quote_plus escapes characters such as "@" or "/" that would otherwise break
# the URL. Avoid displaying DB_URL: it contains the password.
DB_URL = (
    f"postgresql+psycopg2://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(DB_URL)

In [21]:
# Append the prepared rows to the "jobs" table through `engine`.
# index=False keeps the DataFrame index out of the table; method="multi" with
# chunksize=500 sends multi-row INSERT statements of up to 500 rows each.
# NOTE(review): if_exists="append" adds rows on every run; whether re-running
# creates duplicates depends on table constraints not visible here.
to_sql_options = {
    "name": "jobs",
    "con": engine,
    "if_exists": "append",
    "index": False,
    "method": "multi",
    "chunksize": 500,
}
df.to_sql(**to_sql_options)

76