In [5]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import json
import time

In [6]:
# Base URLs
base_url = "https://m.freightbook.net/"
country_url = "https://m.freightbook.net/member/results?country={}"


In [7]:
# Headers to mimic a browser request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}


In [8]:
# Function to extract country IDs from the dropdown
def get_country_ids():
    response = requests.get(base_url, headers=headers)
    if response.status_code != 200:
        print("Failed to fetch country list")
        return {}

    soup = BeautifulSoup(response.text, "html.parser")
    country_options = soup.select("select option")  # Extract dropdown options

    country_list = {}
    for option in country_options:
        if option.get("value") and option.text.strip():
            country_list[option.text.strip()] = option["value"]

    return country_list


In [9]:
# Function to scrape companies from a given country
def scrape_country_forwarders(country_name, country_id):
    url = country_url.format(country_id)
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to fetch companies for {country_name}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    forwarders = []

    # Locate all company blocks
    company_blocks = soup.find_all("div", class_="company-box")  # Adjust class if needed

    for company in company_blocks:
        try:
            name = company.find("a").text.strip() if company.find("a") else "N/A"
            profile_link = company.find("a")["href"] if company.find("a") else None
            address = company.find("p", class_="address").text.strip() if company.find("p", class_="address") else "N/A"
            country = company.find("p", class_="country").text.strip() if company.find("p", class_="country") else "N/A"

            # If profile link exists, fetch detailed data
            full_details = scrape_company_details(profile_link) if profile_link else {}

            forwarders.append({
                "Company Name": name,
                "Address": address,
                "Country": country,
                "Profile Link": base_url + profile_link if profile_link else "N/A",
                **full_details  # Merge detailed data if available
            })

        except Exception as e:
            print(f"Error extracting company details: {e}")

    return forwarders


In [10]:
# Function to extract details from company profile pages
def scrape_company_details(profile_link):
    url = base_url + profile_link
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to fetch company details for {url}")
        return {}

    soup = BeautifulSoup(response.text, "html.parser")

    details = {}
    try:
        details["Telephone"] = soup.find("p", class_="tel").text.strip() if soup.find("p", class_="tel") else "N/A"
        details["Year Business Started"] = soup.find(text="Year Business Started:").find_next().text.strip() if soup.find(text="Year Business Started:") else "N/A"
        details["Owner"] = soup.find(text="Owner").find_next().text.strip() if soup.find(text="Owner") else "N/A"
        details["Services"] = [s.text.strip() for s in soup.find_all("li")]  # Extracts all listed services
        details["IATA Member"] = soup.find(text="IATA Member").find_next().text.strip() if soup.find(text="IATA Member") else "N/A"

    except Exception as e:
        print(f"Error extracting details: {e}")

    return details


In [11]:
if __name__ == "__main__":
    country_ids = get_country_ids()
    
    all_data = {}
    for country, country_id in country_ids.items():
        print(f"Scraping {country}...")
        country_data = scrape_country_forwarders(country, country_id)
        all_data[country] = country_data
        time.sleep(3)  # Avoid too many requests quickly

    # Save data to JSON file
    output_file = "freightbook_data.json"
    with open(output_file, "w", encoding="utf-8") as json_file:
        json.dump(all_data, json_file, indent=4, ensure_ascii=False)

    print(f"Scraping completed. Data saved in {output_file}")


Scraping Search by country......
Scraping AFGHANISTAN...
Scraping ARGENTINA...
Scraping ARMENIA...
Scraping AUSTRALIA...
Scraping AZERBAIJAN...
Scraping BAHRAIN...
Scraping BANGLADESH...
Scraping BELARUS...
Scraping BELGIUM...
Scraping BENIN...
Scraping BRAZIL...
Scraping BULGARIA...
Scraping CAMBODIA...
Scraping CAMEROON...
Scraping CANADA...
Scraping CHILE...
Scraping CHINA...
Scraping COLOMBIA...
Scraping CONGO, DEMOCRATIC REPUBLIC...
Scraping COSTA RICA...
Scraping COTE D IVOIRE...
Scraping CYPRUS...
Scraping CZECH REPUBLIC...
Scraping DENMARK...
Scraping DOMINICAN REPUBLIC...
Scraping ECUADOR...
Scraping EGYPT...
Scraping EL SALVADOR...
Scraping EQUATORIAL GUINEA...
Scraping ESTONIA...
Scraping FRANCE...
Scraping GEORGIA...
Scraping GERMANY...
Scraping GHANA...
Scraping GIBRALTAR...
Scraping GREECE...
Scraping GUATEMALA...
Scraping HAITI...
Scraping HONDURAS...
Scraping HONG KONG...
Scraping HUNGARY...
Scraping INDIA...
Scraping INDONESIA...
Scraping IRAQ...
Scraping IRELAND...
Sc