## NOTE:
- Ensure all required Python libraries are installed before running the script.
- Required libraries include: requests, pandas, beautifulsoup4.
- You can install them via pip if needed:
- pip install requests pandas beautifulsoup4
- You can run the script as-is if all libraries are installed correctly.
- Before running, verify the **years** and **countries** for which you want to fetch holiday data.
- In the code, each cell defines a list such as **country_codes = ['australia', 'bangladesh', 'brazil', 'bulgaria', 'canada']**.
- You can add or remove countries in this list based on your requirements.
- The year range is controlled by the loop:
- **for year in range(2021, 2041):**
- You can adjust the start and end years here.
- For example, to get holiday data up to 2040, set the range to range(2021, 2041) — note that the end value is exclusive, so you add 1 to include 2040.
- The output CSV files should have a **consistent naming format**, e.g., holidays_1.csv, holidays_2.csv, and so on.
- This consistency helps later when consolidating all files into a single file containing data from all countries.
- In the workflow, different code cells have different countries listed, but the code structure remains the same.
- This approach is designed so that scraping runs faster and avoids overloading the server.
- Ensure you have a stable internet connection during execution as the script fetches live data.
- The script includes a delay (time.sleep(2)) between requests to be respectful of the website and avoid rate limiting.
- Before consolidating the files, make sure all the CSV files are in one folder.

# List of Countries 

* Australia  
* Bangladesh  
* Brazil  
* Bulgaria  
* Canada  
* China  
* Czech  
* Denmark  
* Finland  
* France  
* Germany  
* Greece  
* Hong-Kong  
* Hungary  
* India  
* Indonesia  
* Israel  
* Italy  
* Japan  
* Kenya  
* North-Korea  
* South-Korea  
* Malaysia  
* Mongolia  
* Morocco  
* Netherlands  
* Norway  
* Pakistan  
* Philippines  
* Poland  
* Portugal  
* Romania  
* Russia  
* Slovenia  
* South-Africa  
* Spain  
* Sri-Lanka  
* Sweden  
* Taiwan  
* Thailand  
* Turkey  
* Vietnam  
* Ukraine  
* UK  
* New-Zealand  
* Belgium  
* Luxembourg  
* Chile  
* Paraguay  
* Slovakia  
* Ireland  
* Iceland  
* US


In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from datetime import datetime
import glob

'australia',
'bangladesh',
'brazil',
'bulgaria',
'canada'

In [2]:
def fetch_holidays(country, year):
    """Fetch one country's holiday list for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to every date string.

    Returns:
        A DataFrame with 'Date', 'Day', 'Name' and 'Type' columns, one row
        per table row that has a non-empty name. An empty DataFrame is
        returned when the request raises, the status code is not 200, or the
        page has no table with id 'holidays-table'.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    # NOTE(review): "?hl=en" presumably requests the English page - confirm.
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"

    try:
        response = requests.get(page_url, headers=request_headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error fetching holidays for {country} {year}: {e}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Failed to fetch {country} {year}: {response.status_code}")
        return pd.DataFrame()

    table = BeautifulSoup(response.text, 'html.parser').find(
        'table', {'id': 'holidays-table'}
    )
    if table is None:
        print(f"No holiday table found for {country} {year}")
        return pd.DataFrame()

    records = []
    # The first <tr> is skipped as the header row.
    for tr in table.find_all('tr')[1:]:
        header_cell = tr.find('th')
        data_cells = tr.find_all('td')

        # Only rows with a <th> date cell and at least two <td> cells are used.
        if header_cell is None or len(data_cells) < 2:
            continue

        holiday_name = data_cells[1].get_text(strip=True)
        if not holiday_name:
            continue

        records.append({
            # The year is appended to the date text taken from the page.
            'Date': f"{header_cell.get_text(strip=True)} {year}",
            'Day': data_cells[0].get_text(strip=True),
            'Name': holiday_name,
            'Type': data_cells[2].get_text(strip=True) if len(data_cells) > 2 else "",
        })

    return pd.DataFrame(records)

def scrape_holidays(country_codes=None, years=None, delay=2):
    """Scrape holidays for several countries and years into one DataFrame.

    Args:
        country_codes: Iterable of country slugs passed to fetch_holidays.
            Defaults to ['australia', 'bangladesh', 'brazil', 'bulgaria',
            'canada'].
        years: Iterable of years to fetch. Defaults to range(2021, 2041),
            i.e. 2021 through 2040 inclusive.
        delay: Seconds to sleep after every request. Defaults to 2.

    Returns:
        DataFrame with the columns 'Date', 'Day', 'Name', 'Type', 'Country'.
        When nothing could be fetched the frame is empty but still carries
        these columns (previously this case raised KeyError).
    """
    if country_codes is None:
        country_codes = ['australia', 'bangladesh', 'brazil', 'bulgaria', 'canada']
    # Materialise caller-supplied years: they are re-iterated once per country.
    years = range(2021, 2041) if years is None else list(years)
    columns = ['Date', 'Day', 'Name', 'Type', 'Country']

    # Collect per-request frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    frames = []
    for country in country_codes:
        for year in years:
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country  # Tag every row with its country
                frames.append(df)

            time.sleep(delay)  # Delay between requests

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)[columns]

# Entry point: scrape every configured country/year, then save and preview the result.
if __name__ == "__main__":
    holiday_data = scrape_holidays()

    # Write the combined table to CSV without the index column. The "utf-8-sig"
    # codec puts a UTF-8 byte-order mark at the start of the file.
    holiday_data.to_csv("holidays_1.csv", index=False, encoding="utf-8-sig")

    # Print the first 15 rows as a quick preview of the scraped data.
    print(holiday_data.head(15))  # Show top 15 rows as preview

Fetching holidays for australia in 2021...
Fetching holidays for australia in 2022...
Fetching holidays for australia in 2023...
Fetching holidays for australia in 2024...
Fetching holidays for australia in 2025...
Fetching holidays for australia in 2026...
Fetching holidays for australia in 2027...
Fetching holidays for australia in 2028...
Fetching holidays for australia in 2029...
Fetching holidays for australia in 2030...
Fetching holidays for australia in 2031...
Fetching holidays for australia in 2032...
Fetching holidays for australia in 2033...
Fetching holidays for australia in 2034...
Fetching holidays for australia in 2035...
Fetching holidays for australia in 2036...
Fetching holidays for australia in 2037...
Fetching holidays for australia in 2038...
Fetching holidays for australia in 2039...
Fetching holidays for australia in 2040...
Fetching holidays for bangladesh in 2021...
Fetching holidays for bangladesh in 2022...
Fetching holidays for bangladesh in 2023...
Fetching

'china',
'czech',
'denmark',
'finland',
'france'

In [3]:
def fetch_holidays(country, year):
    """Fetch one country's holiday list for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to every date string.

    Returns:
        A DataFrame with 'Date', 'Day', 'Name' and 'Type' columns, one row
        per table row that has a non-empty name. An empty DataFrame is
        returned when the request raises, the status code is not 200, or the
        page has no table with id 'holidays-table'.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"

    try:
        response = requests.get(page_url, headers=request_headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error fetching holidays for {country} {year}: {e}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Failed to fetch {country} {year}: {response.status_code}")
        return pd.DataFrame()

    table = BeautifulSoup(response.text, 'html.parser').find(
        'table', {'id': 'holidays-table'}
    )
    if table is None:
        print(f"No holiday table found for {country} {year}")
        return pd.DataFrame()

    records = []
    # The first <tr> is skipped as the header row.
    for tr in table.find_all('tr')[1:]:
        header_cell = tr.find('th')
        data_cells = tr.find_all('td')

        # Only rows with a <th> date cell and at least two <td> cells are used.
        if header_cell is None or len(data_cells) < 2:
            continue

        holiday_name = data_cells[1].get_text(strip=True)
        if not holiday_name:
            continue

        records.append({
            # The year is appended to the date text taken from the page.
            'Date': f"{header_cell.get_text(strip=True)} {year}",
            'Day': data_cells[0].get_text(strip=True),
            'Name': holiday_name,
            'Type': data_cells[2].get_text(strip=True) if len(data_cells) > 2 else "",
        })

    return pd.DataFrame(records)

def scrape_holidays(country_codes=None, years=None, delay=2):
    """Scrape holidays for several countries and years into one DataFrame.

    Args:
        country_codes: Iterable of country slugs passed to fetch_holidays.
            Defaults to ['china', 'czech', 'denmark', 'finland', 'france'].
        years: Iterable of years to fetch. Defaults to range(2021, 2041),
            i.e. 2021 through 2040 inclusive.
        delay: Seconds to sleep after every request. Defaults to 2.

    Returns:
        DataFrame with the columns 'Date', 'Day', 'Name', 'Type', 'Country'.
        When nothing could be fetched the frame is empty but still carries
        these columns (previously this case raised KeyError).
    """
    if country_codes is None:
        country_codes = ['china', 'czech', 'denmark', 'finland', 'france']
    # Materialise caller-supplied years: they are re-iterated once per country.
    years = range(2021, 2041) if years is None else list(years)
    columns = ['Date', 'Day', 'Name', 'Type', 'Country']

    # Collect per-request frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    frames = []
    for country in country_codes:
        for year in years:
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country  # Tag every row with its country
                frames.append(df)

            time.sleep(delay)  # Delay between requests

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)[columns]

# Entry point: scrape every configured country/year, then save and preview the result.
if __name__ == "__main__":
    holiday_data = scrape_holidays()

    # Write the combined table to CSV without the index column. The "utf-8-sig"
    # codec puts a UTF-8 byte-order mark at the start of the file.
    holiday_data.to_csv("holidays_2.csv", index=False, encoding="utf-8-sig")

    # Print the first 15 rows as a quick preview of the scraped data.
    print(holiday_data.head(15))

Fetching holidays for china in 2021...
Fetching holidays for china in 2022...
Fetching holidays for china in 2023...
Fetching holidays for china in 2024...
Fetching holidays for china in 2025...
Fetching holidays for china in 2026...
Fetching holidays for china in 2027...
Fetching holidays for china in 2028...
Fetching holidays for china in 2029...
Fetching holidays for china in 2030...
Fetching holidays for china in 2031...
Fetching holidays for china in 2032...
Fetching holidays for china in 2033...
Fetching holidays for china in 2034...
Fetching holidays for china in 2035...
Fetching holidays for china in 2036...
Fetching holidays for china in 2037...
Fetching holidays for china in 2038...
Fetching holidays for china in 2039...
Fetching holidays for china in 2040...
Fetching holidays for czech in 2021...
Fetching holidays for czech in 2022...
Fetching holidays for czech in 2023...
Fetching holidays for czech in 2024...
Fetching holidays for czech in 2025...
Fetching holidays for cze

'germany',
'greece',
'hong-kong',
'hungary',
'india'

In [4]:
def fetch_holidays(country, year):
    """Fetch one country's holiday list for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to every date string.

    Returns:
        A DataFrame with 'Date', 'Day', 'Name' and 'Type' columns, one row
        per table row that has a non-empty name. An empty DataFrame is
        returned when the request raises, the status code is not 200, or the
        page has no table with id 'holidays-table'.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"

    try:
        response = requests.get(page_url, headers=request_headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error fetching holidays for {country} {year}: {e}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Failed to fetch {country} {year}: {response.status_code}")
        return pd.DataFrame()

    table = BeautifulSoup(response.text, 'html.parser').find(
        'table', {'id': 'holidays-table'}
    )
    if table is None:
        print(f"No holiday table found for {country} {year}")
        return pd.DataFrame()

    records = []
    # The first <tr> is skipped as the header row.
    for tr in table.find_all('tr')[1:]:
        header_cell = tr.find('th')
        data_cells = tr.find_all('td')

        # Only rows with a <th> date cell and at least two <td> cells are used.
        if header_cell is None or len(data_cells) < 2:
            continue

        holiday_name = data_cells[1].get_text(strip=True)
        if not holiday_name:
            continue

        records.append({
            # The year is appended to the date text taken from the page.
            'Date': f"{header_cell.get_text(strip=True)} {year}",
            'Day': data_cells[0].get_text(strip=True),
            'Name': holiday_name,
            'Type': data_cells[2].get_text(strip=True) if len(data_cells) > 2 else "",
        })

    return pd.DataFrame(records)

def scrape_holidays(country_codes=None, years=None, delay=2):
    """Scrape holidays for several countries and years into one DataFrame.

    Args:
        country_codes: Iterable of country slugs passed to fetch_holidays.
            Defaults to ['germany', 'greece', 'hong-kong', 'hungary', 'india'].
        years: Iterable of years to fetch. Defaults to range(2021, 2041),
            i.e. 2021 through 2040 inclusive.
        delay: Seconds to sleep after every request. Defaults to 2.

    Returns:
        DataFrame with the columns 'Date', 'Day', 'Name', 'Type', 'Country'.
        When nothing could be fetched the frame is empty but still carries
        these columns (previously this case raised KeyError).
    """
    if country_codes is None:
        country_codes = ['germany', 'greece', 'hong-kong', 'hungary', 'india']
    # Materialise caller-supplied years: they are re-iterated once per country.
    years = range(2021, 2041) if years is None else list(years)
    columns = ['Date', 'Day', 'Name', 'Type', 'Country']

    # Collect per-request frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    frames = []
    for country in country_codes:
        for year in years:
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country  # Tag every row with its country
                frames.append(df)

            time.sleep(delay)  # Delay between requests

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)[columns]

# Entry point: scrape every configured country/year, then save and preview the result.
if __name__ == "__main__":
    holiday_data = scrape_holidays()

    # Write the combined table to CSV without the index column. The "utf-8-sig"
    # codec puts a UTF-8 byte-order mark at the start of the file.
    holiday_data.to_csv("holidays_3.csv", index=False, encoding="utf-8-sig")

    # Print the first 15 rows as a quick preview of the scraped data.
    print(holiday_data.head(15))

Fetching holidays for germany in 2021...
Fetching holidays for germany in 2022...
Fetching holidays for germany in 2023...
Fetching holidays for germany in 2024...
Fetching holidays for germany in 2025...
Fetching holidays for germany in 2026...
Fetching holidays for germany in 2027...
Fetching holidays for germany in 2028...
Fetching holidays for germany in 2029...
Fetching holidays for germany in 2030...
Fetching holidays for germany in 2031...
Fetching holidays for germany in 2032...
Fetching holidays for germany in 2033...
Fetching holidays for germany in 2034...
Fetching holidays for germany in 2035...
Fetching holidays for germany in 2036...
Fetching holidays for germany in 2037...
Fetching holidays for germany in 2038...
Fetching holidays for germany in 2039...
Fetching holidays for germany in 2040...
Fetching holidays for greece in 2021...
Fetching holidays for greece in 2022...
Fetching holidays for greece in 2023...
Fetching holidays for greece in 2024...
Fetching holidays fo

'indonesia',
'israel',
'italy',
'japan',
'kenya'

In [5]:
def fetch_holidays(country, year):
    """Fetch one country's holiday list for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to every date string.

    Returns:
        A DataFrame with 'Date', 'Day', 'Name' and 'Type' columns, one row
        per table row that has a non-empty name. An empty DataFrame is
        returned when the request raises, the status code is not 200, or the
        page has no table with id 'holidays-table'.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"

    try:
        response = requests.get(page_url, headers=request_headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error fetching holidays for {country} {year}: {e}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Failed to fetch {country} {year}: {response.status_code}")
        return pd.DataFrame()

    table = BeautifulSoup(response.text, 'html.parser').find(
        'table', {'id': 'holidays-table'}
    )
    if table is None:
        print(f"No holiday table found for {country} {year}")
        return pd.DataFrame()

    records = []
    # The first <tr> is skipped as the header row.
    for tr in table.find_all('tr')[1:]:
        header_cell = tr.find('th')
        data_cells = tr.find_all('td')

        # Only rows with a <th> date cell and at least two <td> cells are used.
        if header_cell is None or len(data_cells) < 2:
            continue

        holiday_name = data_cells[1].get_text(strip=True)
        if not holiday_name:
            continue

        records.append({
            # The year is appended to the date text taken from the page.
            'Date': f"{header_cell.get_text(strip=True)} {year}",
            'Day': data_cells[0].get_text(strip=True),
            'Name': holiday_name,
            'Type': data_cells[2].get_text(strip=True) if len(data_cells) > 2 else "",
        })

    return pd.DataFrame(records)

def scrape_holidays(country_codes=None, years=None, delay=2):
    """Scrape holidays for several countries and years into one DataFrame.

    Args:
        country_codes: Iterable of country slugs passed to fetch_holidays.
            Defaults to ['indonesia', 'israel', 'italy', 'japan', 'kenya'].
        years: Iterable of years to fetch. Defaults to range(2021, 2041),
            i.e. 2021 through 2040 inclusive.
        delay: Seconds to sleep after every request. Defaults to 2.

    Returns:
        DataFrame with the columns 'Date', 'Day', 'Name', 'Type', 'Country'.
        When nothing could be fetched the frame is empty but still carries
        these columns (previously this case raised KeyError).
    """
    if country_codes is None:
        country_codes = ['indonesia', 'israel', 'italy', 'japan', 'kenya']
    # Materialise caller-supplied years: they are re-iterated once per country.
    years = range(2021, 2041) if years is None else list(years)
    columns = ['Date', 'Day', 'Name', 'Type', 'Country']

    # Collect per-request frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    frames = []
    for country in country_codes:
        for year in years:
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country  # Tag every row with its country
                frames.append(df)

            time.sleep(delay)  # Delay between requests

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)[columns]

# Entry point: scrape every configured country/year, then save and preview the result.
if __name__ == "__main__":
    holiday_data = scrape_holidays()

    # Write the combined table to CSV without the index column. The "utf-8-sig"
    # codec puts a UTF-8 byte-order mark at the start of the file.
    holiday_data.to_csv("holidays_4.csv", index=False, encoding="utf-8-sig")

    # Print the first 15 rows as a quick preview of the scraped data.
    print(holiday_data.head(15))

Fetching holidays for indonesia in 2021...
Fetching holidays for indonesia in 2022...
Fetching holidays for indonesia in 2023...
Fetching holidays for indonesia in 2024...
Fetching holidays for indonesia in 2025...
Fetching holidays for indonesia in 2026...
No holiday table found for indonesia 2026
Fetching holidays for indonesia in 2027...
No holiday table found for indonesia 2027
Fetching holidays for indonesia in 2028...
No holiday table found for indonesia 2028
Fetching holidays for indonesia in 2029...
No holiday table found for indonesia 2029
Fetching holidays for indonesia in 2030...
No holiday table found for indonesia 2030
Fetching holidays for indonesia in 2031...
No holiday table found for indonesia 2031
Fetching holidays for indonesia in 2032...
No holiday table found for indonesia 2032
Fetching holidays for indonesia in 2033...
No holiday table found for indonesia 2033
Fetching holidays for indonesia in 2034...
No holiday table found for indonesia 2034
Fetching holidays fo

'north-korea',
'south-korea',
'malaysia',
'mongolia',
'morocco'

In [8]:
def fetch_holidays(country, year):
    """Fetch one country's holiday list for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to every date string.

    Returns:
        A DataFrame with 'Date', 'Day', 'Name' and 'Type' columns, one row
        per table row that has a non-empty name. An empty DataFrame is
        returned when the request raises, the status code is not 200, or the
        page has no table with id 'holidays-table'.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"

    try:
        response = requests.get(page_url, headers=request_headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error fetching holidays for {country} {year}: {e}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Failed to fetch {country} {year}: {response.status_code}")
        return pd.DataFrame()

    table = BeautifulSoup(response.text, 'html.parser').find(
        'table', {'id': 'holidays-table'}
    )
    if table is None:
        print(f"No holiday table found for {country} {year}")
        return pd.DataFrame()

    records = []
    # The first <tr> is skipped as the header row.
    for tr in table.find_all('tr')[1:]:
        header_cell = tr.find('th')
        data_cells = tr.find_all('td')

        # Only rows with a <th> date cell and at least two <td> cells are used.
        if header_cell is None or len(data_cells) < 2:
            continue

        holiday_name = data_cells[1].get_text(strip=True)
        if not holiday_name:
            continue

        records.append({
            # The year is appended to the date text taken from the page.
            'Date': f"{header_cell.get_text(strip=True)} {year}",
            'Day': data_cells[0].get_text(strip=True),
            'Name': holiday_name,
            'Type': data_cells[2].get_text(strip=True) if len(data_cells) > 2 else "",
        })

    return pd.DataFrame(records)

def scrape_holidays(country_codes=None, years=None, delay=2):
    """Scrape holidays for several countries and years into one DataFrame.

    Args:
        country_codes: Iterable of country slugs passed to fetch_holidays.
            Defaults to ['north-korea', 'south-korea', 'malaysia',
            'mongolia', 'morocco'].
        years: Iterable of years to fetch. Defaults to range(2021, 2041),
            i.e. 2021 through 2040 inclusive.
        delay: Seconds to sleep after every request. Defaults to 2.

    Returns:
        DataFrame with the columns 'Date', 'Day', 'Name', 'Type', 'Country'.
        When nothing could be fetched the frame is empty but still carries
        these columns (previously this case raised KeyError).
    """
    if country_codes is None:
        country_codes = ['north-korea', 'south-korea', 'malaysia', 'mongolia', 'morocco']
    # Materialise caller-supplied years: they are re-iterated once per country.
    years = range(2021, 2041) if years is None else list(years)
    columns = ['Date', 'Day', 'Name', 'Type', 'Country']

    # Collect per-request frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    frames = []
    for country in country_codes:
        for year in years:
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country  # Tag every row with its country
                frames.append(df)

            time.sleep(delay)  # Delay between requests

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)[columns]

# Entry point: scrape every configured country/year, then save and preview the result.
if __name__ == "__main__":
    holiday_data = scrape_holidays()

    # Write the combined table to CSV without the index column. The "utf-8-sig"
    # codec puts a UTF-8 byte-order mark at the start of the file.
    holiday_data.to_csv("holidays_5.csv", index=False, encoding="utf-8-sig")

    # Print the first 15 rows as a quick preview of the scraped data.
    print(holiday_data.head(15))

Fetching holidays for north-korea in 2021...
Fetching holidays for north-korea in 2022...
Fetching holidays for north-korea in 2023...
Fetching holidays for north-korea in 2024...
Fetching holidays for north-korea in 2025...
Fetching holidays for north-korea in 2026...
Fetching holidays for north-korea in 2027...
Fetching holidays for north-korea in 2028...
Fetching holidays for north-korea in 2029...
Fetching holidays for north-korea in 2030...
Fetching holidays for north-korea in 2031...
Fetching holidays for north-korea in 2032...
Fetching holidays for north-korea in 2033...
Fetching holidays for north-korea in 2034...
Fetching holidays for north-korea in 2035...
Fetching holidays for north-korea in 2036...
Fetching holidays for north-korea in 2037...
Fetching holidays for north-korea in 2038...
Fetching holidays for north-korea in 2039...
Fetching holidays for north-korea in 2040...
Fetching holidays for south-korea in 2021...
Fetching holidays for south-korea in 2022...
Fetching h

'netherlands',
'norway',
'pakistan',
'philippines',
'poland'

In [9]:
def fetch_holidays(country, year):
    """Fetch one country's holiday list for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to every date string.

    Returns:
        A DataFrame with 'Date', 'Day', 'Name' and 'Type' columns, one row
        per table row that has a non-empty name. An empty DataFrame is
        returned when the request raises, the status code is not 200, or the
        page has no table with id 'holidays-table'.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"

    try:
        response = requests.get(page_url, headers=request_headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error fetching holidays for {country} {year}: {e}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Failed to fetch {country} {year}: {response.status_code}")
        return pd.DataFrame()

    table = BeautifulSoup(response.text, 'html.parser').find(
        'table', {'id': 'holidays-table'}
    )
    if table is None:
        print(f"No holiday table found for {country} {year}")
        return pd.DataFrame()

    records = []
    # The first <tr> is skipped as the header row.
    for tr in table.find_all('tr')[1:]:
        header_cell = tr.find('th')
        data_cells = tr.find_all('td')

        # Only rows with a <th> date cell and at least two <td> cells are used.
        if header_cell is None or len(data_cells) < 2:
            continue

        holiday_name = data_cells[1].get_text(strip=True)
        if not holiday_name:
            continue

        records.append({
            # The year is appended to the date text taken from the page.
            'Date': f"{header_cell.get_text(strip=True)} {year}",
            'Day': data_cells[0].get_text(strip=True),
            'Name': holiday_name,
            'Type': data_cells[2].get_text(strip=True) if len(data_cells) > 2 else "",
        })

    return pd.DataFrame(records)

def scrape_holidays(country_codes=None, years=None, delay=2):
    """Scrape holidays for several countries and years into one DataFrame.

    Args:
        country_codes: Iterable of country slugs passed to fetch_holidays.
            Defaults to ['netherlands', 'norway', 'pakistan', 'philippines',
            'poland'].
        years: Iterable of years to fetch. Defaults to range(2021, 2041),
            i.e. 2021 through 2040 inclusive.
        delay: Seconds to sleep after every request. Defaults to 2.

    Returns:
        DataFrame with the columns 'Date', 'Day', 'Name', 'Type', 'Country'.
        When nothing could be fetched the frame is empty but still carries
        these columns (previously this case raised KeyError).
    """
    if country_codes is None:
        country_codes = ['netherlands', 'norway', 'pakistan', 'philippines', 'poland']
    # Materialise caller-supplied years: they are re-iterated once per country.
    years = range(2021, 2041) if years is None else list(years)
    columns = ['Date', 'Day', 'Name', 'Type', 'Country']

    # Collect per-request frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    frames = []
    for country in country_codes:
        for year in years:
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country  # Tag every row with its country
                frames.append(df)

            time.sleep(delay)  # Delay between requests

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)[columns]

# Entry point: scrape every configured country/year, then save and preview the result.
if __name__ == "__main__":
    holiday_data = scrape_holidays()

    # Write the combined table to CSV without the index column. The "utf-8-sig"
    # codec puts a UTF-8 byte-order mark at the start of the file.
    holiday_data.to_csv("holidays_6.csv", index=False, encoding="utf-8-sig")

    # Print the first 15 rows as a quick preview of the scraped data.
    print(holiday_data.head(15))

Fetching holidays for netherlands in 2021...
Fetching holidays for netherlands in 2022...
Fetching holidays for netherlands in 2023...
Fetching holidays for netherlands in 2024...
Fetching holidays for netherlands in 2025...
Fetching holidays for netherlands in 2026...
Fetching holidays for netherlands in 2027...
Fetching holidays for netherlands in 2028...
Fetching holidays for netherlands in 2029...
Fetching holidays for netherlands in 2030...
Fetching holidays for netherlands in 2031...
Fetching holidays for netherlands in 2032...
Fetching holidays for netherlands in 2033...
Fetching holidays for netherlands in 2034...
Fetching holidays for netherlands in 2035...
Fetching holidays for netherlands in 2036...
Fetching holidays for netherlands in 2037...
Fetching holidays for netherlands in 2038...
Fetching holidays for netherlands in 2039...
Fetching holidays for netherlands in 2040...
Fetching holidays for norway in 2021...
Fetching holidays for norway in 2022...
Fetching holidays fo

'portugal',
'romania',
'russia',
'slovenia',
'south-africa'

In [10]:
def fetch_holidays(country, year):
    """Fetch one country's holiday list for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to every date string.

    Returns:
        A DataFrame with 'Date', 'Day', 'Name' and 'Type' columns, one row
        per table row that has a non-empty name. An empty DataFrame is
        returned when the request raises, the status code is not 200, or the
        page has no table with id 'holidays-table'.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"

    try:
        response = requests.get(page_url, headers=request_headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error fetching holidays for {country} {year}: {e}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Failed to fetch {country} {year}: {response.status_code}")
        return pd.DataFrame()

    table = BeautifulSoup(response.text, 'html.parser').find(
        'table', {'id': 'holidays-table'}
    )
    if table is None:
        print(f"No holiday table found for {country} {year}")
        return pd.DataFrame()

    records = []
    # The first <tr> is skipped as the header row.
    for tr in table.find_all('tr')[1:]:
        header_cell = tr.find('th')
        data_cells = tr.find_all('td')

        # Only rows with a <th> date cell and at least two <td> cells are used.
        if header_cell is None or len(data_cells) < 2:
            continue

        holiday_name = data_cells[1].get_text(strip=True)
        if not holiday_name:
            continue

        records.append({
            # The year is appended to the date text taken from the page.
            'Date': f"{header_cell.get_text(strip=True)} {year}",
            'Day': data_cells[0].get_text(strip=True),
            'Name': holiday_name,
            'Type': data_cells[2].get_text(strip=True) if len(data_cells) > 2 else "",
        })

    return pd.DataFrame(records)

def scrape_holidays(country_codes=None, years=None, delay=2):
    """Scrape holidays for several countries and years into one DataFrame.

    Args:
        country_codes: Iterable of country slugs passed to fetch_holidays.
            Defaults to ['portugal', 'romania', 'russia', 'slovenia',
            'south-africa'].
        years: Iterable of years to fetch. Defaults to range(2021, 2041),
            i.e. 2021 through 2040 inclusive.
        delay: Seconds to sleep after every request. Defaults to 2.

    Returns:
        DataFrame with the columns 'Date', 'Day', 'Name', 'Type', 'Country'.
        When nothing could be fetched the frame is empty but still carries
        these columns (previously this case raised KeyError).
    """
    if country_codes is None:
        country_codes = ['portugal', 'romania', 'russia', 'slovenia', 'south-africa']
    # Materialise caller-supplied years: they are re-iterated once per country.
    years = range(2021, 2041) if years is None else list(years)
    columns = ['Date', 'Day', 'Name', 'Type', 'Country']

    # Collect per-request frames and concatenate once at the end instead of
    # re-copying the accumulated frame on every iteration.
    frames = []
    for country in country_codes:
        for year in years:
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country  # Tag every row with its country
                frames.append(df)

            time.sleep(delay)  # Delay between requests

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)[columns]

# Entry point: scrape every configured country/year, then save and preview the result.
if __name__ == "__main__":
    holiday_data = scrape_holidays()

    # Write the combined table to CSV without the index column. The "utf-8-sig"
    # codec puts a UTF-8 byte-order mark at the start of the file.
    holiday_data.to_csv("holidays_7.csv", index=False, encoding="utf-8-sig")

    # Print the first 15 rows as a quick preview of the scraped data.
    print(holiday_data.head(15))

Fetching holidays for portugal in 2021...
Fetching holidays for portugal in 2022...
Fetching holidays for portugal in 2023...
Fetching holidays for portugal in 2024...
Fetching holidays for portugal in 2025...
Fetching holidays for portugal in 2026...
Fetching holidays for portugal in 2027...
Fetching holidays for portugal in 2028...
Fetching holidays for portugal in 2029...
Fetching holidays for portugal in 2030...
Fetching holidays for portugal in 2031...
Fetching holidays for portugal in 2032...
Fetching holidays for portugal in 2033...
Fetching holidays for portugal in 2034...
Fetching holidays for portugal in 2035...
Fetching holidays for portugal in 2036...
Fetching holidays for portugal in 2037...
Fetching holidays for portugal in 2038...
Fetching holidays for portugal in 2039...
Fetching holidays for portugal in 2040...
Fetching holidays for romania in 2021...
Fetching holidays for romania in 2022...
Fetching holidays for romania in 2023...
Fetching holidays for romania in 2024

'spain',
'sri-lanka',
'sweden',
'taiwan',
'thailand'

In [11]:
def fetch_holidays(country, year):
    """Fetch one country's holiday list for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to every date string.

    Returns:
        A DataFrame with 'Date', 'Day', 'Name' and 'Type' columns, one row
        per table row that has a non-empty name. An empty DataFrame is
        returned when the request raises, the status code is not 200, or the
        page has no table with id 'holidays-table'.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"

    try:
        response = requests.get(page_url, headers=request_headers, timeout=10)
    except requests.RequestException as e:
        print(f"Error fetching holidays for {country} {year}: {e}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Failed to fetch {country} {year}: {response.status_code}")
        return pd.DataFrame()

    table = BeautifulSoup(response.text, 'html.parser').find(
        'table', {'id': 'holidays-table'}
    )
    if table is None:
        print(f"No holiday table found for {country} {year}")
        return pd.DataFrame()

    records = []
    # The first <tr> is skipped as the header row.
    for tr in table.find_all('tr')[1:]:
        header_cell = tr.find('th')
        data_cells = tr.find_all('td')

        # Only rows with a <th> date cell and at least two <td> cells are used.
        if header_cell is None or len(data_cells) < 2:
            continue

        holiday_name = data_cells[1].get_text(strip=True)
        if not holiday_name:
            continue

        records.append({
            # The year is appended to the date text taken from the page.
            'Date': f"{header_cell.get_text(strip=True)} {year}",
            'Day': data_cells[0].get_text(strip=True),
            'Name': holiday_name,
            'Type': data_cells[2].get_text(strip=True) if len(data_cells) > 2 else "",
        })

    return pd.DataFrame(records)

def scrape_holidays():
    """Scrape and combine holidays for every configured country and year.

    Calls ``fetch_holidays`` once per (country, year) pair, sleeping two
    seconds after each request, and tags every non-empty result with its
    country.

    Returns:
        pandas.DataFrame with the columns ``Date``, ``Day``, ``Name``,
        ``Type`` and ``Country``. When no request produced any rows the
        frame is empty but still carries those columns.
    """
    output_columns = ['Date', 'Day', 'Name', 'Type', 'Country']
    country_codes = ['spain', 'sri-lanka', 'sweden', 'taiwan', 'thailand']
    frames = []

    for country in country_codes:
        for year in range(2021, 2041):
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country
                frames.append(df)

            time.sleep(2)  # Delay between requests

    if not frames:
        # Selecting columns from a column-less frame would raise KeyError,
        # so return an empty frame that already has the expected columns.
        return pd.DataFrame(columns=output_columns)

    # Concatenate once instead of re-copying the growing frame per request
    return pd.concat(frames, ignore_index=True)[output_columns]

# Run scraper and save output
if __name__ == "__main__":
    # Collect the holiday rows for this cell's batch of countries
    holiday_data = scrape_holidays()

    # Write the batch to its own CSV file (no index column)
    holiday_data.to_csv("holidays_8.csv", encoding="utf-8-sig", index=False)

    # Show the first 15 rows as a quick check
    print(holiday_data.head(n=15))

Fetching holidays for spain in 2021...
Fetching holidays for spain in 2022...
Fetching holidays for spain in 2023...
Fetching holidays for spain in 2024...
Fetching holidays for spain in 2025...
Fetching holidays for spain in 2026...
Fetching holidays for spain in 2027...
Fetching holidays for spain in 2028...
Fetching holidays for spain in 2029...
Fetching holidays for spain in 2030...
Fetching holidays for spain in 2031...
Fetching holidays for spain in 2032...
Fetching holidays for spain in 2033...
Fetching holidays for spain in 2034...
Fetching holidays for spain in 2035...
Fetching holidays for spain in 2036...
Fetching holidays for spain in 2037...
Fetching holidays for spain in 2038...
Fetching holidays for spain in 2039...
Fetching holidays for spain in 2040...
Fetching holidays for sri-lanka in 2021...
Fetching holidays for sri-lanka in 2022...
Fetching holidays for sri-lanka in 2023...
Fetching holidays for sri-lanka in 2024...
Fetching holidays for sri-lanka in 2025...
Fetch

'turkey',
'vietnam',
'ukraine',
'uk',
'new-zealand'

In [12]:
def fetch_holidays(country, year):
    """Download one country's holiday table for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to each row's date text.

    Returns:
        pandas.DataFrame with one row per named holiday and the columns
        ``Date``, ``Day``, ``Name`` and ``Type``. An empty DataFrame is
        returned when the response status is not 200, when the page has no
        ``holidays-table`` element, or when the request raises
        ``requests.RequestException``; each of those cases prints a message.
    """
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"
    # Browser-like headers sent with the request
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }

    try:
        resp = requests.get(page_url, headers=request_headers, timeout=10)
        if resp.status_code != 200:
            print(f"Failed to fetch {country} {year}: {resp.status_code}")
            return pd.DataFrame()

        parsed_page = BeautifulSoup(resp.text, 'html.parser')
        table = parsed_page.find('table', {'id': 'holidays-table'})
        if not table:
            print(f"No holiday table found for {country} {year}")
            return pd.DataFrame()

        records = []
        # The first <tr> of the table is skipped
        for tr in table.find_all('tr')[1:]:
            header_cell = tr.find('th')
            data_cells = tr.find_all('td')

            # A usable row has a <th> (date) and at least two <td> cells
            if not header_cell or len(data_cells) < 2:
                continue

            holiday_name = data_cells[1].get_text(strip=True)
            if not holiday_name:
                continue

            # The third <td>, when present, supplies the holiday type
            if len(data_cells) > 2:
                holiday_type = data_cells[2].get_text(strip=True)
            else:
                holiday_type = ""

            records.append({
                'Date': f"{header_cell.get_text(strip=True)} {year}",
                'Day': data_cells[0].get_text(strip=True),
                'Name': holiday_name,
                'Type': holiday_type
            })

        return pd.DataFrame(records)

    except requests.RequestException as exc:
        print(f"Error fetching holidays for {country} {year}: {exc}")
        return pd.DataFrame()

def scrape_holidays():
    """Scrape and combine holidays for every configured country and year.

    Calls ``fetch_holidays`` once per (country, year) pair, sleeping two
    seconds after each request, and tags every non-empty result with its
    country.

    Returns:
        pandas.DataFrame with the columns ``Date``, ``Day``, ``Name``,
        ``Type`` and ``Country``. When no request produced any rows the
        frame is empty but still carries those columns.
    """
    output_columns = ['Date', 'Day', 'Name', 'Type', 'Country']
    country_codes = ['turkey', 'vietnam', 'ukraine', 'uk', 'new-zealand']
    frames = []

    for country in country_codes:
        for year in range(2021, 2041):
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country
                frames.append(df)

            time.sleep(2)  # Delay between requests

    if not frames:
        # Selecting columns from a column-less frame would raise KeyError,
        # so return an empty frame that already has the expected columns.
        return pd.DataFrame(columns=output_columns)

    # Concatenate once instead of re-copying the growing frame per request
    return pd.concat(frames, ignore_index=True)[output_columns]

# Run scraper and save output
if __name__ == "__main__":
    # Collect the holiday rows for this cell's batch of countries
    holiday_data = scrape_holidays()

    # Write the batch to its own CSV file (no index column)
    holiday_data.to_csv("holidays_9.csv", encoding="utf-8-sig", index=False)

    # Show the first 15 rows as a quick check
    print(holiday_data.head(n=15))

Fetching holidays for turkey in 2021...
Fetching holidays for turkey in 2022...
Fetching holidays for turkey in 2023...
Fetching holidays for turkey in 2024...
Fetching holidays for turkey in 2025...
Fetching holidays for turkey in 2026...
Fetching holidays for turkey in 2027...
Fetching holidays for turkey in 2028...
Fetching holidays for turkey in 2029...
Fetching holidays for turkey in 2030...
Fetching holidays for turkey in 2031...
Fetching holidays for turkey in 2032...
Fetching holidays for turkey in 2033...
Fetching holidays for turkey in 2034...
Fetching holidays for turkey in 2035...
Fetching holidays for turkey in 2036...
Fetching holidays for turkey in 2037...
Fetching holidays for turkey in 2038...
Fetching holidays for turkey in 2039...
Fetching holidays for turkey in 2040...
Fetching holidays for vietnam in 2021...
Fetching holidays for vietnam in 2022...
Fetching holidays for vietnam in 2023...
Fetching holidays for vietnam in 2024...
Fetching holidays for vietnam in 202

'belgium',
'luxembourg',
'chile',
'paraguay'

In [13]:
def fetch_holidays(country, year):
    """Download one country's holiday table for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to each row's date text.

    Returns:
        pandas.DataFrame with one row per named holiday and the columns
        ``Date``, ``Day``, ``Name`` and ``Type``. An empty DataFrame is
        returned when the response status is not 200, when the page has no
        ``holidays-table`` element, or when the request raises
        ``requests.RequestException``; each of those cases prints a message.
    """
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"
    # Browser-like headers sent with the request
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }

    try:
        resp = requests.get(page_url, headers=request_headers, timeout=10)
        if resp.status_code != 200:
            print(f"Failed to fetch {country} {year}: {resp.status_code}")
            return pd.DataFrame()

        parsed_page = BeautifulSoup(resp.text, 'html.parser')
        table = parsed_page.find('table', {'id': 'holidays-table'})
        if not table:
            print(f"No holiday table found for {country} {year}")
            return pd.DataFrame()

        records = []
        # The first <tr> of the table is skipped
        for tr in table.find_all('tr')[1:]:
            header_cell = tr.find('th')
            data_cells = tr.find_all('td')

            # A usable row has a <th> (date) and at least two <td> cells
            if not header_cell or len(data_cells) < 2:
                continue

            holiday_name = data_cells[1].get_text(strip=True)
            if not holiday_name:
                continue

            # The third <td>, when present, supplies the holiday type
            if len(data_cells) > 2:
                holiday_type = data_cells[2].get_text(strip=True)
            else:
                holiday_type = ""

            records.append({
                'Date': f"{header_cell.get_text(strip=True)} {year}",
                'Day': data_cells[0].get_text(strip=True),
                'Name': holiday_name,
                'Type': holiday_type
            })

        return pd.DataFrame(records)

    except requests.RequestException as exc:
        print(f"Error fetching holidays for {country} {year}: {exc}")
        return pd.DataFrame()

def scrape_holidays():
    """Scrape and combine holidays for every configured country and year.

    Calls ``fetch_holidays`` once per (country, year) pair, sleeping two
    seconds after each request, and tags every non-empty result with its
    country.

    Returns:
        pandas.DataFrame with the columns ``Date``, ``Day``, ``Name``,
        ``Type`` and ``Country``. When no request produced any rows the
        frame is empty but still carries those columns.
    """
    output_columns = ['Date', 'Day', 'Name', 'Type', 'Country']
    country_codes = ['belgium', 'luxembourg', 'chile', 'paraguay']
    frames = []

    for country in country_codes:
        for year in range(2021, 2041):
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country
                frames.append(df)

            time.sleep(2)  # Delay between requests

    if not frames:
        # Selecting columns from a column-less frame would raise KeyError,
        # so return an empty frame that already has the expected columns.
        return pd.DataFrame(columns=output_columns)

    # Concatenate once instead of re-copying the growing frame per request
    return pd.concat(frames, ignore_index=True)[output_columns]

# Run scraper and save output
if __name__ == "__main__":
    # Collect the holiday rows for this cell's batch of countries
    holiday_data = scrape_holidays()

    # Write the batch to its own CSV file (no index column)
    holiday_data.to_csv("holidays_10.csv", encoding="utf-8-sig", index=False)

    # Show the first 15 rows as a quick check
    print(holiday_data.head(n=15))

Fetching holidays for belgium in 2021...
Fetching holidays for belgium in 2022...
Fetching holidays for belgium in 2023...
Fetching holidays for belgium in 2024...
Fetching holidays for belgium in 2025...
Fetching holidays for belgium in 2026...
Fetching holidays for belgium in 2027...
Fetching holidays for belgium in 2028...
Fetching holidays for belgium in 2029...
Fetching holidays for belgium in 2030...
Fetching holidays for belgium in 2031...
Fetching holidays for belgium in 2032...
Fetching holidays for belgium in 2033...
Fetching holidays for belgium in 2034...
Fetching holidays for belgium in 2035...
Fetching holidays for belgium in 2036...
Fetching holidays for belgium in 2037...
Fetching holidays for belgium in 2038...
Fetching holidays for belgium in 2039...
Fetching holidays for belgium in 2040...
Fetching holidays for luxembourg in 2021...
Fetching holidays for luxembourg in 2022...
Fetching holidays for luxembourg in 2023...
Fetching holidays for luxembourg in 2024...
Fetc

'slovakia',
'ireland',
'iceland',
'us'

In [14]:
def fetch_holidays(country, year):
    """Download one country's holiday table for one year from timeanddate.com.

    Args:
        country: Country slug used in the URL path (lower-cased before use).
        year: Year placed in the URL and appended to each row's date text.

    Returns:
        pandas.DataFrame with one row per named holiday and the columns
        ``Date``, ``Day``, ``Name`` and ``Type``. An empty DataFrame is
        returned when the response status is not 200, when the page has no
        ``holidays-table`` element, or when the request raises
        ``requests.RequestException``; each of those cases prints a message.
    """
    page_url = f"https://www.timeanddate.com/holidays/{country.lower()}/{year}?hl=en"
    # Browser-like headers sent with the request
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive'
    }

    try:
        resp = requests.get(page_url, headers=request_headers, timeout=10)
        if resp.status_code != 200:
            print(f"Failed to fetch {country} {year}: {resp.status_code}")
            return pd.DataFrame()

        parsed_page = BeautifulSoup(resp.text, 'html.parser')
        table = parsed_page.find('table', {'id': 'holidays-table'})
        if not table:
            print(f"No holiday table found for {country} {year}")
            return pd.DataFrame()

        records = []
        # The first <tr> of the table is skipped
        for tr in table.find_all('tr')[1:]:
            header_cell = tr.find('th')
            data_cells = tr.find_all('td')

            # A usable row has a <th> (date) and at least two <td> cells
            if not header_cell or len(data_cells) < 2:
                continue

            holiday_name = data_cells[1].get_text(strip=True)
            if not holiday_name:
                continue

            # The third <td>, when present, supplies the holiday type
            if len(data_cells) > 2:
                holiday_type = data_cells[2].get_text(strip=True)
            else:
                holiday_type = ""

            records.append({
                'Date': f"{header_cell.get_text(strip=True)} {year}",
                'Day': data_cells[0].get_text(strip=True),
                'Name': holiday_name,
                'Type': holiday_type
            })

        return pd.DataFrame(records)

    except requests.RequestException as exc:
        print(f"Error fetching holidays for {country} {year}: {exc}")
        return pd.DataFrame()

def scrape_holidays():
    """Scrape and combine holidays for every configured country and year.

    Calls ``fetch_holidays`` once per (country, year) pair, sleeping two
    seconds after each request, and tags every non-empty result with its
    country.

    Returns:
        pandas.DataFrame with the columns ``Date``, ``Day``, ``Name``,
        ``Type`` and ``Country``. When no request produced any rows the
        frame is empty but still carries those columns.
    """
    output_columns = ['Date', 'Day', 'Name', 'Type', 'Country']
    country_codes = ['slovakia', 'ireland', 'iceland', 'us']
    frames = []

    for country in country_codes:
        for year in range(2021, 2041):
            print(f"Fetching holidays for {country} in {year}...")
            df = fetch_holidays(country, year)

            if not df.empty:
                df['Country'] = country
                frames.append(df)

            time.sleep(2)  # Delay between requests

    if not frames:
        # Selecting columns from a column-less frame would raise KeyError,
        # so return an empty frame that already has the expected columns.
        return pd.DataFrame(columns=output_columns)

    # Concatenate once instead of re-copying the growing frame per request
    return pd.concat(frames, ignore_index=True)[output_columns]

# Run scraper and save output
if __name__ == "__main__":
    # Collect the holiday rows for this cell's batch of countries
    holiday_data = scrape_holidays()

    # Write the batch to its own CSV file (no index column)
    holiday_data.to_csv("holidays_11.csv", encoding="utf-8-sig", index=False)

    # Show the first 15 rows as a quick check
    print(holiday_data.head(n=15))

Fetching holidays for slovakia in 2021...
Fetching holidays for slovakia in 2022...
Fetching holidays for slovakia in 2023...
Fetching holidays for slovakia in 2024...
Fetching holidays for slovakia in 2025...
Fetching holidays for slovakia in 2026...
Fetching holidays for slovakia in 2027...
Fetching holidays for slovakia in 2028...
Fetching holidays for slovakia in 2029...
Fetching holidays for slovakia in 2030...
Fetching holidays for slovakia in 2031...
Fetching holidays for slovakia in 2032...
Fetching holidays for slovakia in 2033...
Fetching holidays for slovakia in 2034...
Fetching holidays for slovakia in 2035...
Fetching holidays for slovakia in 2036...
Fetching holidays for slovakia in 2037...
Fetching holidays for slovakia in 2038...
Fetching holidays for slovakia in 2039...
Fetching holidays for slovakia in 2040...
Fetching holidays for ireland in 2021...
Fetching holidays for ireland in 2022...
Fetching holidays for ireland in 2023...
Fetching holidays for ireland in 2024

### Consolidated File

In [15]:
# Name of the consolidated output. It also matches the "holidays_*.csv"
# pattern used below, so it has to be excluded from the inputs.
output_file = "holidays_2021_2040.csv"

# Get all matching per-batch CSV files in a stable (sorted) order, skipping
# the consolidated file so that re-running this cell does not merge the
# previous result into itself and duplicate every row.
csv_files = sorted(
    path for path in glob.glob("holidays_*.csv") if path != output_file
)

# Check if any files are found
if not csv_files:
    print("No matching CSV files found.")
else:
    # Print all CSV files that will be merged
    print("Merging the following CSV files:")
    for file in csv_files:
        print(file)

    # Read all CSV files using utf-8-sig encoding
    df_list = [pd.read_csv(file, encoding="utf-8-sig") for file in csv_files]

    # Concatenate them into a single DataFrame
    df = pd.concat(df_list, ignore_index=True)

    # Save the merged DataFrame using utf-8-sig encoding
    df.to_csv(output_file, index=False, encoding="utf-8-sig")

    # Display the first few rows
    print(df.head())

    print(f"CSV files merged successfully! The consolidated file is saved as '{output_file}'.")


Merging the following CSV files:
holidays_1.csv
holidays_10.csv
holidays_11.csv
holidays_2.csv
holidays_3.csv
holidays_4.csv
holidays_5.csv
holidays_6.csv
holidays_7.csv
holidays_8.csv
holidays_9.csv
          Date        Day                    Name              Type    Country
0   1 Jan 2021     Friday          New Year's Day  National Holiday  australia
1   6 Jan 2021  Wednesday                Epiphany         Christian  australia
2   7 Jan 2021   Thursday  Orthodox Christmas Day          Orthodox  australia
3  14 Jan 2021   Thursday       Orthodox New Year          Orthodox  australia
4  26 Jan 2021    Tuesday           Australia Day  National Holiday  australia
CSV files merged successfully! The consolidated file is saved as 'holidays_2021_2040.csv'.
