# Website 1

link = https://www.timeanddate.com/holidays/iran/

In [129]:
import requests
from bs4 import BeautifulSoup

In [130]:
STAR_YEAR = 2014
END_YEAR = 2019

In [131]:
main_url = "https://www.timeanddate.com/holidays/iran/"
years = [str(year) for year in range(STAR_YEAR, END_YEAR+1)]
years

['2014', '2015', '2016', '2017', '2018', '2019']

In [132]:
# getting the page contents
all_pages = {}
for year in years:
    r = requests.get(main_url+year)
    if not r.status_code == 200:
        raise RuntimeError('Problem accessing page data.')
    all_pages[year] = r.text
all_pages.keys()

dict_keys(['2014', '2015', '2016', '2017', '2018', '2019'])

In [133]:
# selecting the holiday tables
all_holiday_tables = {}
for year in years:
    soup = BeautifulSoup(all_pages[year], 'html.parser')
    holiday_table = soup.find("table", {"id":"holidays-table"})
    if not holiday_table == None:
        all_holiday_tables[year] = holiday_table
all_holiday_tables.keys()

dict_keys(['2014', '2015', '2016', '2017', '2018', '2019'])

In [134]:
# getting actual holiday rows 
all_holiday_rows = {}
for year in years:
    all_holiday_rows[year] = []
    holiday_rows = all_holiday_tables[year].find_all('tr')
    for row in holiday_rows:
        if 'data-date' in str(row):
            all_holiday_rows[year].append(row)
    print('For year {} the number of holidays are  {}'.format(year, len(all_holiday_rows[year])))

For year 2014 the number of holidays are  31
For year 2015 the number of holidays are  30
For year 2016 the number of holidays are  31
For year 2017 the number of holidays are  31
For year 2018 the number of holidays are  30
For year 2019 the number of holidays are  22


In [135]:
all_holidays_and_occasions = {}
for year in years:
    all_holidays_and_occasions[year] = []
    for i in range(len(all_holiday_rows[year])):
        holiday_date = all_holiday_rows[year][i].find('th').text
        occasion = all_holiday_rows[year][i].find('a').text
        all_holidays_and_occasions[year].append([holiday_date, occasion])
all_holidays_and_occasions

{'2014': [['2 jan', 'Martyrdom of Imam Reza'],
  ['19 jan', 'Birthday of Prophet Muhammad and Imam Sadeq'],
  ['11 fév', 'Revolution Day'],
  ['20 mar', 'March Equinox'],
  ['20 mar', 'Oil Nationalization Day'],
  ['21 mar', 'Norooz (Persian New Year)'],
  ['22 mar', 'Norooz Holiday'],
  ['23 mar', 'Norooz Holiday'],
  ['24 mar', 'Norooz Holiday'],
  ['1 avr', 'Islamic Republic Day'],
  ['2 avr', 'Nature Day'],
  ['3 avr', 'Martyrdom of Fatima'],
  ['13 mai', 'Birthday of Imam Ali'],
  ['27 mai', "Prophet's Ascension"],
  ['4 juin', "Anniversary of Khomeini's Death"],
  ['5 juin', 'Revolt of Khordad 15'],
  ['13 juin', "Imam Mahdi's birthday"],
  ['21 juin', 'June Solstice'],
  ['19 juil', 'Martyrdom of Imam Ali'],
  ['29 juil', 'Eid-e-Fetr (End of Ramadan)'],
  ['30 juil', 'Eid-e-Fetr (Additional Holiday)'],
  ['22 aoû', 'Martyrdom of Imam Sadeq'],
  ['23 sep', 'September Equinox'],
  ['5 oct', 'Eid-e-Ghorban (Feast of Sacrifice)'],
  ['13 oct', 'Eid-e-Ghadir'],
  ['3 nov', 'Tassoua']

In [136]:
finalized_holidays =[]
finalized_holidays.append('Date,Occasion\n')
month_mappings = {'jan': '01', 'fév': '02', 'mar': '03', 'avr': '04', 'mai': '05', 'juin': '06', 'juil': '07', 'aoû': '08', 'sep': '09', 'oct':'10', 'nov': '11', 'déc': '12'}
for year in years:
    for row in all_holidays_and_occasions[year]:
        date = row[0].split()
        h_date = str(year)+'-'+month_mappings[date[1]]+'-'+date[0]
        finalized_row = h_date+','+row[1]+'\n'
        finalized_holidays.append(finalized_row)
finalized_holidays[:5]

['Date,Occasion\n',
 '2014-01-2,Martyrdom of Imam Reza\n',
 '2014-01-19,Birthday of Prophet Muhammad and Imam Sadeq\n',
 '2014-02-11,Revolution Day\n',
 '2014-03-20,March Equinox\n']

In [137]:
with open('holidays_'+str(STAR_YEAR)+'_'+str(END_YEAR)+'_v1.csv', 'w+') as fp:
    for row in finalized_holidays:
        fp.write(row)

In [138]:
import pandas as pd
file_content = pd.read_csv('holidays_'+str(STAR_YEAR)+'_'+str(END_YEAR)+'_v1.csv')
file_content.head()

Unnamed: 0,Date,Occasion
0,2014-01-2,Martyrdom of Imam Reza
1,2014-01-19,Birthday of Prophet Muhammad and Imam Sadeq
2,2014-02-11,Revolution Day
3,2014-03-20,March Equinox
4,2014-03-20,Oil Nationalization Day


# Website 2

link = https://calendarific.com/holidays/2019/IR

In [139]:
url_start = "https://calendarific.com/holidays/"
url_end = "/IR"

In [140]:
all_pages_2 = {}
for year in years:
    r = requests.get(url_start+year+url_end)
    if not r.status_code == 200:
        raise RuntimeError('Problem accessing page data.')
    all_pages_2[year] = r.text
all_pages_2.keys()

dict_keys(['2014', '2015', '2016', '2017', '2018', '2019'])

In [141]:
all_holiday_tables_2 = {}
for year in years:
    soup = BeautifulSoup(all_pages_2[year], 'html.parser')
    holiday_table = soup.find("table")
    if 'holiday-list' in str(holiday_table):
        all_holiday_tables_2[year] = holiday_table
all_holiday_tables_2.keys()

dict_keys(['2014', '2015', '2016', '2017', '2018', '2019'])

In [142]:
all_holiday_rows_2 = {}
for year in years:
    all_holiday_rows_2[year] = []
    holiday_rows = all_holiday_tables_2[year].find_all('tr')
    count_is_zero = True
    for row in holiday_rows:
        if count_is_zero:
            count_is_zero = False
            continue
        else:
            all_holiday_rows_2[year].append(row)
    print('For year {} the number of holidays are  {}'.format(year, len(all_holiday_rows_2[year])))

For year 2014 the number of holidays are  28
For year 2015 the number of holidays are  27
For year 2016 the number of holidays are  29
For year 2017 the number of holidays are  29
For year 2018 the number of holidays are  28
For year 2019 the number of holidays are  22


In [143]:
all_holidays_and_occasions_2 = {}
for year in years:
    all_holidays_and_occasions_2[year] = []
    for i in range(len(all_holiday_rows_2[year])):
        this_holidays_data = all_holiday_rows_2[year][i].find_all('td')
        date = this_holidays_data[2].text
        day = this_holidays_data[1].text
        occasion = this_holidays_data[0].text.strip()
        all_holidays_and_occasions_2[year].append([date, day, occasion])
all_holidays_and_occasions_2['2014'][:4]

[['December 23', 'Tuesday', 'Martyrdom of Imam Reza'],
 ['January 19', 'Sunday', 'Birthday of Prophet Muhammad and Imam Sadeq'],
 ['February 11', 'Tuesday', 'Revolution Day'],
 ['March 20', 'Thursday', 'March Equinox']]

In [156]:
finalized_holidays_2 =[]
finalized_holidays_2.append('Date,Day,Occasion\n')
month_mappings_2 = {'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05', 'June': '06', 'July': '07', 'August': '08', 'September': '09', 'October':'10', 'November': '11', 'December': '12'}
for year in years:
    for row in all_holidays_and_occasions_2[year]:
        date = row[0].split()
        h_date = str(year)+'-'+month_mappings_2[date[0]]+'-'+date[1]
        row[2] = row[2].replace(',',' ')
        row[2] = row[2].replace('\n',' ')
        finalized_row = h_date+','+row[1]+','+row[2]+'\n'
        finalized_holidays_2.append(finalized_row)
finalized_holidays_2[:5]

['Date,Day,Occasion\n',
 '2014-12-23,Tuesday,Martyrdom of Imam Reza\n',
 '2014-01-19,Sunday,Birthday of Prophet Muhammad and Imam Sadeq\n',
 '2014-02-11,Tuesday,Revolution Day\n',
 '2014-03-20,Thursday,March Equinox\n']

In [157]:
with open('holidays_'+str(STAR_YEAR)+'_'+str(END_YEAR)+'_v2.csv', 'w+') as fp:
    for row in finalized_holidays_2:
        fp.write(row)

In [158]:
file_content = pd.read_csv('holidays_'+str(STAR_YEAR)+'_'+str(END_YEAR)+'_v2.csv')
file_content

Unnamed: 0,Date,Day,Occasion
0,2014-12-23,Tuesday,Martyrdom of Imam Reza
1,2014-01-19,Sunday,Birthday of Prophet Muhammad and Imam Sadeq
2,2014-02-11,Tuesday,Revolution Day
3,2014-03-20,Thursday,March Equinox
4,2014-03-20,Thursday,Oil Nationalization Day
5,2014-03-21,Friday,Norooz (Persian New Year)
6,2014-03-24,Monday,Norooz Holiday
7,2014-04-1,Tuesday,Islamic Republic Day
8,2014-04-2,Wednesday,Nature Day
9,2014-04-3,Thursday,Martyrdom of Fatima
