In [1]:
# Prerequisites
# !pip install selenium
# !pip install webdriver_manager

In [2]:
# importing Selenium libraries
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# importing BeautifulSoup
from bs4 import BeautifulSoup

# importing NumPy & pandas
import pandas as pd
import numpy as np

# importing the sleep function
from time import sleep

In [3]:
# Chrome is launched maximized; the matching chromedriver binary is
# resolved (and installed if needed) by webdriver_manager.
options = Options()
options.add_argument("start-maximized")

driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# Open the page that the rest of the notebook scrapes.
driver.get("https://fred.stlouisfed.org/releases/calendar")

In [4]:
def get_table():
    
    """
    Scrape the release table currently shown in the browser into a per-day dataframe.

    Reads ``driver.page_source``, takes the first ``<table>`` whose class is
    ``table table-condensed table-standard-theme`` and uses the first row's
    second cell as the date of the table. Split on spaces, that date string is
    assumed to give weekday, month, day and year (inferred from the column
    renames below - confirm against the live page).

    NOTE: a later cell of this notebook defines ``get_table`` again; once that
    cell has run, its definition replaces this one.

    Side effects (nothing is returned):
      * global ``last_day_of_the_week`` - the raw date string of the table
      * ``get_table.df`` - the processed frame, still holding the weekday in column ``0``
      * global ``df_<weekday>`` - the processed frame without the weekday column
      * prints ``"<weekday>, <day> <month> downloaded"``, or ``N/A`` when the
        page contains no matching table (in that case nothing else is changed)
    """

    # pd.read_html expects a file-like object when given literal HTML
    from io import StringIO

    # presumably replaced so that pandas does not read the text "N/A" as a missing value
    html = driver.page_source.replace("N/A", "Unspecified")
    soup = BeautifulSoup(html, 'html.parser')

    # locating the table on the website
    release_table = soup.find('table', attrs={'class': 'table table-condensed table-standard-theme'})
    if release_table is None:
        # days without releases have no table at all
        print('N/A')
        return

    # parsing the first matching table and naming its two columns
    raw_table = pd.read_html(StringIO(str(release_table)))[0]
    df = pd.DataFrame(raw_table.to_numpy(), columns=['Hour', 'Variable'])

    # saving new variable
    globals()['last_day_of_the_week'] = df['Variable'].iloc[0]

    # editing the dataframe: forward-fill the hour for rows that leave it blank
    df['Hour'] = df['Hour'].ffill()
    df['Date'] = df['Variable'].iloc[0]
    df = df.iloc[1:, :]  # the first row is the date header, not a release
    date_parts = df['Date'].str.split(' ', expand=True)
    df = pd.concat([date_parts, df.drop('Date', axis=1)], axis=1)
    df[2] = df[2].replace(',', '', regex=True)
    df = df.rename(columns={1: 'Month', 2: 'Day', 3: 'Year'})
    get_table.df = df

    # saving the weekday of the table, then dropping its column
    weekday = df[0].iloc[0]
    df = df.drop(0, axis=1)

    # saving results as a global df
    globals()[f'df_{weekday}'] = df

    # printed to confirm the execution of the function
    print(weekday + ", " + df['Day'].iloc[0] + " " + df['Month'].iloc[0] + " downloaded")

<div id="release-dates-pager">    No release dates are available for the selected options.
</div>

In [5]:
# TESTS

def get_table():
    
    """
    Scrape the release table currently shown in the browser into a per-day dataframe.

    Reads ``driver.page_source``, takes the first ``<table>`` whose class is
    ``table table-condensed table-standard-theme`` and uses the first row's
    second cell as the date of the table. Split on spaces, that date string is
    assumed to give weekday, month, day and year (inferred from the column
    renames below - confirm against the live page).

    NOTE: this redefinition replaces the ``get_table`` defined in the earlier cell.

    Side effects (nothing is returned):
      * global ``last_day_of_the_week`` - the raw date string of the table
      * ``get_table.df`` - the processed frame, still holding the weekday in column ``0``
      * global ``df_<weekday>`` - the processed frame without the weekday column
      * prints ``"<weekday>, <day> <month> downloaded"``, or ``N/A`` when the
        page contains no matching table (in that case nothing else is changed,
        so a ``df_<weekday>`` left over from an earlier call stays as it was)
    """

    # pd.read_html expects a file-like object when given literal HTML
    from io import StringIO

    # presumably replaced so that pandas does not read the text "N/A" as a missing value
    html = driver.page_source.replace("N/A", "Unspecified")
    soup = BeautifulSoup(html, 'html.parser')

    # locating the table on the website (searched once)
    release_table = soup.find('table', attrs={'class': 'table table-condensed table-standard-theme'})
    if release_table is None:
        # without this guard read_html fails here, because there is no table
        print('N/A')
        return

    # parsing the first matching table and naming its two columns
    raw_table = pd.read_html(StringIO(str(release_table)))[0]
    df = pd.DataFrame(raw_table.to_numpy(), columns=['Hour', 'Variable'])

    # saving new variable
    globals()['last_day_of_the_week'] = df['Variable'].iloc[0]

    # editing the dataframe: forward-fill the hour for rows that leave it blank
    df['Hour'] = df['Hour'].ffill()
    df['Date'] = df['Variable'].iloc[0]
    df = df.iloc[1:, :]  # the first row is the date header, not a release
    date_parts = df['Date'].str.split(' ', expand=True)
    df = pd.concat([date_parts, df.drop('Date', axis=1)], axis=1)
    df[2] = df[2].replace(',', '', regex=True)
    df = df.rename(columns={1: 'Month', 2: 'Day', 3: 'Year'})
    get_table.df = df

    # saving the weekday of the table, then dropping its column
    weekday = df[0].iloc[0]
    df = df.drop(0, axis=1)

    # saving results as a global df
    globals()[f'df_{weekday}'] = df

    # printed to confirm the execution of the function
    print(weekday + ", " + df['Day'].iloc[0] + " " + df['Month'].iloc[0] + " downloaded")

In [6]:
def download_data(name):
    
    """
    Combine the per-day dataframes of one week into a single dataframe.

    Looks up the globals ``df_Sunday`` ... ``df_Saturday`` (in that order),
    concatenates the ones that exist and contain rows, and stores the result
    in the global variable called ``name``. A weekday whose global is missing
    or empty is skipped instead of raising ``NameError``; if no day has data
    the stored frame is empty.

    Parameters
    ----------
    name : str
        Name of the global variable that receives the weekly dataframe.

    Returns
    -------
    None
        The result is only published through ``globals()[name]``. The previous
        row labels are kept in an ``index`` column (``reset_index`` without
        ``drop``).
    """

    day_names = ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday')
    namespace = globals()

    # collecting the day frames that are actually available
    candidates = (namespace.get(f'df_{day}') for day in day_names)
    day_frames = [frame for frame in candidates if frame is not None and not frame.empty]

    # merging all df's to week_df (pd.concat refuses an empty list)
    week_df = pd.concat(day_frames) if day_frames else pd.DataFrame()

    week_df = week_df.rename(columns={4: 'Updated'})
    week_df = week_df.reset_index()
    namespace[name] = week_df

In [7]:
def wait():
    
    """
    Delay helper that gives the page time to load.

    Waits (up to 100 seconds) until a cell of the ``fc-border-separate``
    table is present in the DOM, then sleeps 3 more seconds.
    """

    calendar_cell = (By.XPATH, "//table[@class='fc-border-separate']/tbody/tr/td")
    cell_is_present = EC.presence_of_element_located(calendar_cell)

    WebDriverWait(driver, 100).until(cell_is_present)
    sleep(3)

In [8]:
def week():
    
    """
    Scrape one full week (Sunday to Saturday) and move the calendar to the next week.

    For each weekday the matching calendar cell is clicked, ``wait()`` lets the
    page load and ``get_table()`` stores the day's releases in the global
    ``df_<Weekday>``.

    Before a day is scraped its ``df_<Weekday>`` global is reset to an empty
    dataframe. ``get_table()`` leaves that global untouched when the day has no
    table, so without the reset the rows of the same weekday from a previously
    scraped week would still be there and would be merged into this week by
    ``download_data``.

    Returns nothing; finishes by clicking the "next week" arrow.
    """

    day_names = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

    # setting Selenium buttons (all located up front, before any click)
    next_week = driver.find_element(By.XPATH, "//span[text()='›']")
    day_cells = [
        driver.find_element(By.XPATH, f"//table[@class='fc-border-separate']/tbody/tr/td[{position}]")
        for position in range(1, len(day_names) + 1)
    ]

    # Executing the process
    # Saturday is selected once without scraping before the loop starts -
    # presumably so that the following Sunday click is a real change of
    # selection; TODO confirm whether this is still needed.
    day_cells[-1].click()
    wait()

    for day_name, day_cell in zip(day_names, day_cells):
        # forget data of this weekday from an earlier week (see docstring)
        globals()[f'df_{day_name}'] = pd.DataFrame()

        day_cell.click()
        wait()
        get_table()

    next_week.click()

In [9]:
# Scrape the week currently shown in the calendar (one table per weekday),
# then advance the calendar to the following week.
week()

Sunday, 04 December downloaded
Monday, 05 December downloaded
Tuesday, 06 December downloaded
Wednesday, 07 December downloaded
Thursday, 08 December downloaded
Friday, 09 December downloaded
Saturday, 10 December downloaded


In [10]:
# Merge the per-day frames scraped above into the global dataframe `week_1`.
download_data('week_1')

In [11]:
from pathlib import Path

# Total number of consecutive weeks in the combined frame (week_1 ... week_24).
N_WEEKS = 24

# `week_1` was already built by the `download_data('week_1')` cell above, and
# the calendar has since moved on to the next week. Scraping into the name
# 'week_1' again would overwrite that first week, so continue with week_2.
weekly_frames = [week_1]

for week_number in range(2, N_WEEKS + 1):
    week_name = f'week_{week_number}'
    week()
    download_data(week_name)
    # download_data publishes its result as a global variable of that name
    weekly_frames.append(globals()[week_name])

all_weeks_df = pd.concat(weekly_frames)

# saving it as csv file (creating the target folder first if it is missing)
output_path = Path("./FRED/all_weeks_dataframe.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
all_weeks_df.to_csv(output_path)

# preview only - the full frame is in the CSV
all_weeks_df.head()

Sunday, 11 December downloaded
Monday, 12 December downloaded
Tuesday, 13 December downloaded
Wednesday, 14 December downloaded
Thursday, 15 December downloaded
Friday, 16 December downloaded
Saturday, 17 December downloaded
Sunday, 18 December downloaded
Monday, 19 December downloaded
Tuesday, 20 December downloaded
Wednesday, 21 December downloaded
Thursday, 22 December downloaded
Friday, 23 December downloaded
Saturday, 24 December downloaded
Sunday, 25 December downloaded
Monday, 26 December downloaded
Tuesday, 27 December downloaded
Wednesday, 28 December downloaded
Thursday, 29 December downloaded
Friday, 30 December downloaded
Saturday, 31 December downloaded
N/A
N/A
N/A
N/A
Thursday, 05 January downloaded
N/A
N/A
N/A
N/A
Tuesday, 10 January downloaded
N/A
N/A
N/A
N/A
N/A


NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=107.0.5304.121)
Stacktrace:
0   chromedriver                        0x0000000100f502c8 chromedriver + 4752072
1   chromedriver                        0x0000000100ed0463 chromedriver + 4228195
2   chromedriver                        0x0000000100b33b18 chromedriver + 441112
3   chromedriver                        0x0000000100b10210 chromedriver + 295440
4   chromedriver                        0x0000000100b95e3d chromedriver + 843325
5   chromedriver                        0x0000000100ba9719 chromedriver + 923417
6   chromedriver                        0x0000000100b91b33 chromedriver + 826163
7   chromedriver                        0x0000000100b629fd chromedriver + 633341
8   chromedriver                        0x0000000100b64051 chromedriver + 639057
9   chromedriver                        0x0000000100f1d30e chromedriver + 4543246
10  chromedriver                        0x0000000100f21a88 chromedriver + 4561544
11  chromedriver                        0x0000000100f296df chromedriver + 4593375
12  chromedriver                        0x0000000100f228fa chromedriver + 4565242
13  chromedriver                        0x0000000100ef82cf chromedriver + 4391631
14  chromedriver                        0x0000000100f415b8 chromedriver + 4691384
15  chromedriver                        0x0000000100f41739 chromedriver + 4691769
16  chromedriver                        0x0000000100f5781e chromedriver + 4782110
17  libsystem_pthread.dylib             0x00007ff802a454e1 _pthread_start + 125
18  libsystem_pthread.dylib             0x00007ff802a40f6b thread_start + 15
