In [4]:
import pandas as pd
import requests
import io
from datetime import date, timedelta

# Define the date range
start_date = date(2022, 2, 24)
end_date = date(2024, 8, 8)

# Initialize the current date
current_date = start_date

# Initialize an empty list to hold dataframes
gpr_dfs = []

# Base URL for the repository
base_url = "https://github.com/iacoviel/iacoviel.github.io/raw/master/gpr_archive_files/"

# Loop through the date range, incrementing by one week each iteration
while current_date <= end_date:
    # Generate the filename for the current date
    current_filename = f"data_gpr_export_{current_date.strftime('%Y%m')}.xls"
    
    # Construct the URL for the current XLS file
    url = base_url + current_filename
    
    # Fetch the data from the URL
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code == 200:
        print(f"Fetched {current_filename}")
        
        # Read the XLS data into a dataframe
        file_data = io.BytesIO(response.content)
        current_gpr_df = pd.read_excel(file_data)
        
        # Check if 'GPRHC_USA' column exists before filtering
        if 'GPRHC_USA' in current_gpr_df.columns:
            gpr_dfs.append(current_gpr_df)
        else:
            print(f"'GPRHC_USA' column not found in {current_filename}")
    else:
        print(f"Failed to fetch {current_filename}")
    
    # Increment the current date by 20 days
    current_date += timedelta(days=20)

# Concatenate all dataframes in the list into a single dataframe
if gpr_dfs:
    merged_df = pd.concat(gpr_dfs, ignore_index=True)
    # Display the merged dataframe
    merged_df.to_csv('merged_gpr_data.csv', index=False)
    
else:
    print("No dataframes were successfully fetched and filtered.")

Failed to fetch data_gpr_export_202202.xls
Fetched data_gpr_export_202203.xls
Fetched data_gpr_export_202204.xls
Fetched data_gpr_export_202204.xls
Fetched data_gpr_export_202205.xls
Fetched data_gpr_export_202206.xls
Fetched data_gpr_export_202206.xls
Fetched data_gpr_export_202207.xls
Fetched data_gpr_export_202208.xls
Fetched data_gpr_export_202208.xls
Fetched data_gpr_export_202209.xls
Fetched data_gpr_export_202210.xls
Fetched data_gpr_export_202210.xls
Fetched data_gpr_export_202211.xls
Fetched data_gpr_export_202212.xls
Fetched data_gpr_export_202212.xls
Fetched data_gpr_export_202301.xls
Fetched data_gpr_export_202301.xls
Fetched data_gpr_export_202302.xls
Fetched data_gpr_export_202303.xls
Fetched data_gpr_export_202303.xls
Fetched data_gpr_export_202304.xls
Fetched data_gpr_export_202305.xls
Fetched data_gpr_export_202305.xls
Fetched data_gpr_export_202306.xls
Fetched data_gpr_export_202307.xls
Fetched data_gpr_export_202307.xls
Fetched data_gpr_export_202308.xls
Fetched data