In [52]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Dataset landing page; its HTML contains one "resource-item" element per downloadable file
url = "https://www.phoenixopendata.com/dataset/public-works-disposal-transfer-station-loads"

# Heading text of the first resource to list. The trailing "CSV" is part of the
# scraped heading text (see the titles this cell prints), so it must be matched too.
TARGET_TITLE = 'Public Works Disposal Transfer Station Loads Jan 2021CSV'

# requests applies no timeout by default; cap the wait so a stalled server
# cannot hang the cell indefinitely.
REQUEST_TIMEOUT = 30

try:
    # Download the dataset page
    response = requests.get(url, timeout=REQUEST_TIMEOUT)

    # Only a 200 response carries the page we want to parse
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")

        # Every listed resource is rendered as an element with class "resource-item"
        resource_elements = soup.find_all(class_="resource-item")

        # Collect (data-id, heading text) pairs. Elements missing either piece are
        # skipped instead of raising KeyError/AttributeError and aborting the cell.
        data_ids_and_titles = []
        for element in resource_elements:
            resource_id = element.get("data-id")
            heading = element.find(class_="heading")
            if resource_id is None or heading is None:
                continue
            data_ids_and_titles.append((resource_id, heading.text.strip()))

        # Position of the first resource whose title matches, or None if absent
        start_index = next(
            (
                position
                for position, (_, candidate) in enumerate(data_ids_and_titles)
                if candidate == TARGET_TITLE
            ),
            None,
        )

        if start_index is not None:
            # List the target resource and everything that follows it on the page
            for data_id, title in data_ids_and_titles[start_index:]:
                # Here you can retrieve the data for each resource and process it as needed
                print(data_id, title)
        else:
            print("Error: Target resource not found.")
    else:
        print("Error: Failed to retrieve webpage. Status code:", response.status_code)
except requests.RequestException as e:
    # Covers connection failures and the timeout configured above
    print("Request error:", e)
    


559b454b-e8f8-464f-b7f8-3bc9739e2262 Public Works Disposal Transfer Station Loads Jan 2021CSV
33f4c43b-1a9e-4e94-8099-97cc81973a7d Public Works Disposal Transfer Station Loads Mar 2021CSV
7e413dc9-4d28-4ed2-ad23-5f7a9da7acea Public Works Disposal Transfer Station Loads Feb 2021CSV
9b9388d6-249a-4196-98ae-b4fff19c5eec Public Works Disposal Transfer Station Loads Apr 2021CSV
0e0613f1-9d37-47ae-9a6d-4208a52b4b8e Public Works Disposal Transfer Station Loads May 2021CSV
50fe2120-6d51-4fac-ba40-1dedbf312ef7 Public Works Disposal Transfer Station Loads Jun 2021CSV
85abfaef-1fd4-44d9-a553-97c8e78b76d1 Public Works Disposal Transfer Station Loads Jul 2021CSV
05526d04-9d82-4610-a6fc-36b15cbde981 Public Works Disposal Transfer Station Loads Aug 2021CSV
8c214ede-6da3-4ff4-ad48-e1345f801c71 Public Works Disposal Transfer Station Loads Sep 2021CSV
38a2c3ee-d052-40f8-bb44-9bae9aa218db Public Works Disposal Transfer Station Loads Oct 2021CSV
b5f5dfbd-db19-47cf-95c8-86c540a0f54c Public Works Disposal T

In [57]:
import csv
from urllib.parse import urlencode

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Function to retrieve data from a given URL
def retrieve_data_function(url, timeout=30):
    """Fetch ``url`` with a GET request and return its decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Without it a stalled connection would block
            the caller indefinitely.

    Returns:
        The parsed JSON payload when the server answers with status 200, or
        ``None`` when the request fails, the status is not 200, or the body
        is not valid JSON. A message is printed in every failure case.
    """
    try:
        response = requests.get(url, timeout=timeout)

        if response.status_code != 200:
            print("Error: Failed to retrieve data. Status code:", response.status_code)
            return None

        return response.json()
    except requests.RequestException as e:
        # Connection problems, timeouts, and other errors raised by requests
        print("Request error:", e)
        return None
    except ValueError as e:
        # A 200 response whose body is not JSON (e.g. an HTML error page);
        # report it like the other failures instead of crashing the caller.
        print("Error: Response body is not valid JSON:", e)
        return None

# Function to retrieve all pages of data for a resource ID
def retrieve_all_data(resource_id, limit=100):
    """Collect every record of a resource by paging through ``datastore_search``.

    Pages of ``limit`` records are requested with an increasing ``offset``
    until a page comes back with fewer than ``limit`` records.

    Args:
        resource_id: Identifier of the resource to download.
        limit: Number of records requested per page; must be at least 1.

    Returns:
        A list with the records of all pages fetched. If a page request fails
        part-way through, the records gathered so far are returned and a
        warning is printed so the truncation is not silent.

    Raises:
        ValueError: If ``limit`` is smaller than 1 (the offset would never
            advance and the loop could not terminate).
    """
    if limit < 1:
        raise ValueError("limit must be at least 1")

    endpoint = "https://www.phoenixopendata.com/api/3/action/datastore_search"
    all_records = []
    offset = 0

    while True:
        # urlencode escapes the values, so an id containing '&', '=' or spaces
        # cannot corrupt the query string.
        query = urlencode({"resource_id": resource_id, "limit": limit, "offset": offset})
        data = retrieve_data_function(f"{endpoint}?{query}")

        if data is None:
            # The fetch helper already reported the error; flag that the result
            # is incomplete when earlier pages were collected.
            if all_records:
                print(
                    f"Warning: request at offset {offset} failed for resource "
                    f"'{resource_id}'; returning {len(all_records)} records fetched so far"
                )
            break

        # Tolerate a missing or null "result"/"records" entry
        page_records = (data.get("result") or {}).get("records") or []
        all_records.extend(page_records)

        # A short (or empty) page means there is nothing left to fetch
        if len(page_records) < limit:
            break
        offset += limit

    return all_records

# Dataset landing page; its HTML contains one "resource-item" element per downloadable file
url = "https://www.phoenixopendata.com/dataset/public-works-disposal-transfer-station-loads"

# Heading text of the first resource to export. The trailing "CSV" is part of the
# scraped heading text (see the titles this cell prints), so it must be matched too.
TARGET_TITLE = 'Public Works Disposal Transfer Station Loads Jan 2021CSV'

# requests applies no timeout by default; cap the wait so a stalled server
# cannot hang the cell indefinitely.
REQUEST_TIMEOUT = 30

# Titles come from a scraped page and are used as file names. Map path separators
# and characters that are invalid in file names on common platforms to "_" so a
# title can neither break open() nor write outside the working directory.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

try:
    # Download the dataset page
    response = requests.get(url, timeout=REQUEST_TIMEOUT)

    # Only a 200 response carries the page we want to parse
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")

        # Every listed resource is rendered as an element with class "resource-item"
        resource_elements = soup.find_all(class_="resource-item")

        # Collect (data-id, heading text) pairs. Elements missing either piece are
        # skipped instead of raising KeyError/AttributeError and aborting the cell.
        data_ids_and_titles = []
        for element in resource_elements:
            resource_id = element.get("data-id")
            heading = element.find(class_="heading")
            if resource_id is None or heading is None:
                continue
            data_ids_and_titles.append((resource_id, heading.text.strip()))

        # Position of the first resource whose title matches, or None if absent
        start_index = next(
            (
                position
                for position, (_, candidate) in enumerate(data_ids_and_titles)
                if candidate == TARGET_TITLE
            ),
            None,
        )

        if start_index is not None:
            # Export the target resource and everything that follows it on the page
            for data_id, title in data_ids_and_titles[start_index:]:
                # Download every page of records for this resource
                all_records = retrieve_all_data(data_id)

                if all_records:
                    # One CSV per resource, named after its (sanitised) title
                    csv_filename = f"{title.translate(_UNSAFE_FILENAME_CHARS)}.csv"

                    # Use the union of keys across all records (first-seen order).
                    # Taking only the first record's keys would make DictWriter
                    # raise ValueError on any later record with an extra field.
                    fieldnames = list(
                        dict.fromkeys(key for record in all_records for key in record)
                    )

                    # newline="" lets the csv module control line endings itself
                    with open(csv_filename, "w", newline="", encoding="utf-8") as csvfile:
                        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                        writer.writeheader()
                        writer.writerows(all_records)

                    print(f"All data for '{title}' has been successfully saved to '{csv_filename}'")
                else:
                    print(f"No data found for '{title}'")
        else:
            print("Error: Target resource not found.")
    else:
        print("Error: Failed to retrieve webpage. Status code:", response.status_code)
except requests.RequestException as e:
    # Covers connection failures and the timeout configured above
    print("Request error:", e)


All data for 'Public Works Disposal Transfer Station Loads Jan 2021CSV' has been successfully saved to 'Public Works Disposal Transfer Station Loads Jan 2021CSV.csv'
All data for 'Public Works Disposal Transfer Station Loads Mar 2021CSV' has been successfully saved to 'Public Works Disposal Transfer Station Loads Mar 2021CSV.csv'
All data for 'Public Works Disposal Transfer Station Loads Feb 2021CSV' has been successfully saved to 'Public Works Disposal Transfer Station Loads Feb 2021CSV.csv'
All data for 'Public Works Disposal Transfer Station Loads Apr 2021CSV' has been successfully saved to 'Public Works Disposal Transfer Station Loads Apr 2021CSV.csv'
All data for 'Public Works Disposal Transfer Station Loads May 2021CSV' has been successfully saved to 'Public Works Disposal Transfer Station Loads May 2021CSV.csv'
All data for 'Public Works Disposal Transfer Station Loads Jun 2021CSV' has been successfully saved to 'Public Works Disposal Transfer Station Loads Jun 2021CSV.csv'
All 