In [3]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import csv

# States to scrape. Keys are the two-letter abbreviations passed to the
# scraping functions (which build the site URLs from them); values are the
# display names written to the "State" column of the CSV.
# NOTE: this is a hand-picked subset, not every US state.
us_states = {
    'AR': 'Arkansas',
    'AL': 'Alabama',
    'OK': 'Oklahoma',
    'SC': 'South Carolina',
    'TX': 'Texas',
    'FL': 'Florida',
    'GA': 'Georgia',
    'LA': 'Louisiana',
    'TN': 'Tennessee',
    'NV': 'Nevada',
    'MO': 'Missouri',
    'MS': 'Mississippi',
    'KS': 'Kansas',
    'VA': 'Virginia',
    'MI': 'Michigan',
    'IL': 'Illinois',
    'OH': 'Ohio',
}

# (abbreviation, name) pairs ordered alphabetically by state name.
sorted_states = sorted(us_states.items(), key=lambda x: x[1])

# Rows scraped from the most recently processed city page; rebound by the
# main loop for every city.
business_data = []

def getLinksFromStatePage(state, timeout=30):
    """Return the city path segments linked from a state's listing page.

    Fetches ``https://partnercarrier.com/<state>`` and collects every
    ``<a>`` element whose ``href`` starts with ``/<state>/``.

    Args:
        state: State identifier exactly as it appears in the site's URL
            path (the script passes the two-letter abbreviation).
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises. Without a timeout a stalled connection would
            block the whole scrape indefinitely.

    Returns:
        A list of strings: each matching ``href`` with the leading
        ``/<state>/`` prefix removed. Duplicates are kept.

    Note:
        The HTTP status code is not checked; whatever HTML comes back is
        parsed, so an error page simply contributes the matching links it
        happens to contain.
    """
    state_url = "https://partnercarrier.com/" + state
    response = requests.get(state_url, timeout=timeout)
    prefix = "/" + state + "/"

    soup = BeautifulSoup(response.content, 'html.parser')

    # City links are recognised solely by their href prefix.
    city_links = soup.find_all(
        lambda tag: tag.name == "a"
        and tag.has_attr("href")
        and tag["href"].startswith(prefix)
    )

    # Slice the prefix off instead of using str.replace(), which would also
    # delete any later occurrence of "/<state>/" inside the path.
    return [link["href"][len(prefix):] for link in city_links]


def getNumbersFromPage(state, city, timeout=30):
    """Scrape (business name, phone number) pairs from a city listing page.

    Fetches ``https://partnercarrier.com/<state>/<city>``, takes the text of
    the page's ``<h4>`` elements as business names and the ``tel:`` links as
    phone numbers, and pairs them up in order of appearance.

    Args:
        state: State path segment of the URL.
        city: City path segment of the URL, as returned by
            ``getLinksFromStatePage``.
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises. Without a timeout a stalled connection would
            block the whole scrape indefinitely.

    Returns:
        A list of ``(name, number)`` tuples where ``number`` contains only
        the digit characters of the ``tel:`` href. The list is as long as
        the shorter of the two scraped sequences.

    Note:
        The HTTP status code is not checked; whatever HTML comes back is
        parsed.
    """
    city_url = "https://partnercarrier.com/" + state + "/" + city
    response = requests.get(city_url, timeout=timeout)

    soup = BeautifulSoup(response.content, 'html.parser')

    # Anchors whose href is a telephone link.
    tel_links = soup.find_all('a', href=lambda href: href and href.startswith('tel:'))

    # Every <h4> counts as a business name except one whose text contains
    # both "Found" and "Results" (presumably the result-count heading).
    names = [
        heading.text.strip()
        for heading in soup.find_all('h4')
        if not ('Found' in heading.text and 'Results' in heading.text)
    ]

    # Drop the "tel:" scheme and every non-digit character.
    phone_numbers = [
        ''.join(filter(str.isdigit, link['href'].replace('tel:', '')))
        for link in tel_links
    ]

    # NOTE(review): names and numbers are matched purely by position. If a
    # listing has no phone link (or more than one), every later pair on the
    # page is shifted -- confirm against the real page structure.
    return list(zip(names, phone_numbers))

# Scrape every configured state and stream the results into a CSV file.
# The encoding is pinned to UTF-8 so business names containing non-ASCII
# characters are written the same way on every platform instead of depending
# on (and possibly failing under) the locale's default encoding.
with open("partnercarrier.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)

    # Header row.
    writer.writerow(["Business Name", "Phone Number", "State"])

    # States are visited in alphabetical order of their display name.
    for state_code, state_name in sorted_states:

        # City path segments linked from the state's page.
        cities = getLinksFromStatePage(state_code)

        for city in cities:
            business_data = getNumbersFromPage(state_code, city)

            # Each scraped (name, number) tuple gets the state's display
            # name appended as the third column.
            writer.writerows(row + (state_name,) for row in business_data)