In [34]:
from datetime import datetime
from urllib.parse import quote_plus

import openpyxl
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [35]:
# Configure Chrome DevTools options
chrome_options = Options()
chrome_options.add_experimental_option("w3c", False)
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--headless")

In [36]:
driver = webdriver.Chrome()

In [37]:
def collect_data(city, latitude, longitude):
    # Set Serach Location
    driver.execute_cdp_cmd('Emulation.setGeolocationOverride', {
        'latitude': latitude,
        'longitude': longitude,
        'accuracy': 100
    })
    
    # Navigate to search results page
    search_term = f"internet packages near {city}"
    driver.get(f"https://www.google.com/search?q={search_term}")
    
    # Wait for the page to load
    driver.implicitly_wait(1)
    
    # Collect data for sponsored links
    sponsored_links = driver.find_elements(By.XPATH, "//div[@class='uEierd']")

    data = []
    for link in sponsored_links:
        link_element = link.find_element(By.TAG_NAME, "a")
        link_text = link_element.text
        link_url = link_element.get_attribute("href")
        link_description = link.find_element(By.TAG_NAME, "span").text
        
        additional_data_elements = link.find_elements(By.CSS_SELECTOR, ".MUxGbd, .yDYNvb, .lyLwlc")
        additional_data = [elem.text for elem in additional_data_elements]
        
        data.append({
            "link_text": link_text,
            "link_url": link_url,
            "link_description": link_description,
            "additional_data": additional_data
        })
        
    return data

In [38]:
cities_excel_file = 'data.xlsx'
cities_df = pd.read_excel('cities_data.xlsx')

In [39]:
# Iterate over the cities and collect data for each city
results_data = []
for index, row in cities_df.iterrows():
    city = row["city"]
    latitude = row["latitude"]
    longitude = row["longitude"]

    data = collect_data(city, latitude, longitude)
    results_data.extend(data)

# Create a dataframe from the collected results data
results_df = pd.DataFrame(results_data)

# Add a timestamp column to the dataframe
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
results_df["timestamp"] = timestamp

# Append the results dataframe to the Excel sheet
results_sheet_name = "results"

try:
    with pd.ExcelWriter(cities_excel_file, mode="a", engine="openpyxl") as writer:
        results_df.to_excel(writer, sheet_name=results_sheet_name, index=False, header=not writer.book)

except FileNotFoundError:
    results_df.to_excel(cities_excel_file, sheet_name=results_sheet_name, index=False)

print("Data appended to Excel successfully.")

# Close the browser
driver.quit()

Data appended to Excel successfully.


In [None]:
try:
    # Set location to Denver
    driver.execute_cdp_cmd('Emulation.setGeolocationOverride', {
        'latitude': 39.7392,
        'longitude': -104.9903,
        'accuracy': 100
    })
except Exception as e:
    print(f"Exception while loading location in Emulator : {e}")

In [None]:
# Navigate to search results page
search_term = "internet packages near new york"
driver.get(f"https://www.google.com/search?q={search_term}")

In [None]:
# Wait for the page to load
driver.implicitly_wait(5)

In [None]:
# Collect data for sponsored links
sponsored_links = driver.find_elements(By.XPATH, "//div[@class='uEierd']")

data = []
for link in sponsored_links:
    link_element = link.find_element(By.TAG_NAME, "a")
    link_text = link_element.text
    link_url = link_element.get_attribute("href")
    link_description = link.find_element(By.TAG_NAME, "span").text
    
    additional_data_elements = link.find_elements(By.CSS_SELECTOR, ".MUxGbd, .yDYNvb, .lyLwlc")
    additional_data = [elem.text for elem in additional_data_elements]
    
    data.append({
        "link_text": link_text,
        "link_url": link_url,
        "link_description": link_description,
        "additional_data": additional_data
    })

In [None]:
# Print collected data
for item in data:
    print("Link Text:", item["link_text"])
    print("Link URL:", item["link_url"])
    print("Link Description:", item["link_description"])
    print("Additional Data:", item["additional_data"])
    print("---------------------------------------------")

In [None]:
# Close the browser
# driver.quit()

In [None]:
# Create a dataframe from the collected data
df = pd.DataFrame(data)

# Add a timestamp column to the dataframe
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
df["timestamp"] = timestamp

In [None]:
excel_file = 'data.xlsx'
sheet_name = 'Sheet1'

In [None]:
try:
    with pd.ExcelWriter(excel_file, mode="a", engine="openpyxl") as writer:
        # Try to open the existing sheet, if it exists
        writer.book = writer.book if sheet_name in writer.book.sheetnames else None
        df.to_excel(writer, sheet_name=sheet_name, index=False, header=not writer.book)

except FileNotFoundError:
    # If the Excel file doesn't exist, create a new one with the dataframe
    df.to_excel(excel_file, sheet_name=sheet_name, index=False)

print("Data appended to Excel successfully.")

In [None]:
cities_data = [
    {"city": "New York", "latitude": 40.7128, "longitude": -74.0060},
    {"city": "Los Angeles", "latitude": 34.0522, "longitude": -118.2437},
    {"city": "Chicago", "latitude": 41.8781, "longitude": -87.6298},
    {"city": "Houston", "latitude": 29.7604, "longitude": -95.3698},
    {"city": "Phoenix", "latitude": 33.4484, "longitude": -112.0740},
    {"city": "Philadelphia", "latitude": 39.9526, "longitude": -75.1652},
    {"city": "San Antonio", "latitude": 29.4241, "longitude": -98.4936},
    {"city": "San Diego", "latitude": 32.7157, "longitude": -117.1611},
    {"city": "Dallas", "latitude": 32.7767, "longitude": -96.7970},
    {"city": "San Jose", "latitude": 37.3382, "longitude": -121.8863},
    {"city": "Austin", "latitude": 30.2672, "longitude": -97.7431},
    {"city": "Jacksonville", "latitude": 30.3322, "longitude": -81.6557},
    {"city": "San Francisco", "latitude": 37.7749, "longitude": -122.4194},
    {"city": "Columbus", "latitude": 39.9612, "longitude": -82.9988},
    {"city": "Indianapolis", "latitude": 39.7684, "longitude": -86.1581},
    {"city": "Fort Worth", "latitude": 32.7555, "longitude": -97.3308},
    {"city": "Charlotte", "latitude": 35.2271, "longitude": -80.8431},
    {"city": "Seattle", "latitude": 47.6062, "longitude": -122.3321},
    {"city": "Denver", "latitude": 39.7392, "longitude": -104.9903},
    {"city": "Washington, D.C.", "latitude": 38.9072, "longitude": -77.0369},
    {"city": "Boston", "latitude": 42.3601, "longitude": -71.0589},
    {"city": "El Paso", "latitude": 31.7619, "longitude": -106.4850},
    {"city": "Nashville", "latitude": 36.1627, "longitude": -86.7816},
    {"city": "Detroit", "latitude": 42.3314, "longitude": -83.0458},
    {"city": "Oklahoma City", "latitude": 35.4676, "longitude": -97.5164},
    {"city": "Portland", "latitude": 45.5051, "longitude": -122.6750},
    {"city": "Las Vegas", "latitude": 36.1699, "longitude": -115.1398},
    {"city": "Memphis", "latitude": 35.1495, "longitude": -90.0490},
    {"city": "Louisville", "latitude": 38.2527, "longitude": -85.7585},
    {"city": "Baltimore", "latitude": 39.2904, "longitude": -76.6122},
    {"city": "Milwaukee", "latitude": 43.0389, "longitude": -87.9065},
    {"city": "Albuquerque", "latitude": 35.0844, "longitude": -106.6504},
    {"city": "Tucson", "latitude": 32.2226, "longitude": -110.9747},
    {"city": "Fresno", "latitude": 36.7372, "longitude": -119.7871},
    {"city": "Mesa", "latitude": 33.4152, "longitude": -111.8315},
    {"city": "Sacramento", "latitude": 38.5816, "longitude": -121.4944},
    {"city": "Atlanta", "latitude": 33.7490, "longitude": -84.3880},
    {"city": "Kansas City", "latitude": 39.0997, "longitude": -94.5786},
    {"city": "Colorado Springs", "latitude": 38.8339, "longitude": -104.8214},
    {"city": "Miami", "latitude": 25.7617, "longitude": -80.1918},
    {"city": "Raleigh", "latitude": 35.7796, "longitude": -78.6382},
    {"city": "Long Beach", "latitude": 33.7701, "longitude": -118.1937},
    {"city": "Virginia Beach", "latitude": 36.8529, "longitude": -75.9780},
    {"city": "Oakland", "latitude": 37.8044, "longitude": -122.2712},
    {"city": "Minneapolis", "latitude": 44.9778, "longitude": -93.2650},
    {"city": "Tampa", "latitude": 27.9506, "longitude": -82.4572},
    {"city": "Tulsa", "latitude": 36.1540, "longitude": -95.9928},
    {"city": "New Orleans", "latitude": 29.9511, "longitude": -90.0715},
    {"city": "Wichita", "latitude": 37.6872, "longitude": -97.3301},
    {"city": "Cleveland", "latitude": 41.4993, "longitude": -81.6944},
    {"city": "Bakersfield", "latitude": 35.3733, "longitude": -119.0187},
    {"city": "Arlington", "latitude": 32.7357, "longitude": -97.1081},
]


# Write the dataframe to an Excel file
excel_file = "cities_data.xlsx"
sheet_name = "cities"
df.to_excel(excel_file, sheet_name=sheet_name, index=False)