# In [None]:
import os
import csv
import requests
from bs4 import BeautifulSoup
import re
import time

# Function to get latitude and longitude from findlatitudeandlongitude.com
def get_lat_lon(street, city, state, postal_code, timeout=10):
    """Look up coordinates for a US street address by scraping a web page.

    The address is POSTed as form data to findlatitudeandlongitude.com and
    the response HTML is searched for a ``div`` with class ``mapTitle`` whose
    text contains ``Latitude: <number>`` and ``Longitude: <number>``.

    Args:
        street: Street portion of the address.
        city: City name.
        state: State value sent in the ``state`` form field.
        postal_code: Value sent in the ``postalCode`` form field.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which would hang the whole batch.

    Returns:
        A ``(lat, lon)`` tuple of strings exactly as captured from the page,
        or ``(None, None)`` when the request fails or the page does not
        contain both values. This function never raises; failures are
        printed and reported through the ``(None, None)`` result.
    """
    url = "https://www.findlatitudeandlongitude.com/"
    data = {
        'street': street,
        'city': city,
        'state': state,
        'postalCode': postal_code,
        'country': 'US'
    }

    try:
        response = requests.post(url, data=data, timeout=timeout)
        response.raise_for_status()

        # NOTE(review): the lookup depends on the page's current markup
        # (a 'mapTitle' div) -- confirm it still matches the live site.
        soup = BeautifulSoup(response.content, 'html.parser')
        lat_lon_div = soup.find('div', class_='mapTitle')
        if lat_lon_div is None:
            return None, None

        lat_lon_text = lat_lon_div.get_text(strip=True)
        lat_match = re.search(r'Latitude:\s*([-\d.]+)', lat_lon_text)
        lon_match = re.search(r'Longitude:\s*([-\d.]+)', lat_lon_text)
        if lat_match is None or lon_match is None:
            return None, None

        return lat_match.group(1), lon_match.group(1)
    except Exception as e:
        # Deliberately broad: one bad address must not abort the whole run.
        print(f"Error fetching coordinates for {street}, {city}, {state} {postal_code}: {e}")
        return None, None

# Function to cross-reference and update geocoded files
def update_geocoded_files(geocoding_errors_log, directory):
    """Retry geocoding for every failed address listed in an error log.

    Each log line containing ``Address returned None`` is parsed into
    street / city / state / ZIP. Coordinates are fetched with
    ``get_lat_lon`` and, when both are returned, written into the geocoded
    CSV files under ``directory`` via ``update_files``.

    Args:
        geocoding_errors_log: Path of the text log to read.
        directory: Directory passed through to ``update_files``.

    Returns:
        None. Progress is reported with ``print``.
    """
    with open(geocoding_errors_log, 'r') as infile:
        for line in infile:
            parsed = _parse_failed_address(line)
            if parsed is None:
                continue
            street, city, state, postal_code = parsed

            # Fetch latitude and longitude
            print(f"Fetching coordinates for: {street}, {city}, {state} {postal_code}")
            lat, lon = get_lat_lon(street, city, state, postal_code)

            # If successful, update the matching entries in geocoded files
            if lat and lon:
                print(f"Updating coordinates for: {street}, {city}, {state} {postal_code}")
                update_files(directory, street, city, state, postal_code, lat, lon)

            # Sleep to avoid overwhelming the website
            time.sleep(2)


def _parse_failed_address(line):
    """Split one error-log line into (street, city, state, postal_code).

    Returns None when the line is not an "Address returned None" entry or
    when the address has fewer than four comma-separated parts.
    """
    marker = "Address returned None"
    if marker not in line:
        return None

    # Keep only the text after the marker so a logger prefix (timestamp,
    # level, logger name) does not end up inside the street value. If the
    # colon form of the marker is absent, fall back to the whole line.
    _, found, tail = line.partition(marker + ":")
    address = (tail if found else line).strip()

    parts = address.split(',')
    if len(parts) < 4:
        return None

    street = parts[0].strip()
    city = parts[1].strip()
    # split() with no argument collapses runs of whitespace, so
    # "NY  10001" still yields the ZIP instead of an empty token.
    state_zip = parts[2].split()
    state = state_zip[0] if state_zip else ""
    postal_code = state_zip[1] if len(state_zip) > 1 else ""
    return street, city, state, postal_code

# Function to update latitude and longitude in geocoded files
def update_files(directory, street, city, state, postal_code, lat, lon):
    """Write new coordinates into matching rows of the geocoded CSV files.

    Every file in ``directory`` whose name starts with ``geocoded`` and ends
    with ``.csv`` is scanned. Rows whose stripped ``Address``, ``City``,
    ``State`` and ``ZIP`` cells equal the given values get their
    ``Latitude`` and ``Longitude`` cells replaced.

    A file is only replaced when at least one row changed, so untouched
    files keep their original bytes. Files with no header row or without
    the expected columns are skipped with a printed notice. The temporary
    file is removed if anything goes wrong while writing it.

    Args:
        directory: Directory containing the CSV files.
        street: Value compared against the ``Address`` column.
        city: Value compared against the ``City`` column.
        state: Value compared against the ``State`` column.
        postal_code: Value compared against the ``ZIP`` column.
        lat: Value stored in ``Latitude`` for matching rows.
        lon: Value stored in ``Longitude`` for matching rows.

    Returns:
        None.
    """
    match_columns = ('Address', 'City', 'State', 'ZIP')
    required_columns = match_columns + ('Latitude', 'Longitude')
    target = (street, city, state, postal_code)

    for filename in os.listdir(directory):
        if not (filename.startswith("geocoded") and filename.endswith(".csv")):
            continue

        filepath = os.path.join(directory, filename)
        temp_filepath = os.path.join(directory, f"temp_{filename}")
        changed = False

        with open(filepath, mode='r', newline='') as infile:
            reader = csv.DictReader(infile)
            fieldnames = reader.fieldnames

            # An empty file has no header (fieldnames is None); a file with
            # another schema cannot be matched or updated.
            if not fieldnames or not all(col in fieldnames for col in required_columns):
                print(f"Skipping {filepath}: missing expected columns")
                continue

            try:
                with open(temp_filepath, mode='w', newline='') as outfile:
                    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
                    writer.writeheader()

                    for row in reader:
                        # DictReader fills cells missing from a short row
                        # with None, so fall back to '' before stripping.
                        key = tuple((row.get(col) or '').strip() for col in match_columns)
                        if key == target:
                            row['Latitude'] = lat
                            row['Longitude'] = lon
                            changed = True

                        # Write the updated or unchanged row to the new file
                        writer.writerow(row)
            except BaseException:
                # Do not leave a half-written temp file behind.
                if os.path.exists(temp_filepath):
                    os.remove(temp_filepath)
                raise

        if changed:
            # Replace the original file with the updated file
            os.replace(temp_filepath, filepath)
        else:
            os.remove(temp_filepath)

if __name__ == "__main__":
    # Script entry point: re-geocode the addresses recorded in the error log
    # and patch the geocoded CSV files in the data directory.
    update_geocoded_files(
        geocoding_errors_log='geocoding_errors.log',
        directory='Crime2023EXCEL',
    )
