In [2]:
#!pip install googlemaps

In [3]:
import googlemaps
import pandas as pd
import time

In [24]:
# Replace with your own API Key
API_KEY = 'YOUR_API_KEY'
gmaps = googlemaps.Client(key=API_KEY)

In [15]:
houses_df = pd.read_csv("csv_file_path")

In [18]:
# Function to get the closest school and its rating
def get_closest_school_info(lat, lon, radius=15000, school_type="school"): # radius is in meters, so 2000 = 2 kilometers
    try:
        places_result = gmaps.places_nearby(location=(lat, lon), radius=radius, type=school_type)
        
        # If there are results, get the first (closest) school
        if places_result['results']:
            closest_school = places_result['results'][0]
            school_name = closest_school['name']
            school_rating = closest_school.get('rating', 'N/A')  # Default to 'N/A' if rating not available
            school_address = closest_school['vicinity']
            
            # Calculate distance to school
            distance_result = gmaps.distance_matrix(origins=(lat, lon), destinations=school_address, mode="driving")
            distance = distance_result['rows'][0]['elements'][0]['distance']['text']
            
            return school_name, school_rating, distance
        else:
            return "No nearby school", "N/A", "N/A"
    except Exception as e:
        print(f"Error: {e}")
        return "Error", "Error", "Error"

# Add columns for closest school information
houses_df['closest_school'] = ''
houses_df['school_rating'] = ''
houses_df['school_distance'] = ''

# Populate the columns for each row, only if 'closest_school' is empty or null
for index, row in houses_df.iterrows():
    if pd.isnull(row['closest_school']) or row['closest_school'] == '':
        # Proceed with API request if 'closest_school' is empty
        school_name, school_rating, distance = get_closest_school_info(row['latitude'], row['longitude'])
        houses_df.at[index, 'closest_school'] = school_name
        houses_df.at[index, 'school_rating'] = school_rating
        houses_df.at[index, 'school_distance'] = distance
    
    # Print progress every 500 records
    if (index + 1) % 500 == 0:
        print(f"{index + 1} records have been updated.")


Error: 'distance'
Error: 'distance'
Error: 'distance'
500 records have been updated.
Error: 'distance'
Error: 'distance'
1000 records have been updated.
1500 records have been updated.
2000 records have been updated.
2500 records have been updated.
Error: INVALID_REQUEST
3000 records have been updated.
3500 records have been updated.
4000 records have been updated.
4500 records have been updated.
5000 records have been updated.
5500 records have been updated.
6000 records have been updated.
6500 records have been updated.
7000 records have been updated.
7500 records have been updated.
8000 records have been updated.
8500 records have been updated.
Error: 'distance'
9000 records have been updated.
Error: 'distance'
9500 records have been updated.
Error: INVALID_REQUEST
10000 records have been updated.
10500 records have been updated.
11000 records have been updated.
11500 records have been updated.
12000 records have been updated.
12500 records have been updated.
13000 records have been

In [20]:
# Count records with null values in 'closest_school', 'school_rating', and 'school_distance'
null_closest_school_count = houses_df['closest_school'].isnull().sum() + (houses_df['closest_school'] == '').sum()
null_school_rating_count = houses_df['school_rating'].isnull().sum() + (houses_df['school_rating'] == 'N/A').sum()
null_school_distance_count = houses_df['school_distance'].isnull().sum() + (houses_df['school_distance'] == 'N/A').sum()

# For non-numeric values in 'school_rating' and 'school_distance'
non_numeric_school_rating_count = houses_df['school_rating'].apply(lambda x: not pd.to_numeric(x, errors='coerce')).sum()
non_numeric_school_distance_count = houses_df['school_distance'].apply(lambda x: not pd.to_numeric(str(x).replace(" km", "").replace(" mi", ""), errors='coerce')).sum()

print("Records with null or empty 'closest_school':", null_closest_school_count)
print("Records with null or 'N/A' 'school_rating':", null_school_rating_count)
print("Records with non-numeric 'school_rating':", non_numeric_school_rating_count)
print("Records with null or 'N/A' 'school_distance':", null_school_distance_count)
print("Records with non-numeric 'school_distance':", non_numeric_school_distance_count)


Records with null or empty 'closest_school': 0
Records with null or 'N/A' 'school_rating': 121
Records with non-numeric 'school_rating': 0
Records with null or 'N/A' 'school_distance': 9
Records with non-numeric 'school_distance': 0


In [22]:
# Save to csv after updates
csv_file_path = "csv_file_path"
houses_df.to_csv(csv_file_path, index=False)

# Adding Distance to Other Amenities

In [26]:
# Function to get distances for specific amenities
def get_nearby_distances(lat, lon, radius=15000):
    """
    Fetches the distances to the nearest restaurant, grocery store, park, and hospital.
    
    Parameters:
    - lat (float): Latitude of the location.
    - lon (float): Longitude of the location.
    - radius (int): Radius in meters for the search area.
    
    Returns:
    - dict: Dictionary containing distances for each amenity type.
    """
    amenity_types = {
        "restaurant": "restaurant_distance",
        "grocery_or_supermarket": "grocery_distance",
        "park": "park_distance",
        "hospital": "hospital_distance"
    }
    
    distances = {}
    
    for amenity_type, column_name in amenity_types.items():
        try:
            # Find the nearest place of the specified type
            places_result = gmaps.places_nearby(location=(lat, lon), radius=radius, type=amenity_type)
            if places_result['results']:
                # Get the address of the closest place
                place_address = places_result['results'][0]['vicinity']
                
                # Calculate driving distance to the closest place
                distance_result = gmaps.distance_matrix(origins=(lat, lon), destinations=place_address, mode="driving")
                if distance_result['rows'][0]['elements'][0].get('distance'):
                    distance = distance_result['rows'][0]['elements'][0]['distance']['text']
                else:
                    distance = "50"  # Set default distance if exact distance is unavailable
            else:
                distance = "50"  # Set high default distance if no places found within the radius
            
            distances[column_name] = distance
        
        except Exception as e:
            print(f"Error fetching {amenity_type} data: {e}")
            distances[column_name] = "50"  # Default distance for errors
    
    return distances

# Add columns for the distances to each amenity type
houses_df['restaurant_distance'] = ''
houses_df['grocery_distance'] = ''
houses_df['park_distance'] = ''
houses_df['hospital_distance'] = ''

# Populate the columns for each row
for index, row in houses_df.iterrows():
    if pd.isnull(row['restaurant_distance']) or row['restaurant_distance'] == '':
        distances = get_nearby_distances(row['latitude'], row['longitude'])
        
        # Update DataFrame columns for each amenity distance
        for column_name, distance in distances.items():
            houses_df.at[index, column_name] = distance
    
    # Print progress every 500 records
    if (index + 1) % 500 == 0:
        print(f"{index + 1} records have been updated.")



500 records have been updated.
1000 records have been updated.
1500 records have been updated.
2000 records have been updated.
2500 records have been updated.
Error fetching restaurant data: INVALID_REQUEST
Error fetching grocery_or_supermarket data: INVALID_REQUEST
Error fetching park data: INVALID_REQUEST
Error fetching hospital data: INVALID_REQUEST
3000 records have been updated.
3500 records have been updated.
4000 records have been updated.
4500 records have been updated.
5000 records have been updated.
5500 records have been updated.
6000 records have been updated.
6500 records have been updated.
7000 records have been updated.
7500 records have been updated.
8000 records have been updated.
8500 records have been updated.
Error fetching park data: INVALID_REQUEST
Error fetching park data: INVALID_REQUEST
9000 records have been updated.
9500 records have been updated.
Error fetching restaurant data: INVALID_REQUEST
Error fetching grocery_or_supermarket data: INVALID_REQUEST
Error

In [28]:
# List of columns to clean
distance_columns = ['school_distance', 'restaurant_distance', 'grocery_distance', 'park_distance', 'hospital_distance']

# Remove ' km' suffix from each specified distance column
for column in distance_columns:
    houses_df[column] = houses_df[column].str.replace(' km', '', regex=False)

# Display the first few rows to confirm the removal
print(houses_df[distance_columns].head())

  school_distance restaurant_distance grocery_distance park_distance  \
0             5.1                12.4               50           2.8   
1             0.2                 2.6              1.2           2.5   
2             5.4                 5.9              4.7           4.8   
3            16.0                15.1              1.6           4.1   
4            19.5                 3.7             12.2          19.2   

  hospital_distance  
0                50  
1               3.1  
2               6.3  
3               2.1  
4              18.7  


In [30]:
distance_amenities_file = 'csv_file_path'
houses_df.to_csv(distance_amenities_file, index=False)