<a href="https://colab.research.google.com/github/swordman08/HousingCA/blob/main/API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd

# Function to get city distance (transit distance was something we didn't have resources for)
def get_transit_distance(api_key, origin_lat, origin_lng, destination_city):
    """Query the Google Distance Matrix API for one origin/destination pair.

    The origin is sent as "<origin_lat>,<origin_lng>" and the destination as
    "<destination_city>, CA"; the request uses ``mode=transit`` and metric units.

    Args:
        api_key: Google API key sent as the ``key`` query parameter.
        origin_lat: Latitude of the origin; may be missing (None/NaN).
        origin_lng: Longitude of the origin; may be missing (None/NaN).
        destination_city: City name used as the destination; may be missing.

    Returns:
        A ``(distance, duration)`` tuple taken from the first element of the
        first row of the response: distance in meters and duration in seconds.
        ``(0, 0)`` is returned when any input is missing or when the response
        carries no usable element (empty ``rows``, empty ``elements`` or an
        element status other than ``'OK'``).

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    # Skip the API call entirely when any part of the query is missing;
    # a missing city would otherwise be sent as the literal text "nan, CA".
    if pd.isnull(origin_lat) or pd.isnull(origin_lng) or pd.isnull(destination_city):
        return 0, 0

    # Let requests build the query string so that spaces and characters such
    # as '&' or '#' in the city name are percent-encoded correctly.
    params = {
        "units": "metric",
        "origins": f"{origin_lat},{origin_lng}",
        "destinations": f"{destination_city}, CA",
        "mode": "transit",
        "key": api_key,
    }
    response = requests.get(
        "https://maps.googleapis.com/maps/api/distancematrix/json",
        params=params,
        timeout=10,  # never hang the whole loop on a single stalled request
    )
    result = response.json()

    # When the request as a whole is rejected, 'rows' is empty, so indexing
    # it directly raises IndexError. Treat that as "no valid result".
    rows = result.get('rows') or []
    if not rows:
        return 0, 0

    elements = rows[0].get('elements') or []
    if not elements or elements[0].get('status') != 'OK':
        return 0, 0  # Return 0 if the API doesn't return a valid result

    element = elements[0]
    distance = element['distance']['value']  # distance in meters
    duration = element['duration']['value']  # time in seconds (not used downstream)
    return distance, duration

# Read the housing dataset (swap the reader if the file is in another format)
df = pd.read_excel("lotwize_case.xlsx")

# Google API key (placeholder value)
api_key = "TookKeyOut"

# One record per dataset row is collected here
results2 = []

# Walk the rows in order and look up the distance for each one
for index, row in df.iterrows():
    origin_lat, origin_lng = row['latitude'], row['longitude']
    destination_city = row['address/city']

    # Distance and duration for this row's coordinates and city
    distance, duration = get_transit_distance(api_key, origin_lat, origin_lng, destination_city)

    # Keep the inputs next to the outputs so rows stay identifiable
    record = {
        "city": destination_city,
        "latitude": origin_lat,
        "longitude": origin_lng,
        "distance_meters": distance,
        "duration_seconds": duration,
    }
    results2.append(record)

# Build the result table; rows appear in the same order as the dataset
result_df = pd.DataFrame(results2)


IndexError: list index out of range

In [None]:
# Write the city distance table to CSV, leaving out the index column
result_df.to_csv(path_or_buf='DistanceToCityApi.csv', index=False)

In [None]:
import requests
from math import radians, cos, sin, sqrt, atan2

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    All four arguments are coordinates in degrees; the Earth is modeled as a
    sphere of radius 6371 km.
    """
    earth_radius_km = 6371.0

    # Work in radians from here on
    phi1, lam1, phi2, lam2 = (radians(deg) for deg in (lat1, lon1, lat2, lon2))

    # Half of the latitude / longitude differences
    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2

    # Haversine of the central angle, then the angle itself
    a = sin(half_dphi)**2 + cos(phi1) * cos(phi2) * sin(half_dlam)**2
    central_angle = 2 * atan2(sqrt(a), sqrt(1 - a))

    return earth_radius_km * central_angle

def get_nearest_park(api_key, lat, lon):
    """Find the closest park returned by a Google Places Nearby Search.

    Searches for places of type ``park`` within 2000 meters of ``(lat, lon)``
    and picks the one with the smallest haversine distance to that point.

    Args:
        api_key: Google API key sent as the ``key`` query parameter.
        lat: Latitude of the search center in degrees; may be missing.
        lon: Longitude of the search center in degrees; may be missing.

    Returns:
        ``(name, distance_km)`` for the closest returned park,
        ``(0, 0)`` when ``lat`` or ``lon`` is missing (None/NaN), or
        ``(None, None)`` when the response contains no results.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    # Validate the arguments themselves. Checking the outer loop variables
    # instead would only work by accident when called from that loop.
    if pd.isnull(lat) or pd.isnull(lon):
        return 0, 0

    params = {
        "location": f"{lat},{lon}",
        "radius": 2000,
        "type": "park",
        "key": api_key,
    }
    response = requests.get(
        "https://maps.googleapis.com/maps/api/place/nearbysearch/json",
        params=params,
        timeout=10,  # never hang the whole loop on a single stalled request
    )
    result = response.json()

    # 'results' may be absent or empty, e.g. when the request is rejected
    parks = result.get('results') or []
    if not parks:
        return None, None

    # Nearby Search orders results by prominence unless told otherwise, so the
    # first entry is not guaranteed to be the closest: compare all of them.
    nearest_park = None
    nearest_distance = None
    for park in parks:
        location = park['geometry']['location']
        distance = haversine(lat, lon, location['lat'], location['lng'])
        if nearest_distance is None or distance < nearest_distance:
            nearest_park = park.get('name')
            nearest_distance = distance

    return nearest_park, nearest_distance


# Google API key (placeholder value)
api_key = "TookKeyOut"

# One record per dataset row is collected here
DistanceToParks = []

# Walk the rows in order and look up the nearest park for each one
for index, row in df.iterrows():
    origin_lat, origin_lng = row['latitude'], row['longitude']
    destination_city = row['address/city']

    # Name of the park and its distance from this row's coordinates
    nearest_park, distance = get_nearest_park(api_key, origin_lat, origin_lng)

    # Only the coordinates and the distance are kept in the output
    park_record = {
        "latitude": origin_lat,
        "longitude": origin_lng,
        "distance_kilometers": distance,
    }
    DistanceToParks.append(park_record)

# Build the result table; rows appear in the same order as the dataset
distanceToParks_df = pd.DataFrame(DistanceToParks)





In [None]:
# Write the park distance table to CSV, leaving out the index column
distanceToParks_df.to_csv(path_or_buf='DistanceToParksApi.csv', index=False)

In [None]:
import requests
import pandas as pd
import math

# Key used for every Google Places request below (placeholder value)
API_KEY = 'TookKeyOut!'

def get_nearby_malls(lat, lng, radius=5000):
    """Return the shopping malls Google Places reports around a coordinate.

    Issues one Nearby Search request for type ``shopping_mall`` centered on
    ``(lat, lng)`` with the given ``radius`` (meters, 5000 by default), using
    the module-level ``API_KEY``.

    Returns:
        A list with one dict per returned place, in response order, holding
        the keys ``name``, ``address`` (the place's ``vicinity`` or ``''``),
        ``latitude`` and ``longitude``. The list is empty when the response
        has no ``results``.
    """
    query = dict(
        location=f'{lat},{lng}',
        radius=radius,
        type='shopping_mall',
        key=API_KEY,
    )
    payload = requests.get(
        "https://maps.googleapis.com/maps/api/place/nearbysearch/json",
        params=query,
    ).json()

    # Reduce each place to the handful of fields the analysis needs
    return [
        {
            'name': place['name'],
            'address': place.get('vicinity', ''),
            'latitude': place['geometry']['location']['lat'],
            'longitude': place['geometry']['location']['lng'],
        }
        for place in payload.get('results', [])
    ]

# Great-circle distance between two latitude/longitude points (haversine formula)
def haversine(lat1, lon1, lat2, lon2):
    """Return the distance in kilometers between two points given in degrees.

    The Earth is modeled as a sphere of radius 6371 km.
    """
    earth_radius_km = 6371

    # Latitudes are needed in radians both for the cosines and the difference
    lat1_rad, lat2_rad = math.radians(lat1), math.radians(lat2)
    dlat = lat2_rad - lat1_rad
    dlon = math.radians(lon2) - math.radians(lon1)

    # Squared sines of the half-differences
    sin_sq_half_dlat = math.sin(dlat / 2)**2
    sin_sq_half_dlon = math.sin(dlon / 2)**2

    # Haversine of the central angle
    a = sin_sq_half_dlat + math.cos(lat1_rad) * math.cos(lat2_rad) * sin_sq_half_dlon

    # Central angle in radians, scaled by the Earth's radius
    return earth_radius_km * (2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)))

# Read the housing dataset
df = pd.read_excel('lotwize_case.xlsx')

# One distance (or None) per dataset row, in dataset order
mall_distances = []

for index, row in df.iterrows():
    latitude, longitude = row.get('latitude'), row.get('longitude')

    # Stays None when coordinates are missing or no mall is returned
    distance = None

    if not (pd.isna(latitude) or pd.isna(longitude)):
        malls = get_nearby_malls(latitude, longitude)
        if malls:
            # The first returned mall is assumed to be the nearest one
            nearest_mall = malls[0]
            distance = haversine(
                latitude,
                longitude,
                nearest_mall['latitude'],
                nearest_mall['longitude'],
            )

    mall_distances.append(distance)

# Collect the distances into their own single-column DataFrame
# (the dataset frame `df` itself is not modified)
mall_df = pd.DataFrame(mall_distances)

# Preview the first rows of the dataset
df.head()


OSError: Cannot save file into a non-existent directory: '/mnt/data'

In [None]:
# Write the mall distance table to CSV, leaving out the index column
mall_df.to_csv(path_or_buf='DistanceToMalls.csv', index=False)

In [None]:
import pandas as pd
# Not used in the final analysis
df = pd.read_excel('lotwize_case.xlsx')

# Distinct zip codes, in order of first appearance in the dataset
unique_zipcodes = df['address/zipcode'].unique()

# Wrap them in a single-column DataFrame
zipcodes_df = pd.DataFrame({'ZipCode': unique_zipcodes})

# Persist the list to CSV (no index column) in case it is needed later
zipcodes_df.to_csv(path_or_buf='unique_zipcodes.csv', index=False)

# Preview the first few zip codes
zipcodes_df.head()

Unnamed: 0,ZipCode
0,94107
1,90278
2,90046
3,92867
4,92663
