In [None]:
# Install required libraries
!pip install polyline
!pip install requests

In [None]:
# Import all required libraries here.
import pandas as pd
import numpy as np
import requests
import polyline
import json
from datetime import datetime
from operator import itemgetter

In [None]:
# API keys used by the helper functions below.
# Note: when you add an API key here, list the functions that use it next to it so usage is easier to track.
# The values below are placeholders; replace them locally and do not commit real keys.
HERE_API_KEY = 'YOUR_HERE_API_KEY' # [latlng_to_city, city_to_latlng] (Prer API Key)
HERE_APP_ID = 'YOUR_HERE_API_ID'  # NOTE(review): not referenced by any function in the visible cells - confirm it is still needed

GOOGLE_API_KEY = 'YOUR_GOOGLE_API_KEY' # [get_routes, get_placetype_info] (Srishti API Key)
OPEN_WEATHER_API_KEY = 'YOUR_OPEN_WEATHER_API_KEY'  # [get_weatherData_for_latlng] (Rashi API Key)
TOMTOM_KEY='YOUR_TOMTOM_API_KEY' # [get_traffic_data]

In [None]:
#Helper Functions:

# Reverse geocoding with the HERE API key: coordinates -> city name.
def latlng_to_city(latitude, longitude):
    """Return the city name HERE reports for the given coordinates.

    Args:
        latitude: Latitude of the point.
        longitude: Longitude of the point.

    Returns:
        The ``address.city`` value of the first reverse-geocoding match.
        Failures are reported in-band, as before, with sentinel strings:
        ``"No results found"`` when there is no match (or the match carries
        no city) and ``"Error in API call"`` when the request fails or the
        service answers with a non-200 status. Callers must compare against
        these strings before treating the result as a city name.
    """
    try:
        response = requests.get(
            "https://revgeocode.search.hereapi.com/v1/revgeocode",
            # Let requests build and encode the query string.
            params={"at": f"{latitude},{longitude}", "apiKey": HERE_API_KEY},
            # Without a timeout a stalled connection would block the notebook forever.
            timeout=10,
        )
    except requests.RequestException as exc:
        print('REV_GEO', exc)
        return "Error in API call"

    if response.status_code != 200:
        print('REV_GEO', response.status_code)
        return "Error in API call"

    results = response.json().get('items', [])
    if not results:
        return "No results found"

    # A match is not guaranteed to carry a city; avoid a KeyError in that case.
    city = results[0].get('address', {}).get('city')
    return city if city else "No results found"

# Forward geocoding with the HERE API key: city name -> coordinates.
def city_to_latlng(city_name):
    """Return the coordinates HERE reports for a city name.

    Args:
        city_name: Free-text place name to geocode (e.g. ``'New York'``).

    Returns:
        A ``(lat, lng)`` tuple taken from the ``position`` of the first match
        (either value is ``None`` if the match lacks it). Failures are
        reported in-band, as before, with sentinel strings:
        ``"No results found"`` or ``"Error in API call"``. Callers that unpack
        the result into two names must check for these strings first.
    """
    try:
        response = requests.get(
            "https://geocode.search.hereapi.com/v1/geocode",
            # params= URL-encodes the name, so spaces, '&', '#', etc. cannot
            # corrupt the query string as they could with string formatting.
            params={"q": city_name, "apiKey": HERE_API_KEY},
            # Without a timeout a stalled connection would block the notebook forever.
            timeout=10,
        )
    except requests.RequestException:
        return "Error in API call"

    if response.status_code != 200:
        return "Error in API call"

    results = response.json().get('items', [])
    if not results:
        return "No results found"

    location = results[0].get('position', {})
    return location.get('lat'), location.get('lng')

# Great-circle distance (haversine formula), used to find the nearest city.
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometers between two points.

    All four arguments are in decimal degrees. The Earth is modelled as a
    sphere with a radius of 6371 km.
    """
    earth_radius_km = 6371

    # The trigonometric functions below expect radians.
    phi1, lam1, phi2, lam2 = (np.radians(angle) for angle in (lat1, lon1, lat2, lon2))

    delta_phi = phi2 - phi1
    delta_lam = lam2 - lam1

    # Haversine of the central angle between the two points.
    hav = np.sin(delta_phi/2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lam/2)**2
    central_angle = 2 * np.arcsin(np.sqrt(hav))
    return central_angle * earth_radius_km

# Find the city in cities_df that lies closest to a given point.
def find_nearest_city(lat, lon, cities_df):
    """Return the row of ``cities_df`` nearest to ``(lat, lon)``.

    ``cities_df`` must provide ``'lat'`` and ``'lng'`` columns. Distances are
    computed row by row with ``haversine`` and the row with the smallest
    distance is returned.
    """
    def _km_from_point(city_row):
        return haversine(lat, lon, city_row['lat'], city_row['lng'])

    km_per_city = cities_df.apply(_km_from_point, axis=1)
    nearest_label = km_per_city.idxmin()
    return cities_df.loc[nearest_label]

# Decode Polyline function - From polyline string
def decode_polyline(polyline_str):
    """Decode an encoded polyline string with ``polyline.decode``.

    Args:
        polyline_str: Encoded polyline, e.g. the ``overview_polyline.points``
            value of a Directions API route.

    Returns:
        Whatever ``polyline.decode`` yields for the string; downstream code
        indexes each element as ``[0]`` = latitude and ``[1]`` = longitude.
    """
    # The two statements that used to follow the return were unreachable and
    # referenced undefined names (sourceLat, destLat, ...); they were removed.
    return polyline.decode(polyline_str)

In [None]:
# Get Routes API: use the Google Maps Directions API to get multiple routes between two points.
def get_routes(start, destination):
    """Fetch alternative driving routes between two points.

    Args:
        start: Origin as accepted by the Directions API; the notebook passes
            a ``"lat,lng"`` string.
        destination: Destination in the same format.

    Returns:
        The decoded Directions API JSON response, or ``None`` when the request
        fails or the server answers with a non-200 HTTP status.
    """
    url = 'https://maps.googleapis.com/maps/api/directions/json'
    # Let requests build and URL-encode the query string.
    params = {
        'origin': start,
        'destination': destination,
        'key': GOOGLE_API_KEY,
        'alternatives': 'true',
    }

    print("Using Google Directions API to get routes between start and destination")

    try:
        # Without a timeout a stalled connection would block the notebook forever.
        r = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Error in Directions API call: {exc}")
        return None

    if r.status_code != 200:
        print("Error in Directions API call")
        return None

    route_json = r.json()

    # The Directions API reports request-level problems (bad key, no route, ...)
    # in the JSON body with HTTP 200, so surface them instead of hiding them.
    api_status = route_json.get('status')
    if api_status not in (None, 'OK'):
        print(f"Directions API returned status {api_status}: {route_json.get('error_message', '')}")

    num_routes = len(route_json.get('routes', []))
    print(f"Received response from Directions API and found {num_routes} between start and destination")

    return route_json

In [None]:
# Get weather data for each latitude and longitude:
def get_weatherData_for_latlng(lat, lng):
    """Fetch the OpenWeatherMap forecast for the city at ``(lat, lng)``.

    The point is reverse-geocoded to a city name, the city name is resolved to
    an OpenWeatherMap city id through the current-weather endpoint, and the
    forecast endpoint is then queried with that id (metric units).

    Args:
        lat: Latitude of the point.
        lng: Longitude of the point.

    Returns:
        The decoded forecast JSON, or ``None`` if any step fails.
    """
    city_name = latlng_to_city(lat, lng)

    # latlng_to_city signals failure with sentinel strings; querying the
    # weather service for them can only fail, so stop here.
    if not city_name or city_name in ("No results found", "Error in API call"):
        print('Error in Retrieving city_id, skipping the code to get weather data')
        return None

    country_code = 'US'
    params = {
        "q": f"{city_name},{country_code}",
        "appid": OPEN_WEATHER_API_KEY,  # Used OpenWeatherApiKey to get city Id
    }

    # This url is used to look up the city information (and its id).
    # https keeps the API key in the query string off the wire in clear text.
    base_url = "https://api.openweathermap.org/data/2.5/weather"
    try:
        response = requests.get(base_url, params=params, timeout=10)
    except requests.RequestException:
        print('Error in Retrieving city_id, skipping the code to get weather data')
        return None

    # Check the status before decoding: an error body is not guaranteed to be JSON.
    if response.status_code != 200:
        print('Error in Retrieving city_id, skipping the code to get weather data')
        return None

    city_id = response.json().get('id')
    if city_id is None:
        print('Error in Retrieving city_id, skipping the code to get weather data')
        return None

    # The forecast request below is keyed by city id, hence the lookup above.
    forecast_url = "https://api.openweathermap.org/data/2.5/forecast"
    params = {
        "id": city_id,
        "appid": OPEN_WEATHER_API_KEY,
        "units": "metric"
    }
    try:
        response = requests.get(forecast_url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    return response.json()

In [None]:
# Function to get safety score for each city using US Crime Data dataset:
def preprocess_safetyScore_data():
    """Build a per-city safety score table from the US crime dataset.

    Mounts Google Drive, reads ``US_Crime_Data.csv``, drops the descriptive
    columns and rows without a city or state, writes the result to
    ``preprocessed_crime_data.csv`` in the working directory, and counts the
    remaining records per ``(City, State)``.

    Returns:
        DataFrame with columns ``City``, ``State``, ``crime_count`` and
        ``safety_score`` where ``safety_score = 1 / (crime_count + 1)``
        (higher score = safer city).
    """
    # Imported here rather than at the top of the notebook; google.colab is
    # only importable inside a Colab runtime.
    from google.colab import drive
    drive.mount('/content/drive')

    data = pd.read_csv('/content/drive/MyDrive/US_Crime_Data.csv')
    print(data.head(1))

    # Data processing (no in-place mutation, so the raw frame stays intact)
    columns_to_drop = ['Title', 'Organization', 'URL', 'Keyword', 'Summary']
    cleaned = data.drop(columns=columns_to_drop).dropna(subset=['City', 'State'])

    # Write and re-read through the same path. Previously the file was written
    # relative to the working directory but read back from '/content/...',
    # which only matched when the working directory happened to be /content.
    preprocessed_path = 'preprocessed_crime_data.csv'
    cleaned.to_csv(preprocessed_path, index=False)
    df = pd.read_csv(preprocessed_path)

    # Count records for each city + state
    crime_counts = df.groupby(['City', 'State']).size().reset_index(name='crime_count')

    # Calculate safety score
    # Higher score = Safer city
    crime_counts['safety_score'] = 1 / (crime_counts['crime_count'] + 1)

    # Analyze safety scores
    print(crime_counts.sort_values('safety_score', ascending=False))
    return crime_counts

In [None]:
_CLEANED_CRIME_DATA_PATH = '/content/drive/MyDrive/cleaned_crime_data_ss.csv'
_cleaned_crime_data = None  # filled on first use by _load_cleaned_crime_data


def _load_cleaned_crime_data():
    """Read the cleaned crime CSV once and reuse the frame on later calls."""
    global _cleaned_crime_data
    if _cleaned_crime_data is None:
        _cleaned_crime_data = pd.read_csv(_CLEANED_CRIME_DATA_PATH)
    return _cleaned_crime_data


def get_safetyScore_for_latlng(lat, lng):
    """Return the safety score of the city at ``(lat, lng)``.

    Args:
        lat: Latitude of the point.
        lng: Longitude of the point.

    Returns:
        The ``safety_score_x`` value of the first row of the cleaned crime
        data whose ``City`` equals the reverse-geocoded city name, or ``0``
        when no row matches.
    """
    # Get the city name for the given latitude and longitude
    city_name = latlng_to_city(lat, lng)

    # The CSV used to be re-read on every call (once per route point); it is
    # now loaded a single time and cached for the rest of the session.
    cleanedCrimeDataDf = _load_cleaned_crime_data()

    # Only the city name is matched; the state is not available here.
    # NOTE(review): same-named cities in different states are therefore
    # indistinguishable and the first matching row wins - confirm acceptable.
    is_present = cleanedCrimeDataDf[(cleanedCrimeDataDf['City'] == city_name)]

    if not is_present.empty:
        print(f'Retrieved safety score for City: {city_name}')

        # Get the value of the safety score column in the first matching row
        safety_score = is_present.iloc[0]['safety_score_x']

        return safety_score
    else:
        print(f'City not found for {city_name}. Skipping Safety score information')
        return 0

In [None]:
#Get Traffic Data - Using Tom Tom API
def calculate_central_point(points):
    """Return the arithmetic mean ``(lat, lng)`` of a sequence of points.

    Each point is indexed as ``[0]`` = latitude and ``[1]`` = longitude.
    """
    mean_lat = sum(map(itemgetter(0), points)) / len(points)
    mean_lng = sum(map(itemgetter(1), points)) / len(points)
    return mean_lat, mean_lng

def get_traffic_data(lineSegmentStart, lineSegmentEnd):
    """Fetch TomTom flow-segment traffic data for the midpoint of a segment.

    Args:
        lineSegmentStart: ``(lat, lng)`` of the segment start.
        lineSegmentEnd: ``(lat, lng)`` of the segment end.

    Returns:
        The decoded TomTom JSON response, or ``None`` when the request fails
        or the service answers with a non-200 status.
    """
    # Traffic is queried at the central point of the two segment ends.
    central_lat, central_lng = calculate_central_point([lineSegmentStart, lineSegmentEnd])

    traffic_url = 'https://api.tomtom.com/traffic/services/4/flowSegmentData/absolute/10/json'
    # requests encodes the comma in "lat,lng" as %2C, matching the URL that
    # was previously assembled by hand.
    params = {'point': f'{central_lat},{central_lng}', 'key': TOMTOM_KEY}

    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(traffic_url, params=params, timeout=10)
    except requests.RequestException:
        response = None

    if response is not None and response.status_code == 200:
        return response.json()

    print(f"Failed to get traffic data for {central_lat}, {central_lng}, Skipping traffic information")
    return None

In [None]:
def get_placetype_info(lat, lng, place_type):
    """Query Google Places Nearby Search around a point for one place type.

    Args:
        lat: Latitude of the search centre.
        lng: Longitude of the search centre.
        place_type: Value sent as the ``type`` parameter of the request.

    Returns:
        The ``results`` entry of the JSON response (``None`` if the response
        has no such entry), or ``None`` when the request fails or the service
        answers with a non-200 status.
    """
    # Search radius in meters
    radius = 2500

    places_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    # Let requests build and URL-encode the query string.
    params = {
        'location': f'{lat},{lng}',
        'radius': radius,
        'type': place_type,
        'key': GOOGLE_API_KEY,
    }

    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(places_url, params=params, timeout=10)
    except requests.RequestException:
        response = None

    if response is not None and response.status_code == 200:
        return response.json().get('results')

    print(f"Failed to get places data for {lat}, {lng}, Skipping places information")
    return None

def get_places_data(lat, lng):
    """Count nearby gas stations, rest stops and restaurants around a point.

    Args:
        lat: Latitude of the search centre.
        lng: Longitude of the search centre.

    Returns:
        Dict with the keys ``'Gas Stations'``, ``'Rest Stops'`` and
        ``'Restaurants'``. Every key is always present; a category whose
        lookup failed is reported as ``0`` so callers can index the dict
        without hitting a ``KeyError``.
    """
    # Places ``type`` value -> key used in the returned dict.
    # 'gas_station' is the documented Places type; the previous value
    # 'gas_stations' is not a supported type.
    # NOTE(review): 'rest_stop' does not appear in the legacy Nearby Search
    # type table - confirm it is honoured by this endpoint.
    place_type_labels = {
        'gas_station': 'Gas Stations',
        'rest_stop': 'Rest Stops',
        'restaurant': 'Restaurants',
    }

    places_data = {}
    for place_type, label in place_type_labels.items():
        places_output = get_placetype_info(lat, lng, place_type)
        # The count is the number of results in the single response page.
        places_data[label] = len(places_output) if places_output is not None else 0

    return places_data

In [None]:
def parse_route_data(route_json):
    """Extract coordinates, distance and duration for every returned route.

    Args:
        route_json: Decoded Directions API response, or ``None`` (which
            ``get_routes`` returns when the request fails).

    Returns:
        Dict keyed ``"Route0"``, ``"Route1"``, ... where each value holds
        ``'lat_lng'`` (decoded overview polyline), ``'distance'`` and
        ``'duration'`` (the text fields of the first leg). Empty when
        ``route_json`` is ``None``/empty or contains no routes.
    """
    routeData = {}  # Dictionary to hold data for all routes

    # A failed Directions call yields None; report "no routes" instead of
    # raising a TypeError on the subscript below.
    if not route_json:
        return routeData

    for i, route in enumerate(route_json.get('routes', [])):
        # Assuming each route has only one leg (no waypoints are requested).
        leg = route['legs'][0]

        routeData[f"Route{i}"] = {
            'lat_lng': decode_polyline(route['overview_polyline']['points']),
            'distance': leg['distance']['text'],
            'duration': leg['duration']['text'],
        }

    return routeData

In [None]:
def parse_weather_data(weather_response):
    """Average the forecast entries that fall within the next 24 hours.

    Args:
        weather_response: Decoded forecast response; its ``'list'`` entries
            must carry ``dt`` (epoch seconds), ``main``, ``wind``, ``clouds``
            and ``weather`` as read below.

    Returns:
        Dict with the arithmetic mean of each numeric field plus the most
        frequent ``weather[0].main`` value under
        ``'Average Main Weather Condition'``, or ``None`` when no entry falls
        inside the window (previously this raised ``ZeroDivisionError``).
    """
    # Extract the forecast list
    forecast_list = weather_response['list']

    # Epoch seconds for "24 hours from now". datetime.now().timestamp() is
    # correct in any time zone, whereas the former utcnow().timestamp()
    # interpreted the naive UTC value as local time and was off by the
    # machine's UTC offset (and utcnow() is deprecated).
    cutoff_timestamp = datetime.now().timestamp() + 24 * 3600

    # Filter the forecast data for the next 24 hours
    next_24_hours_forecast = [
        forecast for forecast in forecast_list if forecast['dt'] <= cutoff_timestamp
    ]

    count = len(next_24_hours_forecast)
    if count == 0:
        return None

    # Output key -> (section, field) of each forecast entry to average.
    numeric_fields = (
        ('Average Temperature', 'main', 'temp'),
        ('Average Wind Speed', 'wind', 'speed'),
        # NOTE(review): wind direction is an angle; a plain arithmetic mean
        # (e.g. of 350 and 10 degrees) can be misleading - confirm intended.
        ('Average Wind Direction', 'wind', 'deg'),
        ('Average Cloud Coverage', 'clouds', 'all'),
        ('Average Feels Like Temperature', 'main', 'feels_like'),
        ('Average Minimum Temperature', 'main', 'temp_min'),
        ('Average Maximum Temperature', 'main', 'temp_max'),
        ('Average Pressure', 'main', 'pressure'),
        ('Average Humidity', 'main', 'humidity'),
    )

    new_dict = {}
    for label, section, field in numeric_fields:
        total = sum(forecast[section][field] for forecast in next_24_hours_forecast)
        new_dict[label] = total / count

    # Count the occurrences of each 'main' weather condition
    main_counts = {}
    for forecast in next_24_hours_forecast:
        main_condition = forecast['weather'][0]['main']
        main_counts[main_condition] = main_counts.get(main_condition, 0) + 1

    # A categorical value has no mean, so the most common one is reported.
    new_dict['Average Main Weather Condition'] = max(main_counts, key=main_counts.get)

    return new_dict

In [None]:
def convert_route_data_to_df(routeData):
    """Flatten parsed route data into one DataFrame row per coordinate.

    ``routeData`` maps a route id to a dict whose ``'lat_lng'`` entry is a
    sequence of ``(lat, lng)`` pairs. The result has the columns
    ``RouteId``, ``Latitude`` and ``Longitude``.
    """
    # One record per point, in route order and then point order.
    records = [
        {'RouteId': route_id, 'Latitude': point[0], 'Longitude': point[1]}
        for route_id, route in routeData.items()
        for point in route['lat_lng']
    ]
    return pd.DataFrame(records)

In [None]:
# Resolve the trip endpoints to coordinates with the HERE geocoder.
# NOTE(review): city_to_latlng returns an error string instead of a (lat, lng)
# pair when the lookup fails, in which case these unpackings raise ValueError.
source_lat, source_lng = city_to_latlng('New York')
destination_lat, destination_lng = city_to_latlng('Los Angeles')



```
Code to start building the dataframe
```



In [None]:
# Build the base DataFrame: one row per decoded point of every alternative route.
start = f"{source_lat},{source_lng}"
destination = f"{destination_lat},{destination_lng}"

# Get Route information from start and destination
route_json_response = get_routes(start, destination)

# get_routes returns None when the request fails; stop here with a clear
# message instead of failing later with an opaque TypeError.
if route_json_response is None:
    raise RuntimeError("Directions API call failed; cannot build route_df")

# Parse Route information
routeData = parse_route_data(route_json_response)

# Convert routeData to DataFrame
route_df = convert_route_data_to_df(routeData)

# The cells below index route_df by 'Latitude'/'Longitude', which do not exist
# in an empty frame, so fail early when no route points came back.
if route_df.empty:
    raise RuntimeError("Directions API returned no routes between start and destination")

# Display the DataFrame
print(route_df)

Using Google Directions API to get routes between start and destination
Received response from Directions API and found 3 between start and destination
    RouteId  Latitude  Longitude
0    Route0  40.71455  -74.00710
1    Route0  40.75255  -74.19000
2    Route0  40.84079  -74.33961
3    Route0  40.90603  -74.49748
4    Route0  40.91235  -74.75774
..      ...       ...        ...
577  Route2  34.11959 -117.75316
578  Route2  34.12311 -117.96775
579  Route2  34.06454 -118.01102
580  Route2  34.07125 -118.14618
581  Route2  34.05358 -118.24544

[582 rows x 3 columns]


In [None]:
# Attach the averaged 24-hour weather forecast to every row of route_df.

# route_df column -> key of the dict returned by parse_weather_data.
weather_columns = {
    'Average_Temperature': 'Average Temperature',
    'Average_Wind_Speed': 'Average Wind Speed',
    'Average_Wind_Direction': 'Average Wind Direction',
    'Average_Cloud_Coverage': 'Average Cloud Coverage',
    'Average_Feels_Like_Temperature': 'Average Feels Like Temperature',
    'Average_Min_Temperature': 'Average Minimum Temperature',
    'Average_Max_Temperature': 'Average Maximum Temperature',
    'Average_Pressure': 'Average Pressure',
    'Average_Humidity': 'Average Humidity',
    'Average_Main_Weather_Condition': 'Average Main Weather Condition',
}

for index, row in route_df.iterrows():
    # Coordinates of the current route point
    current_lat, current_lng = row['Latitude'], row['Longitude']

    # Fetch the forecast and, when there is one, reduce it to averages
    weather_response = get_weatherData_for_latlng(current_lat, current_lng)
    weather_data = None if weather_response is None else parse_weather_data(weather_response)

    # Write the values for this row. Without weather data the numeric columns
    # fall back to 0 and the weather condition to None.
    for column, source_key in weather_columns.items():
        if weather_data is not None:
            route_df.at[index, column] = weather_data[source_key]
        elif column == 'Average_Main_Weather_Condition':
            route_df.at[index, column] = None
        else:
            route_df.at[index, column] = 0

In [None]:
# Attach TomTom traffic data to every row of route_df. Each row describes the
# segment from its own point to the next point of the SAME route.

# route_df column -> (flowSegmentData field, fallback when no data is available)
traffic_columns = {
    'FRC_Traffic': ('frc', 0),
    'Current_Speed_Traffic': ('currentSpeed', 0),
    'Free_Flow_Speed_Traffic': ('freeFlowSpeed', 0),
    'Current_Travel_Time_Traffic': ('currentTravelTime', 0),
    'Road_Closure_Traffic': ('roadClosure', None),
}

# Positional labels are used below, as before (route_df has a default 0..n-1 index).
for i in range(len(route_df)):
    route_id = route_df.at[i, 'RouteId']
    has_next_in_route = i + 1 < len(route_df) and route_df.at[i + 1, 'RouteId'] == route_id

    if has_next_in_route:
        # Named segment_* so the global `start` (the trip origin string) is
        # no longer overwritten by this cell.
        segment_start = (route_df.at[i, 'Latitude'], route_df.at[i, 'Longitude'])
        segment_end = (route_df.at[i + 1, 'Latitude'], route_df.at[i + 1, 'Longitude'])

        # Get traffic data for the current pair of coordinates
        traffic_data_response = get_traffic_data(segment_start, segment_end)
        flow = (traffic_data_response or {}).get('flowSegmentData')

        for column, (field, fallback) in traffic_columns.items():
            route_df.at[i, column] = flow[field] if flow is not None else fallback

    elif i > 0 and route_df.at[i - 1, 'RouteId'] == route_id:
        # Last point of a route: there is no following segment, so reuse the
        # values of the previous row. Previously only the very last row of the
        # frame was handled this way; the final point of every other route was
        # paired with the first point of the next route, i.e. a "segment"
        # running from the destination back to the origin.
        for column in traffic_columns:
            route_df.at[i, column] = route_df.at[i - 1, column]

    else:
        # A route consisting of a single point has no segment at all.
        for column, (_, fallback) in traffic_columns.items():
            route_df.at[i, column] = fallback

In [None]:
# Preview route_df after the weather and traffic columns have been added by the cells above.
route_df

Unnamed: 0,RouteId,Latitude,Longitude,Average_Temperature,Average_Wind_Speed,Average_Wind_Direction,Average_Cloud_Coverage,Average_Feels_Like_Temperature,Average_Min_Temperature,Average_Max_Temperature,Average_Pressure,Average_Humidity,Average_Main_Weather_Condition,FRC_Traffic,Current_Speed_Traffic,Free_Flow_Speed_Traffic,Current_Travel_Time_Traffic,Road_Closure_Traffic
0,Route0,40.71455,-74.00710,8.73625,4.00750,248.500,80.875,6.62875,8.68000,8.73625,1006.875,75.625,Clouds,FRC1,87.0,87.0,216.0,False
1,Route0,40.75255,-74.19000,8.60750,3.54375,246.875,80.750,6.67125,8.53625,8.60750,1006.625,75.375,Clouds,FRC4,36.0,36.0,47.0,False
2,Route0,40.84079,-74.33961,7.92375,3.24625,240.625,80.750,5.96375,7.84375,7.92375,1006.750,79.250,Clouds,FRC1,79.0,79.0,22.0,False
3,Route0,40.90603,-74.49748,6.39000,3.26500,236.500,82.000,4.12750,6.28000,6.41125,1007.000,81.000,Clouds,FRC5,56.0,56.0,826.0,False
4,Route0,40.91235,-74.75774,5.90375,3.12500,235.375,84.125,3.73125,5.79125,5.92000,1006.625,83.125,Clouds,FRC0,115.0,115.0,395.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
577,Route2,34.11959,-117.75316,17.80000,1.31125,122.875,67.375,16.65750,17.35875,17.80000,1019.625,39.125,Clouds,FRC4,57.0,57.0,69.0,False
578,Route2,34.12311,-117.96775,18.87625,1.02625,220.500,68.125,17.89375,18.49000,18.90000,1019.500,41.125,Clouds,FRC0,116.0,116.0,69.0,False
579,Route2,34.06454,-118.01102,18.76500,0.96000,222.125,76.375,17.78875,18.41750,18.81375,1019.500,41.875,Clouds,FRC4,43.0,43.0,106.0,False
580,Route2,34.07125,-118.14618,18.24625,1.18000,235.375,67.375,17.20750,17.89375,18.30375,1019.500,41.375,Clouds,FRC4,54.0,54.0,166.0,False


In [None]:
# Add the number of nearby gas stations, rest stops and restaurants to route_df
for index, row in route_df.iterrows():
    # Get latitude and longitude from the current row
    current_lat, current_lng = row['Latitude'], row['Longitude']

    # Get the nearby-place counts for the current latitude and longitude
    rest_stop_info = get_places_data(current_lat, current_lng) or {}

    # A category is absent from the dict when its lookup failed; count it as 0
    # instead of raising KeyError and aborting the whole loop.
    route_df.at[index, 'Gas_Stations'] = rest_stop_info.get('Gas Stations', 0)
    route_df.at[index, 'Rest_Stops'] = rest_stop_info.get('Rest Stops', 0)
    route_df.at[index, 'Restaurants'] = rest_stop_info.get('Restaurants', 0)

In [None]:
# Attach a safety score to every row of route_df
for index, row in route_df.iterrows():
    # Score of the city found at this point (0 when the city is unknown),
    # written straight into the Safety_Score column of the current row
    route_df.at[index, 'Safety_Score'] = get_safetyScore_for_latlng(row['Latitude'], row['Longitude'])

In [None]:
# Persist the enriched route table to Google Drive (requires the drive to be mounted).
# NOTE(review): with pandas defaults the index is written as an unnamed first
# column, which shows up as "Unnamed: 0" when the file is read back; pass
# index=False if downstream readers should not see it - confirm.
route_df.to_csv('/content/drive/MyDrive/route.csv')