# Distance to City Variable Generation

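In this notebook, a new exogenous variable is generated: the distance to the nearest city (in km). To do so, each location is reverse-geocoded with the Nominatim API, and the great-circle (haversine) distance between the location and the coordinates returned for the matched place is computed.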

#### Loading necessary libraries

In [None]:
import time
from math import radians, sin, cos, sqrt, atan2

import pandas as pd
import requests

#### Loading auxiliary data

In [2]:
# Read the auxiliary coordinates table; later cells use its
# 'latitude' and 'longitude' columns.
coordinates_csv_path = 'auxiliary_data/gw_coordinates_df.csv'
df_coord = pd.read_csv(coordinates_csv_path)

### Defining necessary functions

In [1]:
def haversine(lat1, lon1, lat2, lon2):
    """
    Great-circle distance between two latitude/longitude points (Haversine formula).

    Args:
        lat1 (float): Latitude of the first point, in decimal degrees.
        lon1 (float): Longitude of the first point, in decimal degrees.
        lat2 (float): Latitude of the second point, in decimal degrees.
        lon2 (float): Longitude of the second point, in decimal degrees.

    Returns:
        float: Distance between the two points, in kilometers.
    """
    earth_radius_km = 6371.0

    # Half of the angular differences, in radians
    half_delta_lat = radians(lat2 - lat1) / 2
    half_delta_lon = radians(lon2 - lon1) / 2

    # Haversine term: squared half-chord length between the two points
    chord_term = (
        sin(half_delta_lat)**2
        + cos(radians(lat1)) * cos(radians(lat2)) * sin(half_delta_lon)**2
    )

    # Central angle between the points, in radians
    central_angle = 2 * atan2(sqrt(chord_term), sqrt(1 - chord_term))

    return earth_radius_km * central_angle


def get_nearest_city(lat, lon, timeout=10.0, min_delay_s=1.0):
    """
    Find the nearest city to a given set of coordinates using the Nominatim API.

    The point is reverse-geocoded at zoom levels 10, 11 and 12, in that order.
    The first response whose address contains a "city" or "town" entry AND
    usable "lat"/"lon" values is accepted. Any other outcome (transport error,
    non-200 status, invalid JSON, error payload, no city/town, missing
    coordinates) moves on to the next zoom level.

    Args:
        lat (float): Latitude of the point.
        lon (float): Longitude of the point.
        timeout (float): Seconds to wait for each HTTP request before giving
            up on that zoom level. Defaults to 10.0.
        min_delay_s (float): Seconds to sleep before each HTTP request.
            Defaults to 1.0 because the public Nominatim server's usage
            policy asks for at most one request per second. Pass 0 to
            disable, e.g. against a self-hosted instance.

    Returns:
        tuple: A tuple containing the name of the nearest city and the distance to it (in kilometers).
               If no city is found, returns ("Desconocido", None).
    """
    url = "https://nominatim.openstreetmap.org/reverse"
    headers = {
        "User-Agent": "MyUniqueApp/1.0 (contacto@miapp.com)"  # Custom user agent for the API request
    }

    # Try different zoom levels to find a nearby city (from zoom level 10 to 12)
    for zoom in range(10, 13):
        params = {
            "lat": lat,
            "lon": lon,
            "format": "json",
            "zoom": zoom,
            "addressdetails": 1,
        }

        # Throttle so that a row-by-row batch does not flood the server
        if min_delay_s and min_delay_s > 0:
            time.sleep(min_delay_s)

        try:
            # Without a timeout a single stalled connection blocks the whole batch
            response = requests.get(url, params=params, headers=headers, timeout=timeout)
        except requests.RequestException:
            continue  # Transport failure: treat like an unsuccessful response

        # Check if the request was successful
        if response.status_code != 200:
            continue

        try:
            data = response.json()  # Parse the response data as JSON
        except ValueError:
            continue  # If the JSON parsing fails, continue with the next zoom level

        if not isinstance(data, dict):
            continue

        address = data.get("address")
        if not isinstance(address, dict):
            continue  # e.g. an error payload that carries no address at all

        nearest_city = address.get("city") or address.get("town")
        if not nearest_city:
            continue  # Nothing city-like at this zoom level: try the next one

        try:
            nearest_lat = float(data["lat"])
            nearest_lon = float(data["lon"])
        except (KeyError, TypeError, ValueError):
            # Never fall back to 0/0: that would report the distance to (0°, 0°)
            continue

        # Calculate the distance to the nearest city using the haversine formula
        return nearest_city, haversine(lat, lon, nearest_lat, nearest_lon)

    # If no city is found, return "Desconocido" and None as the distance
    return "Desconocido", None

### Getting the distance to the nearest city for each set of coordinates

In [3]:
# One get_nearest_city call per row; only the distance (element 1 of the
# returned tuple) is kept, the place name is discarded.
distance_km_by_row = df_coord.apply(
    lambda coords: get_nearest_city(coords['latitude'], coords['longitude'])[1],
    axis=1,
)
df_coord['distance_to_city_km'] = distance_km_by_row

In [4]:
# Preview the first five rows to check the new distance column
df_coord.head(n=5)

Unnamed: 0,id_loc,latitude,longitude,distance_to_city_km
0,324095,48.310278,14.3075,1.648698
1,323295,48.330278,14.302778,2.974244
2,323154,48.283056,14.349444,1.705724
3,304535,48.306111,16.872222,2.338073
4,326934,47.915833,16.289167,1.457035


### Saving the resulting data

In [6]:
# Write the table, including the new distance column, to CSV without the index
output_csv_path = 'dist_to_city_full.csv'
df_coord.to_csv(output_csv_path, index=False)