In [65]:
from geopy import distance
import pandas as pd

In [66]:
# Read the zip code registry file into a data frame.
# NOTE(review): `zip` is parsed as int64 (see the dtypes output of the next cell), so a postal
# code with a leading zero would lose it — confirm the file contains none, or read the column
# as text with dtype={"zip": str}.
df = pd.read_csv("random_zip_codes_with_coordinates.csv")

In [67]:
# Are the coordinates numeric?
# Yes: both coordinate columns are float64 (see the output below).
df.dtypes

zip                int64
quantity           int64
av. longitude    float64
av. latitude     float64
dtype: object

In [68]:
# Display the loaded frame (zip, quantity, average longitude, average latitude).
df

Unnamed: 0,zip,quantity,av. longitude,av. latitude
0,15859,1,13.894806,52.227569
1,21279,2,9.748,53.359325
2,22965,1,10.3478,53.6946
3,35279,7,9.1167,50.85
4,37345,6,10.434645,51.518855
5,55122,1,8.237,49.995
6,56769,1,7.004175,50.27915
7,76698,1,8.6316,49.163
8,83224,1,12.46845,47.78215
9,94336,1,12.73975,48.94645


In [69]:
# Function to determine the distance between user zip code and our two branches.
def check_distance_to_libraries(i):
    """Return the distances in km from one zip code row to both library branches.

    Parameters
    ----------
    i : tuple
        One row as yielded by ``df.itertuples()``. Fields are read by position:
        index 3 is the average longitude and index 4 the average latitude
        (index 0 is the row index, followed by zip and quantity).

    Returns
    -------
    tuple of float
        ``(dist_to_sta, dist_to_rhb)``: distance in km to the Sankt Augustin
        branch and to the Rheinbach branch, in that order.
    """
    # Imported here because the notebook only runs `from geopy import distance`,
    # which leaves the name `geopy` unbound (NameError on a fresh kernel), and a
    # later cell rebinds `distance` to a number. A local import keeps the
    # function independent of both.
    import geopy.distance

    # Coordinates of branches Sankt Augustin and Rheinbach as (latitude, longitude).
    coords_sta = (50.775, 7.197)
    coords_rhb = (50.626, 6.949)
    # Coordinates of zip code, reordered to (latitude, longitude) to match the branches.
    coords_zip = (i[4], i[3])

    dist_to_sta = geopy.distance.distance(coords_sta, coords_zip).km
    dist_to_rhb = geopy.distance.distance(coords_rhb, coords_zip).km

    return dist_to_sta, dist_to_rhb

In [70]:
# Compute both branch distances once per row, then split the pairs into one list per branch.
branch_distances = [check_distance_to_libraries(row) for row in df.itertuples()]
dists_to_sta = [to_sta for to_sta, to_rhb in branch_distances]
dists_to_rhb = [to_rhb for to_sta, to_rhb in branch_distances]
df["dist_to_sta_in_km"] = dists_to_sta
df["dist_to_rhb_in_km"] = dists_to_rhb

In [71]:
# Round both distance columns to two decimal places.
distance_columns = ["dist_to_sta_in_km", "dist_to_rhb_in_km"]
df[distance_columns] = df[distance_columns].round(2)

In [72]:
# Display the frame with the two rounded distance columns added.
df

Unnamed: 0,zip,quantity,av. longitude,av. latitude,dist_to_sta_in_km,dist_to_rhb_in_km
0,15859,1,13.894806,52.227569,492.14,514.65
1,21279,2,9.748,53.359325,336.53,359.74
2,22965,1,10.3478,53.6946,389.63,413.03
3,35279,7,9.1167,50.85,135.55,155.03
4,37345,6,10.434645,51.518855,241.17,263.69
5,55122,1,8.237,49.995,114.01,115.51
6,56769,1,7.004175,50.27915,56.83,38.78
7,76698,1,8.6316,49.163,206.73,202.71
8,83224,1,12.46845,47.78215,507.62,511.44
9,94336,1,12.73975,48.94645,447.27,456.78


In [73]:
# Save the frame including the distance columns; index=False leaves the row index out of the file.
df.to_csv("random_zip_codes_with_coordinates_and_distances.csv", index=False)

In [74]:
# Function that checks if users location is within a certain radius, default: 100 km.
def check_if_inside_distance(distance=100, row=None):
    """Classify a row as "inside" or "outside" a radius around the two branches.

    Parameters
    ----------
    distance : int or float, default 100
        Radius in km. Inside this function the parameter shadows the
        ``distance`` name imported from geopy.
    row : object, optional
        Row exposing ``dist_to_sta_in_km`` and ``dist_to_rhb_in_km``
        attributes, e.g. one element of ``df.itertuples()``. When omitted, the
        module-level loop variable ``i`` is used so that existing
        ``check_if_inside_distance()`` calls keep working.

    Returns
    -------
    str
        "inside" if at least one branch is strictly closer than ``distance``,
        otherwise "outside".

    Raises
    ------
    NameError
        If ``row`` is omitted and no global ``i`` exists.
    """
    if row is None:
        # Backward-compatible fallback: the original implementation always read
        # the current row from the global loop variable `i`.
        row = i

    within_radius = (row.dist_to_sta_in_km < distance) or \
        (row.dist_to_rhb_in_km < distance)

    return "inside" if within_radius else "outside"

In [75]:
# Display the frame again; none of the cells shown since the previous display modify df.
df

Unnamed: 0,zip,quantity,av. longitude,av. latitude,dist_to_sta_in_km,dist_to_rhb_in_km
0,15859,1,13.894806,52.227569,492.14,514.65
1,21279,2,9.748,53.359325,336.53,359.74
2,22965,1,10.3478,53.6946,389.63,413.03
3,35279,7,9.1167,50.85,135.55,155.03
4,37345,6,10.434645,51.518855,241.17,263.69
5,55122,1,8.237,49.995,114.01,115.51
6,56769,1,7.004175,50.27915,56.83,38.78
7,76698,1,8.6316,49.163,206.73,202.71
8,83224,1,12.46845,47.78215,507.62,511.44
9,94336,1,12.73975,48.94645,447.27,456.78


In [77]:
# Classify every row as "inside" or "outside" using the default radius of
# check_if_inside_distance().
# The loop variable must stay named `i`: check_if_inside_distance() is called without a row
# here, so it reads the current row from the global `i`.
inside_or_outside = []
for i in df.itertuples():
    checked_distance = check_if_inside_distance()
    inside_or_outside.append(checked_distance)
df["within_dist"] = inside_or_outside

In [78]:
# Display the frame with the new within_dist column.
df

Unnamed: 0,zip,quantity,av. longitude,av. latitude,dist_to_sta_in_km,dist_to_rhb_in_km,within_dist
0,15859,1,13.894806,52.227569,492.14,514.65,outside
1,21279,2,9.748,53.359325,336.53,359.74,outside
2,22965,1,10.3478,53.6946,389.63,413.03,outside
3,35279,7,9.1167,50.85,135.55,155.03,outside
4,37345,6,10.434645,51.518855,241.17,263.69,outside
5,55122,1,8.237,49.995,114.01,115.51,outside
6,56769,1,7.004175,50.27915,56.83,38.78,inside
7,76698,1,8.6316,49.163,206.73,202.71,outside
8,83224,1,12.46845,47.78215,507.62,511.44,outside
9,94336,1,12.73975,48.94645,447.27,456.78,outside


In [79]:
# Total number of locations: the sum of the quantity column over all rows.
locations_in_total = df["quantity"].sum()
print(f"Number of locations: {locations_in_total}.")

Number of locations: 22.


In [80]:
# Number of locations within the 100 km radius.
# NOTE: this rebinds `distance`, the name imported via `from geopy import distance`. The value
# is only used in the printed messages; the rows were classified with the default radius of
# check_if_inside_distance().
distance = 100
inside = df["within_dist"].str.contains("inside")
locations_inside = df.loc[inside, "quantity"].sum()
print(f"Number of locations inside {distance} km: {locations_inside}.")

Number of locations inside 100 km: 1.


In [81]:
# Number of users outside the radius; `distance` is the value set in the previous cell.
outside = df["within_dist"].str.contains("outside")
users_outside = df.loc[outside, "quantity"].sum()
print(f"Number of users outside {distance} km: {users_outside}.")

Number of users outside 100 km: 21.


In [82]:
# Save the final frame (distances plus within_dist); index=False leaves the row index out of the file.
df.to_csv("random_zip_codes_with_coordinates_distances_and_radius.csv", index=False)