In [4]:
#This module counts, for every row in the dataset, the supermarkets located nearby (about 500m; the code approximates this with a fixed latitude/longitude window)
#Input: dataset "GeoFeatures_Zurich_supermarkets_companies", which contains geographical and urban data
#Output: new dataset with additional information about the number of supermarkets nearby

In [None]:
import pandas as pd
import random as rd 
import math

In [5]:
# Function that computes the distance between two points

def distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points (haversine formula).

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance along the surface of a sphere of radius ``R``; with
        ``R = 6371e3`` (Earth's mean radius in metres) the result is in metres.
    """
    R = 6371e3

    # Degrees -> radians.
    lat1_rad = lat1 * math.pi / 180
    lat2_rad = lat2 * math.pi / 180
    delta_lat = (lat2 - lat1) * math.pi / 180
    delta_lon = (lon2 - lon1) * math.pi / 180

    sin_half_dlat = math.sin(delta_lat / 2)
    sin_half_dlon = math.sin(delta_lon / 2)

    # Haversine of the central angle between the two points.
    a = (sin_half_dlat * sin_half_dlat
         + math.cos(lat1_rad) * math.cos(lat2_rad) * sin_half_dlon * sin_half_dlon)

    # Mathematically 0 <= a <= 1, but rounding can push it marginally outside that
    # range for (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

In [6]:
# Read the raw input table from disk. low_memory=False tells pandas not to parse the
# file in chunks, which avoids mixed-type inference within a column.
df = pd.read_csv(
    "../Raw_Data/GeoFeatures_Zurich_supermarkets_companies.csv",
    low_memory=False,
)

In [7]:
counter_var = ['migros', 'coop', 'discounter', 'other', 'number_companies']

counter_df = pd.DataFrame()  # receives one column of neighbour counts per entry in counter_var

# Distances between each row and every neighbour found for it. Despite the name, values
# are appended for all categories in counter_var, not only for 'migros'.
dis_migros = []

# For every category, visit each row of df and count the *other* rows that lie inside a
# fixed latitude/longitude window around it and whose category flag equals 1.
# NOTE(review): the notebook header speaks of a 500 m distance, but the filter below is a
# rectangular window of +-0.003 deg latitude and +-0.00375 deg longitude, not a radius
# computed with distance() -- confirm this approximation is intended.
for supermarket in counter_var:
    print(supermarket)
    num_counts = []

    # The category flag does not depend on the row being visited, so build the mask
    # once per category instead of once per row.
    is_category = df[supermarket] == 1

    # Iterating the columns directly yields the same index/lat/lng values as
    # df.iterrows() without constructing a Series for every row.
    for ind, lat1, lon1 in zip(df.index, df["lat"], df["lng"]):
        in_lat_window = abs(lat1 - df["lat"]) <= 0.006/2
        in_lng_window = abs(lon1 - df["lng"]) <= 0.0075/2
        is_other_row = df.index != ind  # never count the row itself

        # Rows of the current category inside the window around (lat1, lon1)
        temp_df = df[in_lat_window & in_lng_window & is_category & is_other_row]

        # Every selected row has the flag equal to 1, so the sum is the neighbour count.
        ges = temp_df[supermarket].sum()
        num_counts.append(ges)

        # Record the distance to each neighbour; nothing is appended when none matched.
        for lat2, lon2 in zip(temp_df["lat"], temp_df["lng"]):
            dis_migros.append(distance(lat1, lon1, lat2, lon2))

    counter_df[supermarket] = num_counts

migros
coop
discounter
other
number_companies


In [8]:
# Label each count column with its "_500m" name, append the counts column-wise to the
# original table, and write the combined table to disk.
counter_df.columns = [
    'migros_500m',
    'coop_500m',
    'discounter_500m',
    'other_500m',
    'number_companies_500m',
]
result = pd.concat([df, counter_df], axis="columns")
result.to_csv('../Processed_Data/counter_500m.csv')