In [1]:
import csv
import itertools
import math

In [2]:
def transform_point(target):
    """Reduce one raw row to the three fields used by the rest of the notebook.

    Args:
        target: Mapping with 'featureType', 'latitude' and 'longitude'
            entries; the two coordinate values must be accepted by ``float()``.

    Returns:
        A new dict with the feature type copied unchanged and both
        coordinates converted to ``float``. Any other keys in ``target``
        are dropped.

    Raises:
        KeyError: If one of the three keys is missing from ``target``.
        ValueError: If a coordinate string cannot be parsed as a float.
    """
    record = {'featureType': target['featureType']}
    for axis in ('latitude', 'longitude'):
        record[axis] = float(target[axis])
    return record

In [3]:
# Read every row of the not-yet-deduplicated CSV and normalise it with
# transform_point. The file is opened with newline='' as the csv module
# requires, so that newlines embedded inside quoted fields are parsed correctly.
with open('combined_no_dedupe.csv', newline='') as f:
    points = [transform_point(x) for x in csv.DictReader(f)]

In [4]:
# One independent, initially empty list of kept points per known feature type.
already_seen = {feature_type: [] for feature_type in ('supermarket', 'fastFood', 'home')}

In [5]:
# Greedy proximity de-duplication: a point is kept only if no previously kept
# point of the same feature type lies within DEDUPE_THRESHOLD of it.
#
# The distance is the plain Euclidean norm of the raw latitude/longitude
# differences (presumably decimal degrees -- TODO confirm), not a geodesic
# distance.
DEDUPE_THRESHOLD = 0.001

for point in points:
    # Points are only compared within their own feature type. A feature type
    # that has no bucket in already_seen raises KeyError here.
    target_list = already_seen[point['featureType']]

    # any() stops at the first close-enough neighbour instead of counting
    # every match; only the existence of a match matters.
    is_duplicate = any(
        math.sqrt(
            (other['latitude'] - point['latitude']) ** 2
            + (other['longitude'] - point['longitude']) ** 2
        ) < DEDUPE_THRESHOLD
        for other in target_list
    )

    if not is_duplicate:
        target_list.append(point)

In [6]:
# Flatten the per-feature-type buckets into one sequence of records, in the
# dict's key order. Materialised as a list rather than left as a one-shot
# itertools.chain iterator, so the records can be iterated more than once
# (an exhausted iterator would silently yield nothing on a second pass).
all_records = list(itertools.chain.from_iterable(already_seen.values()))

In [7]:
# Write the de-duplicated records to disk with a header row.
# newline='' keeps the text layer from translating the '\r\n' line endings
# the csv writer emits; without it, platforms that translate '\n' on write
# (e.g. Windows) produce '\r\r\n', which shows up as blank rows.
with open('combined_dedupe.csv', 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=['featureType', 'latitude', 'longitude'])
    writer.writeheader()
    writer.writerows(all_records)