In [35]:
import pandas as pd
import numpy as np
import folium
from haversine import haversine, Unit

### Split area into cells with no more than given number of stores from universe

In [36]:
LIMIT_PER_CELL = 35
init_lat_cells = 4
init_lng_cells = 4
universum_stores_coords_path = "stores_coords.csv"
cells_out_file_path = "cells.csv"


uni = pd.read_csv(universum_stores_coords_path)
uni.drop_duplicates(subset=["lat", "lng"], inplace=True)
test = uni.copy()
points = np.array(test[["lat", "lng"]])


class GridSplitter:
    def __init__(self, points, limit_per_cell=20):
        self.points = points
        self.limit_per_cell = limit_per_cell
        self.cells = []
        self.cell_counts = {}

    def create_start_cells(self, grid_x, grid_y):
        min_lat, min_lng = min(point[0] for point in self.points), min(
            point[1] for point in self.points
        )
        max_lat, max_lng = max(point[0] for point in self.points), max(
            point[1] for point in self.points
        )

        cell_width = (max_lng - min_lng) / grid_x
        cell_height = (max_lat - min_lat) / grid_y

        cells = []
        for i in range(grid_x):
            for j in range(grid_y):
                lat_min = min_lat + j * cell_height
                lat_max = lat_min + cell_height
                lng_min = min_lng + i * cell_width
                lng_max = lng_min + cell_width
                cells.append(((lat_min, lat_max), (lng_min, lng_max)))
        return cells

    def split_cells(self, cells, points=None):
        if points is None:
            points = self.points
        if isinstance(points, np.ndarray):
            points = points.tolist()
        points = sorted(points)

        for cell in cells:
            count = 0
            points_inside = []
            for point in points:
                if (
                    point[0] >= cell[0][0]
                    and point[0] < cell[0][1]
                    and point[1] >= cell[1][0]
                    and point[1] < cell[1][1]
                ):
                    count += 1
                    points_inside.append(point)
                elif point[0] >= cell[0][1] and point[1] >= cell[1][1]:
                    break

            if count <= self.limit_per_cell:
                self.cells.append((cell, count))
            else:
                print("count: ", count)
                lat_min, lat_max = cell[0]
                lng_min, lng_max = cell[1]
                lat_mid = (lat_min + lat_max) / 2
                lng_mid = (lng_min + lng_max) / 2
                splited = [
                    ((lat_min, lat_mid), (lng_min, lng_mid)),  # Top-left
                    ((lat_min, lat_mid), (lng_mid, lng_max)),  # Top-right
                    ((lat_mid, lat_max), (lng_min, lng_mid)),  # Bottom-left
                    ((lat_mid, lat_max), (lng_mid, lng_max)),  # Bottom-right
                ]
                self.split_cells(splited, points_inside)


spliter = GridSplitter(points, limit_per_cell=LIMIT_PER_CELL)
spliter.cells = []
cells = spliter.create_start_cells(init_lat_cells, init_lng_cells)
print(len(cells))
spliter.split_cells(cells, points)

print(len(spliter.cells))

sum = 0
for cell in spliter.cells:
    sum = sum + cell[1]
print(sum)

print(uni.shape)
with open(cells_out_file_path, "w") as f:
    f.write(f"min_lat,max_lat,min_lng,max_lng,count\n")
    for cell in spliter.cells:
        f.write(
            f"{cell[0][0][0]},{cell[0][0][1]},{cell[0][1][0]},{cell[0][1][1]},{cell[1]}\n"
        )


16
count:  221
count:  221
count:  221
count:  61
count:  56
count:  46
count:  42
count:  148
count:  39
count:  44
count:  41
count:  6543
count:  399
count:  391
count:  77
count:  52
count:  45
count:  42
count:  65
count:  125
count:  98
count:  93
count:  65
count:  124
count:  74
count:  43
count:  41
count:  2802
count:  344
count:  138
count:  38
count:  99
count:  43
count:  201
count:  82
count:  43
count:  36
count:  85
count:  47
count:  1170
count:  41
count:  236
count:  107
count:  39
count:  40
count:  84
count:  65
count:  48
count:  490
count:  87
count:  70
count:  260
count:  41
count:  174
count:  86
count:  46
count:  41
count:  126
count:  99
count:  73
count:  403
count:  49
count:  128
count:  96
count:  90
count:  56
count:  202
count:  160
count:  130
count:  51
count:  53
count:  522
count:  118
count:  64
count:  37
count:  220
count:  176
count:  158
count:  122
count:  60
count:  37
count:  87
count:  44
count:  97
count:  61
count:  38
count:  38
count:

### Plot cell center and radius

In [37]:
cells = pd.read_csv(cells_out_file_path)

In [38]:
center_lat_lng_radius = []
for index, row in cells.iterrows():
    if row['count'] == 0 or row['count'] == '0':
        continue
    # get center of the cell
    center_lat = (row['min_lat'] + row['max_lat']) / 2
    center_lng = (row['min_lng'] + row['max_lng']) / 2
    # get radius of the cell in meters
    radius = haversine((row['min_lat'], row['min_lng']), (row['max_lat'], row['max_lng']), unit=Unit.METERS) / 2
    radius = round(radius / 100) * 100
    center_lat_lng_radius.append({'lat': center_lat, 'lng': center_lng, 'radius': radius})



In [39]:
center_lat_lng_radius = pd.DataFrame(center_lat_lng_radius)
center_lat_lng_radius.to_csv(f'scrap_points_radius_limit_per_cell_{LIMIT_PER_CELL}.csv', index=False)

In [40]:
center_lat_lng_radius

Unnamed: 0,lat,lng,radius
0,50.268043,16.466277,14900
1,50.424702,16.193539,3700
2,50.413512,16.290945,1900
3,50.430297,16.242242,900
4,50.441487,16.242242,900
...,...,...,...
9009,54.162138,23.089929,7100
9010,54.251657,22.934078,7100
9011,54.251657,23.089929,7100
9012,54.206898,23.323705,14200


In [41]:
# draw points from center_lat_lng_radius as dot and circle of radius around
points_radius = center_lat_lng_radius[["lat", "lng", "radius"]].values.tolist()
map_center = [50.0, 20.0]
mymap = folium.Map(location=map_center, zoom_start=10)
for point in points_radius:
    folium.Circle(
        location=[point[0], point[1]],
        radius=point[2],
        color='blue',
        fill=False,
        opacity=0.5
    ).add_to(mymap)

mymap.save(f"map_with_center_points_limit_per_cell_{LIMIT_PER_CELL}.html")
