In [1]:
import pandas as pd
import random

user_coordinates = pd.read_csv('user_coordinates.csv')
place_zone_coordinates = pd.read_csv('place_zone_coordinates.csv')

In [2]:
user_coordinates

Unnamed: 0,user_id,loc_lat,loc_lon
0,1,20,20
1,2,5,5
2,3,3,3
3,4,5,1
4,5,8,1
5,6,-1,10


In [3]:
place_zone_coordinates

Unnamed: 0,place_id,loc_lat,loc_lon,point_number
0,1,0,0,0
1,1,10,0,1
2,1,10,10,2
3,1,0,10,3
4,2,0,0,0
5,2,5,5,1
6,2,5,0,2


In [4]:
import sys

class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y

class Polygon:
    def __init__(self, points):
        self.points = points
        
        self.min_x = min(points, key=lambda p: p.x).x
        self.max_x = max(points, key=lambda p: p.x).x
        self.min_y = min(points, key=lambda p: p.y).y
        self.max_y = max(points, key=lambda p: p.y).y
    
    @property
    def edges(self):
        edge_list = []
        for i,p in enumerate(self.points):
            p1 = p
            p2 = self.points[(i+1) % len(self.points)]
            edge_list.append((p1,p2))

        return edge_list
    
    def contains(self, point):
        _huge = sys.float_info.max
        _eps = 0.00001

        inside = False
        for edge in self.edges: 
            A, B = edge[0], edge[1]
            if A.y > B.y:
                A, B = B, A

            if point.y == A.y or point.y == B.y:
                point.y += _eps

            if (point.y > B.y or point.y < A.y or point.x > max(A.x, B.x)):
                continue

            if point.x < min(A.x, B.x):
                inside = not inside
                continue

            try:
                m_edge = (B.y - A.y) / (B.x - A.x)
            except ZeroDivisionError:
                m_edge = _huge

            try:
                m_point = (point.y - A.y) / (point.x - A.x)
            except ZeroDivisionError:
                m_point = _huge

            if m_point >= m_edge:
                inside = not inside
                continue

        return inside

In [23]:
from tqdm.notebook import tqdm

def get_points(points_table):
    user_coords = points_table.itertuples(name=None, index=False)
    points = {}

    for p in user_coords:
        points[p[0]] = Point(p[1], p[2])
        
    return points

def get_polygons(polygon_table):
    polygons_coords = polygon_table.itertuples(name=None, index=False)
    
    polygons = {}
    current_id = None
    current_points = []
    
    for p in polygons_coords:
        poly_id = p[0]
        
        if current_id == poly_id:
            current_points.append(Point(p[1], p[2])) 
        else:
            if current_points:
                polygons[current_id] = Polygon(current_points)
                current_points = []
            current_points.append(Point(p[1], p[2]))
            current_id = poly_id
            
            
    polygons[current_id] = Polygon(current_points)
            
    return polygons

def count_points_entries(user_coords, polygons_coords):
    points = get_points(user_coords).items()
    polygons = list(get_polygons(polygons_coords).items())[0:100]
    entries = {}
    
    for point_id, point in tqdm(points, desc='1st loop'):
        entries[point_id] = 0
        
        filtered_polygons = filter(lambda poly: compare_coords(point, poly[1]), polygons)
            
        for poly_id, polygon in filtered_polygons:
            is_inside = polygon.contains(point)
            if is_inside:
                entries[point_id] += 1
                        
    return entries

def compare_coords(point, poly):
    if point.x < poly.min_x or point.x > poly.max_x or point.y < poly.min_y or point.y > poly.max_y:
        return False
    
    return True

def create_entries_table(user_coords, polygons):
    entries = count_points_entries(user_coords, polygons)
    return pd.DataFrame(entries.items(), columns=['id', 'number_of_places_available'])
    

In [6]:
create_entries_table(user_coordinates, place_zone_coordinates)

HBox(children=(FloatProgress(value=0.0, description='1st loop', max=6.0, style=ProgressStyle(description_width…




Unnamed: 0,id,number_of_places_available
0,1,0
1,2,1
2,3,1
3,4,2
4,5,1
5,6,0


### Тест

In [7]:
def generate_points(amount):
    uc = []

    for i in range(amount):
        x = random.randrange(20)
        y = random.randrange(20)
        uc.append((i, x, y))

    return pd.DataFrame(uc, columns=['user_id', 'loc_lat', 'loc_lon'])

def generate_polygons(amount):
    pzc = []
    for i in range(amount):
        p1 = (i, random.randrange(10), random.randrange(10), 0)
        p2 = (i, random.randrange(10), random.randrange(10, 20), 1)
        p3 = (i, random.randrange(10, 20), random.randrange(10, 20), 2)
        p4 = (i, random.randrange(10, 20), random.randrange(10), 3)
        
        pzc += [p1, p2, p3, p4]
    
    return pd.DataFrame(pzc, columns=['place_id', 'loc_lat', 'loc_lon', 'point_number'])

In [19]:
test_points = generate_points(1000)
test_poly = generate_polygons(1000)

In [25]:
import time

tic = time.perf_counter()
r = create_entries_table(test_points, test_poly)
toc = time.perf_counter()
print(f"{toc - tic:0.4f} seconds")

r

HBox(children=(FloatProgress(value=0.0, description='1st loop', max=1000.0, style=ProgressStyle(description_wi…


0.5110 seconds


Unnamed: 0,id,number_of_places_available
0,0,1
1,1,5
2,2,19
3,3,10
4,4,61
...,...,...
995,995,72
996,996,17
997,997,11
998,998,7


### JIT и progress bar

In [21]:
from numba import jit
import time
from tqdm.notebook import tqdm

@jit
def test():
    a = 0
    for i in tqdm(range(300000), desc='1st loop'):
        for j in range(500000):
            a += 1

tic = time.perf_counter()
test()
toc = time.perf_counter()
print(f"{toc - tic:0.4f} seconds")

In [28]:
from tqdm.notebook import tqdm
import time

tic = time.perf_counter()

a = 0
for i in tqdm(range(300000), desc='1st loop'):
    for j in range(100):
        a += 1
        
toc = time.perf_counter()
print(f"{toc - tic:0.4f} seconds")

HBox(children=(FloatProgress(value=0.0, description='1st loop', max=300000.0, style=ProgressStyle(description_…


4.7749 seconds
