In [1]:
import seaborn as sns
import pandas as pd
import geopandas as gpd
import descartes
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display 
from shapely import wkt
from shapely.geometry import Polygon, LineString, Point
import json 
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans

import urllib.request
import cv2
import numpy as np
from slugify import slugify
from pathlib import Path
import shutil

%matplotlib inline

# Data path: directory holding the CSV inputs (gcp_transforms.csv, raw_gcp.csv).
# File names are appended by plain string concatenation (p + 'name.csv'), so the
# trailing slash is required.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
p='/Users/eric/proj/data-projects/downtown-partnership/annotations/data/'


def uninvert_point(p):
    """Flip the y coordinate of a point about the fixed offset of 2000.

    Args:
        p: A two-element iterable ``(x, y)``.

    Returns:
        The tuple ``(x, 2000 - y)``; ``x`` is passed through unchanged.
    """
    px, py = p
    flipped_y = 2000 - py
    return (px, flipped_y)

def uninvert_poly(poly):
    """Build a Polygon from the y-flipped vertices of ``poly``.

    Args:
        poly: Anything ``poly_to_array`` accepts (a polygon-like object or an
            already-extracted sequence of points).

    Returns:
        A ``Polygon`` whose vertices are ``uninvert_point`` applied to each
        vertex produced by ``poly_to_array(poly)``.
    """
    vertices = poly_to_array(poly)
    flipped = (uninvert_point(vertex) for vertex in vertices)
    return Polygon(flipped)

def norm(pri):
    """Center each of the first two columns on its mean and scale by its max.

    For columns 0 and 1 independently: subtract the column mean, then divide
    by the maximum of the centered column (so the largest value becomes 1).

    Args:
        pri: A 2-D array-like of points with at least two columns.

    Returns:
        A new floating-point array of the same shape; the input is not
        modified.
    """
    pts = np.array(pri, copy=True)
    # Integer arrays would silently truncate the centered/scaled values when
    # they are written back in place, so always work in floating point.
    if not np.issubdtype(pts.dtype, np.floating):
        pts = pts.astype(float)

    for col in (0, 1):
        pts[:, col] = pts[:, col] - pts[:, col].mean()
        peak = pts[:, col].max()
        # A constant column centers to all zeros; leave it as zeros instead of
        # producing NaN from 0 / 0.
        if peak != 0:
            pts[:, col] = pts[:, col] / peak

    return pts

def poly_to_array(poly):
    """Return the exterior vertices of ``poly`` as an array, without the
    closing point.

    Args:
        poly: An object exposing ``exterior.coords``, or anything else.

    Returns:
        ``np.array(poly.exterior.coords)`` with its last row dropped. If the
        attribute lookup fails with ``AttributeError``, ``poly`` itself is
        returned unchanged.
    """
    try:
        ring = np.array(poly.exterior.coords)
        open_ring = ring[:-1]
    except AttributeError:
        # Not polygon-like; hand the input back as-is.
        return poly
    return open_ring

def reorder_points(v):
    """Reorder points to ensure the shape is valid. This only works if all of
    the points are on the convex hull, which is true for our shapes.

    The vertices are taken from the convex hull of ``v`` and the list is then
    rotated so that it starts at a consistently chosen vertex.

    Args:
        v: A polygon, or a sequence of points accepted by ``Polygon``.

    Returns:
        An array of the hull vertices (closing point dropped), rotated so the
        chosen starting vertex is first.
    """
    # poly_to_array drops the repeated closing vertex of the hull's exterior.
    points = poly_to_array(Polygon(v).convex_hull).tolist()

    # Norming ensures origin finding is consistent. With the +10 offset the
    # distance measured below is the distance to (-10, -10) in normed space.
    normed_points = norm(np.array(points)) + 10

    # Squared Euclidean distance to the origin; the square root is monotonic
    # and therefore unnecessary for picking the minimum.
    sq_dist = (normed_points ** 2).sum(axis=1)

    # argmin returns the first minimum, i.e. the earliest closest vertex.
    mpos = int(np.argmin(sq_dist))

    # Rotate the list of points so the chosen point is first in the list.
    points = points[mpos:] + points[:mpos]

    return np.array(points)


def get_image(url):
    """Fetch an image by URL, caching the raw bytes under /tmp, and decode it.

    The cache file is ``/tmp/dlimage-<slugified url>``. The network is only
    touched when that file does not exist yet.

    Args:
        url: URL of the image to load.

    Returns:
        The result of ``cv2.imdecode(..., -1)`` on the cached bytes (flag -1
        is ``cv2.IMREAD_UNCHANGED``).

    Raises:
        Whatever ``urllib.request.urlopen`` or the file operations raise when
        the download or the cache write fails.
    """
    slug = slugify(url)
    img_file = Path('/tmp/{}'.format('dlimage-'+slug))

    if not img_file.exists():
        # Download into a sibling ".part" file and rename on success, so an
        # interrupted transfer never leaves a truncated file that later calls
        # would mistake for a valid cache entry.
        part_file = img_file.with_name(img_file.name + '.part')
        with urllib.request.urlopen(url) as response, part_file.open('wb') as out_file:
            print("Download", url)
            shutil.copyfileobj(response, out_file)
        part_file.replace(img_file)

    with img_file.open('rb') as f:
        img_array = np.array(bytearray(f.read()), dtype=np.uint8)

    return cv2.imdecode(img_array, -1)

def bound_dims(v):
    """Return the extent of ``v.bounds`` along x and y, rounded to the tens.

    Args:
        v: An object whose ``bounds`` attribute unpacks to
            ``(minx, miny, maxx, maxy)``.

    Returns:
        A tuple ``(round(maxx - minx, -1), round(maxy - miny, -1))``.
    """
    left, bottom, right, top = v.bounds
    width = right - left
    height = top - bottom
    return round(width, -1), round(height, -1)

def intersections_df():
    """Load the GCP transforms and label each row with its map variant.

    Reads ``gcp_transforms.csv`` and ``raw_gcp.csv`` from the data directory
    ``p`` and returns one frame with these added columns:

    * ``tf``: the ``matrix`` column parsed from JSON.
    * ``source``: the WKT ``source`` column parsed and passed through
      ``uninvert_poly``.
    * ``source_area``, ``source_shape``, ``source_shape_x``,
      ``source_shape_y``: area and rounded bounding-box size of ``source``.
    * ``intersection``: comma-joined, sorted unique intersection names
      recorded for the row's ``url`` in ``raw_gcp.csv``.
    * ``year``: first four characters of the URL's file stem.
    * ``map_name``: map variant looked up from ``intersection``.

    Returns:
        The merged ``DataFrame``. Rows whose ``url`` has no entry in
        ``raw_gcp.csv`` are dropped by the (inner) merge.

    Raises:
        KeyError: if an intersection combination is not a known map variant.
    """

    df = pd.read_csv(p+'gcp_transforms.csv', index_col=False)
    df['tf'] = df.matrix.apply(json.loads)
    df['source'] = df.source.apply(wkt.loads).apply(uninvert_poly)
    df['source_area'] = df.source.apply(lambda v: v.area)
    df['source_shape'] = df.source.apply(bound_dims)
    df['source_shape_x'] = df.source_shape.apply(lambda v: v[0])
    df['source_shape_y'] = df.source_shape.apply(lambda v: v[1])

    rgcp = pd.read_csv(p+'raw_gcp.csv', index_col=False)

    def agg_inter(g):
        # Canonical key for a set of intersections: sorted, comma-joined.
        return ','.join(sorted(g.unique()))

    t = rgcp.rename(columns={'image':'url'}).groupby('url').agg({'intersection':agg_inter}).reset_index()
    df = df.merge(t, on='url')

    df['year'] = df.url.apply(lambda v: Path(v).stem[:4])

    # Different maps variants, based on the intersections
    map_map = {
        'Ketner_A,Ketner_Broadway,State_A,State_Broadway':   'columbia',
        '11th_B,11th_Broadway,Front_B,Front_Broadway':       'core_columbia',
        '1st_Ash,1st_Cedar,9th_Ash,9th_Cedar':               'cortez',
        '16th_E,16th_Imperial,7th_E,7th_Imperial':           'east_village_a',
        '16th_Imperial,16th_Market,7th_Imperial,7th_Market': 'east_village_b',
        '4th_Broadway,4th_K,6th_Broadway,6th_K':             'gaslamp',
        '3rd_G,3rd_K,Ketner_G,Ketner_Market':                'marina'}

    # Look the name up per URL rather than assigning a Series computed on `t`:
    # that assignment aligns on the positional index, and the rows of the
    # merged `df` are not guaranteed to line up with the rows of `t`.
    name_by_url = dict(zip(t.url, t.intersection.apply(lambda v: map_map[v])))
    df['map_name'] = df.url.map(name_by_url)

    return df



array(['columbia', 'core_columbia', 'cortez', 'east_village_a',
       'east_village_b', 'gaslamp', 'marina'], dtype=object)

In [3]:
# Load the raw ground-control-point annotations from the data directory `p`.
# `df` is the frame the patch-extraction loop further down iterates over.
df = pd.read_csv(p+'raw_gcp.csv', index_col=False)
# Preview the first rows, transposed so each column reads as a line.
df.head().T

Unnamed: 0,0,1,2,3,4
source,/Users/eric/Google Sync/sandiegodata.org/Proje...,/Users/eric/Google Sync/sandiegodata.org/Proje...,/Users/eric/Google Sync/sandiegodata.org/Proje...,/Users/eric/Google Sync/sandiegodata.org/Proje...,/Users/eric/Google Sync/sandiegodata.org/Proje...
image,http://ds.civicknowledge.org/downtownsandiego....,http://ds.civicknowledge.org/downtownsandiego....,http://ds.civicknowledge.org/downtownsandiego....,http://ds.civicknowledge.org/downtownsandiego....,http://ds.civicknowledge.org/downtownsandiego....
x,1338,1839,1340,1841,1284
y,178,185,1183,1191,163
width,47,45,45,41,36
height,30,32,32,28,30
intersection,4th_Broadway,6th_Broadway,4th_K,6th_K,4th_Broadway


In [16]:
from collections import defaultdict
# From https://stackoverflow.com/a/50962734
def imcrop(img, x1, y1, x2, y2):
    """Crop the box ``[x1, x2) x [y1, y2)`` out of ``img``.

    If the box extends past the image on any side, the image is first padded
    via ``pad_img_to_fit_bbox`` so the crop always has the requested size.

    Args:
        img: Image array indexed as ``[row, col]`` or ``[row, col, channel]``.
        x1, y1: Left/top corner of the box (may be negative).
        x2, y2: Right/bottom corner of the box (may exceed the image size).

    Returns:
        The cropped array, with the same number of dimensions as ``img``.
    """
    height, width = img.shape[:2]
    if x1 < 0 or y1 < 0 or x2 > width or y2 > height:
        img, x1, x2, y1, y2 = pad_img_to_fit_bbox(img, x1, x2, y1, y2)
    # Index only the two spatial axes so single-channel (2-D) images work too;
    # for 3-D images this is identical to adding a trailing ``:``.
    return img[y1:y2, x1:x2]

def pad_img_to_fit_bbox(img, x1, x2, y1, y2):
    """Pad ``img`` so the box ``(x1, x2, y1, y2)`` lies inside it.

    Each side is extended by however far the box overhangs it, using
    ``cv2.BORDER_REPLICATE``, and the box coordinates are shifted by the
    amount added on the left/top so they address the padded image.

    Args:
        img: Image array; ``shape[0]`` is rows, ``shape[1]`` is columns.
        x1, x2: Left and right box edges.
        y1, y2: Top and bottom box edges.

    Returns:
        ``(padded_img, x1, x2, y1, y2)`` with the coordinates expressed in the
        padded image's frame.
    """
    rows, cols = img.shape[0], img.shape[1]

    # Overhang on each side; zero when the box is already inside.
    pad_top = -min(0, y1)
    pad_left = -min(0, x1)
    pad_bottom = max(y2 - rows, 0)
    pad_right = max(x2 - cols, 0)

    padded = cv2.copyMakeBorder(img, pad_top, pad_bottom,
                                pad_left, pad_right, cv2.BORDER_REPLICATE)

    # Only the top/left padding moves the origin.
    return padded, x1 + pad_left, x2 + pad_left, y1 + pad_top, y2 + pad_top

# Half-width, in pixels, of the square patch cut around each annotated point;
# every patch is (2 * EXTRACT_SIZE) pixels on a side.
EXTRACT_SIZE = 50

# Maps intersection name -> list of image patches cropped around that point.
intersections = defaultdict(list)
for idx, r in df.iterrows():

    try:
        source_img = get_image(r.image)
    except Exception as exc:
        # Best effort: an image that cannot be fetched is skipped, but say so
        # instead of silently dropping it (and let KeyboardInterrupt through).
        print("Skipping", r.image, "-", exc)
        continue

    if source_img is None:
        # cv2.imdecode returns None when the bytes are not a decodable image.
        print("Skipping", r.image, "- could not decode image")
        continue

    # imcrop returns a single array; keep the whole patch. (Star-unpacking the
    # result would split it along its first axis and keep only one pixel row.)
    crop = imcrop(source_img, r.x-EXTRACT_SIZE, r.y-EXTRACT_SIZE, r.x+EXTRACT_SIZE, r.y+EXTRACT_SIZE)

    intersections[r.intersection].append(crop)
    

0 4th_Broadway
1 6th_Broadway
2 4th_K
3 6th_K
4 4th_Broadway
5 6th_Broadway
6 4th_K
7 6th_K
8 4th_Broadway
9 6th_Broadway
10 4th_K
11 6th_K
12 4th_Broadway
13 6th_Broadway
14 4th_K
15 6th_K
16 4th_Broadway
17 6th_Broadway
18 4th_K
19 6th_K
20 4th_Broadway
21 6th_Broadway
22 4th_K
23 6th_K
24 Ketner_Market
25 Ketner_G
26 3rd_G
27 3rd_K
28 Ketner_Market
29 Ketner_G
30 3rd_G
31 3rd_K
32 Ketner_Market
33 Ketner_G
34 3rd_G
35 3rd_K
36 Ketner_Market
37 Ketner_G
38 3rd_G
39 3rd_K
40 Ketner_Market
41 Ketner_G
42 3rd_G
43 3rd_K
44 Ketner_Market
45 Ketner_G
46 3rd_G
47 3rd_K
48 Ketner_Market
49 Ketner_G
50 3rd_G
51 3rd_K
52 Ketner_Market
53 Ketner_G
54 3rd_G
55 3rd_K
56 Ketner_Market
57 Ketner_G
58 3rd_G
59 3rd_K
60 Ketner_Market
61 Ketner_G
62 3rd_G
63 3rd_K
64 Ketner_Market
65 Ketner_G
66 3rd_G
67 3rd_K
68 Ketner_Market
69 Ketner_G
70 3rd_G
71 3rd_K
72 4th_Broadway
73 6th_Broadway
74 4th_K
75 6th_K
76 6th_K
77 6th_Broadway
78 4th_K
79 4th_Broadway
80 6th_K
81 6th_Broadway
82 4th_K
83 4th_Broad

In [17]:
len(intersections)


26

In [19]:
for k,v in intersections.items():
    print(k, len(v))

4th_Broadway 53
6th_Broadway 53
4th_K 53
6th_K 53
Ketner_Market 54
Ketner_G 54
3rd_G 54
3rd_K 54
Ketner_Broadway 31
State_Broadway 31
State_A 31
Ketner_A 31
Front_Broadway 53
Front_B 53
11th_B 53
11th_Broadway 53
7th_Market 11
7th_Imperial 65
16th_Market 11
16th_Imperial 65
7th_E 54
16th_E 54
1st_Cedar 54
1st_Ash 54
9th_Cedar 54
9th_Ash 54
