In [71]:
import shapely.geometry
import pyproj
import json
import statistics

from grid import Grid
from cell import Cell
from entrie import Entrie, EntrieType 
from utils import *

In [2]:
%pylab inline
import pandas as pd

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [36]:
# Per-city configuration, keyed by city abbreviation.
#   borders - pair of corner points handed to Grid.make_grid_in_degrees
#             (presumably the north-west and south-east corners as
#             (lat, lon) - TODO confirm against Grid)
#   center  - reference point used for the distance-to-center feature
#   grid    - placeholder, filled in once the city's grid has been built
coord_dict = dict(
    spb=dict(
        borders=((60.080844, 30.175307), (59.833302, 30.525166)),
        center=(59.9343, 30.3351),
        grid=None,
    ),
    smr=dict(
        borders=((53.277885, 50.056253), (53.174675, 50.319944)),
        center=(53.2415, 50.2212),
        grid=None,
    ),
)

In [4]:
def make_grid(city_abbr):
    """Build the entry grid for one city and fill it with the parsed entries.

    Reads ``./data/entries/parsed/{city_abbr}.json``, which maps an
    ``EntrieType`` value to a list of raw entries, creates a grid over the
    city's borders from ``coord_dict`` and adds every entry to it.
    Progress is printed after every 100 entries of a type and after each
    completed type.

    Args:
        city_abbr: key into ``coord_dict``; also the JSON file's base name.

    Returns:
        The populated ``Grid``.
    """
    # JSON is defined as UTF-8; do not depend on the platform's default encoding.
    with open(f'./data/entries/parsed/{city_abbr}.json', encoding='utf-8') as file:
        entries_by_type = json.load(file)

    # 0.001 is the third argument of make_grid_in_degrees
    # (presumably the cell size in degrees - TODO confirm against Grid).
    grid = Grid.make_grid_in_degrees(*coord_dict[city_abbr]['borders'], 0.001)

    for type_no, (entrie_type_value, raw_entries) in enumerate(entries_by_type.items(), start=1):
        entrie_type = EntrieType(entrie_type_value)
        for entrie_no, raw_entrie in enumerate(raw_entries, start=1):
            grid.add_entrie(Entrie(*raw_entrie), entrie_type)
            if entrie_no % 100 == 0:
                print(f'{entrie_no}/{len(raw_entries)}')
        print(f'{type_no}/{len(entries_by_type)}')

    return grid

In [42]:
def extract_public_trasport_stops(city_abbr, stop_types=('subway_station', 'tram_stop', 'bus_stop')):
    """Collect the first field of every public-transport entry of a city.

    Reads ``./data/entries/parsed/{city_abbr}.json`` and concatenates, in the
    order given by ``stop_types``, the first element of each raw entry found
    under those keys.

    Args:
        city_abbr: base name of the parsed-entries JSON file.
        stop_types: entry-type keys to collect. A key that is absent from the
            file contributes nothing, so a city lacking one of the stop
            types (for example, no subway) does not raise ``KeyError``.

    Returns:
        A list with one item per stop.
    """
    # JSON is defined as UTF-8; do not depend on the platform's default encoding.
    with open(f'./data/entries/parsed/{city_abbr}.json', encoding='utf-8') as file:
        entries_by_type = json.load(file)
    return [
        raw_entrie[0]
        for stop_type in stop_types
        for raw_entrie in entries_by_type.get(stop_type, [])
    ]
    

In [17]:
# Build the grid of every configured city and store it in that city's 'grid' slot.
for city_abbr, city_cfg in coord_dict.items():
    city_cfg['grid'] = make_grid(city_abbr)

In [45]:
# Short aliases for the per-city grids stored in coord_dict, used by the
# exploratory cells below. Taking them from coord_dict (rather than writing
# pre-existing globals into it) keeps this cell runnable on a fresh kernel.
spb_grid = coord_dict['spb']['grid']
smr_grid = coord_dict['smr']['grid']

In [33]:
# Scratch query: entries around one point with a radius argument of 100
# (the same Grid method cook_table calls with 500). The result is not read
# by any later cell visible in this notebook.
e = spb_grid.get_all_entries_in_radius((59.962719, 30.267485), 100)

In [91]:
def cook_table(city_abbr):
    """Build the per-flat feature table for one city.

    Reads ``./data/flats/{city_abbr}.csv`` and, for every flat, records its
    basic attributes, the price per unit of area, the distance to the city
    center and to the subway, salary statistics of the vacancies found
    around it, and the number of entries of every other type lying within
    500 of it as measured by ``distance`` (presumably metres - TODO confirm).

    Flats for which the salary statistics cannot be computed (for example,
    no vacancy entries around the flat) are left out of the result. A
    progress counter is printed after every 100 retained flats.

    Args:
        city_abbr: key into ``coord_dict``; also the CSV file's base name.
            The city's ``'grid'`` slot must already be populated.

    Returns:
        pandas.DataFrame with one row per retained flat.
    """
    flats = pd.read_csv(f'./data/flats/{city_abbr}.csv').drop(columns=['adress', 'subway_station'])
    flats['price_per_m'] = (flats['price'] / flats['area']).map(int)
    flats = flats.drop(columns=['price'])

    # Per-city lookups do not change between rows, so resolve them once.
    center = coord_dict[city_abbr]['center']
    grid = coord_dict[city_abbr]['grid']

    flats['distance_to_center'] = [
        distance(point, center) for point in zip(flats.latitude, flats.longitude)
    ]

    feature_list = []
    for _, row in flats.iterrows():
        d = {'price_per_m': int(row['price_per_m']),
             'rooms': int(row['rooms']),
             'area': row['area'],
             'floor': int(row['floor']),
             'floors_total': int(row['floors_total']),
             'distance_to_subway': int(row['distance to subway']),
             'distance_to_center': int(row['distance_to_center'])
            }

        lat, lon = row["latitude"], row["longitude"]
        # Keyed by radius (as a string) so further radii can be added here;
        # each key becomes the suffix of the generated count columns.
        entries = {
            '500': grid.get_all_entries_in_radius((lat, lon), 500),
        }

        try:
            salaries = [e.weight for e in entries['500'][EntrieType.VACANCY]]
            d['mean_salary'] = int(statistics.mean(salaries))
            d['median_salary'] = int(statistics.median(salaries))
        except Exception:
            # Best effort: a flat without usable salary data is skipped.
            # `Exception` rather than a bare `except` so that Ctrl-C
            # (KeyboardInterrupt) still stops this long-running loop.
            continue

        for key in entries['500'].keys():
            if key == EntrieType.APARTMENT:
                continue
            for dist, found in entries.items():
                # The grid query may return more than the exact circle, so
                # re-check every entry against the radius before counting.
                # (`coorditates` is the attribute name as this code reads it;
                # its definition is outside this notebook.)
                d[f'{key.value}_{dist}'] = sum(
                    1 for e in found[key]
                    if distance(e.coorditates, (lat, lon)) <= int(dist)
                )

        feature_list.append(d)
        if len(feature_list) % 100 == 0:
            print(len(feature_list))
    return pd.DataFrame(feature_list)

In [93]:
# Build the feature table for the 'smr' city (requires its grid to be built).
smr_table = cook_table('smr')

In [95]:
# Persist the 'smr' feature table as CSV, without writing the row index.
smr_table.to_csv('./datasets/smr.csv', index=False)

In [83]:
# NOTE(review): incomplete statement - the right-hand side is missing, so this
# cell is a SyntaxError as saved. The original expression cannot be recovered
# from the notebook; restore it (presumably a pd.read_csv of a saved dataset -
# TODO confirm) or delete the cell.
df = 

In [84]:
# Preview the first rows only instead of rendering the whole frame.
df.head()

Unnamed: 0,area,bank_1000,bank_500,bus_stop_1000,bus_stop_500,cafe_1000,cafe_500,cinema_1000,cinema_500,distance_to_center,...,shop_1000,shop_500,subway_station_1000,subway_station_500,tram_stop_1000,tram_stop_500,univercity_1000,univercity_500,vacancy_1000,vacancy_500
0,109.4,3,0,61,20,6,1,0,0,4929,...,19,3,0,0,0,0,0,0,92,1


In [4]:
# Scratch check: construct a Cell from [0, 0] and keep its `entries` attribute.
# This global is not read by any later cell visible in this notebook.
entries = Cell([0, 0]).entries

405.4873611929934

In [25]:
# Scratch check of Grid.find_suitable_position on the 'smr' grid.
# NOTE(review): the arguments are written (50.x, 53.x), while the 'smr' points
# in coord_dict are written (53.x, 50.x) - confirm the argument order this
# method expects.
smr_grid.find_suitable_position(50.1995781, 53.2009557)

In [47]:
# Scratch query: entries around the first 'smr' border corner with a radius
# argument of 10000. `r` is not read by any later cell visible here.
r = smr_grid.get_all_entries_in_radius((53.277885, 50.056253), 10000)

In [91]:
# Scratch check of `distance` from the first 'smr' border corner.
# NOTE(review): the second point's components are in the opposite order to the
# first (50.x, 53.x vs 53.x, 50.x); the recorded result is about 405396 -
# confirm which coordinate order was intended.
distance((53.277885, 50.056253), (50.1995781, 53.2009557))

405396.3914606943

In [88]:
# Inspect the centers of the first two cells in the first row of `grid.cells`.
# NOTE(review): no visible cell assigns a global named `grid` (it only exists
# as a local inside functions), so this relies on leftover kernel state.
grid.cells[0][0].center, grid.cells[0][1].center

((60.14131115764205, 29.9328822447388), (60.14131115764205, 29.93366073117118))

In [26]:
# Midpoint between the centers of the third and fourth cells of the first row.
# NOTE(review): depends on a global `grid` that no visible cell assigns.
midpoint(grid.cells[0][2].center, grid.cells[0][3].center)

(60.16331988210298, 30.04919449246976)

In [83]:
# Experiment: generate a lattice of EPSG:4326 points over a small rectangle by
# stepping through it in EPSG:3857 (metric) coordinates.

# Build both transformers once and reuse them for every point.
# always_xy=True keeps the (x, y) = (lon, lat) axis order used by the corner
# points below, matching what the legacy Proj(init=...) objects provided.
to_metric = pyproj.Transformer.from_crs('EPSG:4326', 'EPSG:3857', always_xy=True)
to_lonlat = pyproj.Transformer.from_crs('EPSG:3857', 'EPSG:4326', always_xy=True)

# Corners of the rectangle to be turned into a grid.
# NOTE(review): despite the names, `nw` holds the smaller x and the smaller y,
# and both loops below walk upwards from it towards `se`.
nw = shapely.geometry.Point((-5.0, 40.0))
se = shapely.geometry.Point((-4.999, 40.03439880201911))

stepsize = 50

# Project the corners into the metric CRS.
start_x, start_y = to_metric.transform(nw.x, nw.y)
end_x, end_y = to_metric.transform(se.x, se.y)

# Walk the 2D area: one inner list per x step, one [x, y] pair per y step,
# each converted back to EPSG:4326.
gridpoints = []
x = start_x
while x < end_x:
    y = start_y
    row = []
    while y < end_y:
        lon, lat = to_lonlat.transform(x, y)
        row.append([lon, lat])
        # 1.314 is an empirical factor on the y step (presumably tuned so the
        # spacing measured by `distance` comes out near `stepsize` - TODO confirm).
        y += stepsize * 1.314
    gridpoints.append(row)
    x += stepsize

In [85]:
# Spot check of `distance` on two points that share the first component and
# differ by about 0.00045 in the second; the recorded result is about 50.14.
distance([-5.0, 40.00045211267976], [-5.0, 40.0])

50.137436430313635

In [None]:
# Display the generated lattice.
# NOTE(review): this renders the entire nested list; consider showing a slice.
gridpoints

In [63]:
# Spot check of `distance` across the rectangle's full extent in the second
# component (40.0 to 40.03439880201911); the recorded result is about 3814.69.
distance((-5.0, 40.03439880201911), [-5.0, 40.0])

3814.68564478778