In [181]:
import requests, json
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
from bs4 import BeautifulSoup



In [182]:
# old dataset:
areas_to_pop = pd.read_csv("data/mow-areas-pop.csv", sep=";")
areas_to_pop = areas_to_pop[["district_code", "area_name", "population"]]

areas_to_pop.head()

Unnamed: 0,district_code,area_name,population
0,tso,Арбат,35796
1,tso,Басманный,110928
2,tso,Замоскворечье,58665
3,tso,Красносельский,47865
4,tso,Мещанский,60044


In [184]:
areas_lst  = list()

with open('data/osm-mow.json', 'r') as fcc_file:
    fcc_data = json.load(fcc_file)
    areas = fcc_data['address']
    for area in areas:
        if area['admin_level'] == 8:
            areas_lst.append(area)


In [185]:
boundary_link = 'https://www.openstreetmap.org/relation/'
way_link = "https://www.openstreetmap.org/way/"
node_link = "https://www.openstreetmap.org/node/"

In [186]:
def parse_data(data):
    """
    :param data: район из osm-файла
    :returns: координаты
    """
    points = []
    def parse_id(l):
        for i in range(len(l) - 1, -1, -1):
            if l[i].isnumeric():
                return l[i]
            if len(l[i]) > 2:
                rm_braces = l[i][1:len(l[i]) - 1]
                if rm_braces.isnumeric():
                    return rm_braces

    def parse_node(node_id):
        node = requests.get(node_link + node_id).text
        soup = BeautifulSoup(node, "html.parser")
        lon = soup.findAll('span', class_ = 'longitude')[0].text
        lat = soup.findAll('span', class_ = 'latitude')[0].text
        points.append(Point(float(lon), float(lat)))


    def parse_way(way_id):
        way = requests.get(way_link + way_id).text
        soup = BeautifulSoup(way, "html.parser")
        all_nodes = soup.findAll('a', class_='node')
        for node in all_nodes:
            text = node.text.split()
            node_id = parse_id(text)
            parse_node(node_id)

    relation = requests.get(boundary_link + str(data["osm_id"])).text
    soup = BeautifulSoup(relation, "html.parser")
    all_ways = soup.findAll('li', class_='way')
    for way in all_ways:
        text = way.text.split()
        way_id = parse_id(text)
        parse_way(way_id)

    return points



In [187]:
def build_poly(points):
    """
    :param points: точки
    :returns: многоугольник Shapely
    """
    poly = Polygon([[p.x, p.y] for p in points])
    return poly

In [189]:
result_df = pd.DataFrame(columns=["district_code", "area_name", "population", "geometry"])



In [190]:
cnt = 0
errors = []
names = list(areas_to_pop['area_name'])
print(names)
for area in areas_lst:
    name = area['localname'].split()
    if 'район' in name:
        name.remove('район')
    if 'поселение' in name:
        name.remove('поселение')
    name = " ".join(name)
    print(name)
    if name not in names:
        cnt += 1
        errors.append(name)
        print("pass", name, cnt)
        continue
    try:
        areas_to_pop_line = areas_to_pop.loc[areas_to_pop['area_name'] == name]
        area_points = parse_data(area)
        area_poly = build_poly(area_points)
        areas_to_pop_line.loc[:, "geometry"] = area_poly
        result_df = result_df.append(areas_to_pop_line, ignore_index=True)
    except Exception as e:
        print("error parsing", name)
        errors.append(name)
    cnt += 1
    print(cnt)


result_df.to_csv("data/mow-areas-pop-geom.csv", index=True)

['Арбат', 'Басманный', 'Замоскворечье', 'Красносельский', 'Мещанский', 'Пресненский', 'Таганский', 'Тверской', 'Хамовники', 'Якиманка']
Якиманка


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  areas_to_pop_line.loc[:, "geometry"] = area_poly
  result_df = result_df.append(areas_to_pop_line, ignore_index=True)


1
Арбат


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  areas_to_pop_line.loc[:, "geometry"] = area_poly
  result_df = result_df.append(areas_to_pop_line, ignore_index=True)


2
Басманный


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  areas_to_pop_line.loc[:, "geometry"] = area_poly
  result_df = result_df.append(areas_to_pop_line, ignore_index=True)


3
Замоскворечье


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  areas_to_pop_line.loc[:, "geometry"] = area_poly
  result_df = result_df.append(areas_to_pop_line, ignore_index=True)


4
Красносельский


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  areas_to_pop_line.loc[:, "geometry"] = area_poly
  result_df = result_df.append(areas_to_pop_line, ignore_index=True)


5
Мещанский


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  areas_to_pop_line.loc[:, "geometry"] = area_poly
  result_df = result_df.append(areas_to_pop_line, ignore_index=True)


6
Пресненский


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  areas_to_pop_line.loc[:, "geometry"] = area_poly
  result_df = result_df.append(areas_to_pop_line, ignore_index=True)


7
Таганский


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  areas_to_pop_line.loc[:, "geometry"] = area_poly
  result_df = result_df.append(areas_to_pop_line, ignore_index=True)


8
Тверской


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  areas_to_pop_line.loc[:, "geometry"] = area_poly
  result_df = result_df.append(areas_to_pop_line, ignore_index=True)


9
Хамовники
10


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  areas_to_pop_line.loc[:, "geometry"] = area_poly
  result_df = result_df.append(areas_to_pop_line, ignore_index=True)
