In [79]:
import wikipedia
import pandas as pd
import geopandas as gpd
from shapely.wkt import loads
from shapely.geometry import Polygon
import requests
import json
import numpy as np
from tqdm import tqdm_notebook
from qwikidata.sparql import (get_subclasses_of_item,
                              return_sparql_query_results)
from datetime import date

In [177]:
WIKIMAPIA_API_KEY = 'wikimapia api key'

### Get all categories from Wikimapia

In [178]:
API_CATEGORIES = 'http://api.wikimapia.org/?function=category.getall'
paramsCat={'key': WIKIMAPIA_API_KEY,
           'format': 'json'}
response_category = requests.get(API_CATEGORIES, params=paramsCat)
response_category = response_category.json()
category_df = pd.DataFrame(response_category["categories"])
category_df.head()

Unnamed: 0,id,amount,icon,name
0,46535,32092430,http://wikimapia.org/mapico/00/00/00/00/00.png,place without photos
1,46534,21345131,http://wikimapia.org/mapico/00/00/00/00/00.png,place without description
2,46532,14909883,http://wikimapia.org/mapico/00/00/00/00/00.png,place without category
3,46533,7704344,http://wikimapia.org/mapico/00/00/00/00/00.png,building without address
4,164,3121855,http://wikimapia.org/mapico/00/00/00/00/45.png,house


In [134]:
category_df["name"].unique()

array(['place without photos', 'place without description',
       'place without category', 'building without address', 'house',
       'home', 'dwelling', 'residence', 'villa', 'building',
       'place without polygon', 'apartments', 'tenement',
       'apartment building', 'block of flats', 'tower block',
       'shopping and services', 'store / shop',
       'place with triangular polygon', 'village', 'dining and leisure',
       'education', 'school', 'schooling', 'schoolhouse',
       'nonresidential building', 'water', 'electric power', 'religion',
       'religious', 'faith', 'do not draw title', 'restaurant', 'eatery',
       'lake', 'park', 'power transmission line', 'hydro tower',
       'electricity pylon', 'transmission tower (electric / electricity)',
       'production', 'industry', 'administrative division',
       'constructions by dates', 'venue', 'sports venue', 'sports centre',
       'sports center', 'place of worship',
       'place with historical importance'], 

In [141]:
category_df[category_df["name"] == "building"]

Unnamed: 0,id,amount,icon,name
9,109,2874073,http://wikimapia.org/mapico/00/00/00/00/00.png,building


### Get objects with category by bbox 

In [184]:
category_id = '109'

In [168]:
API_URL = 'http://api.wikimapia.org/?function=box'
params={'key': WIKIMAPIA_API_KEY,
        'bbox': '37.637723,55.740268,37.746049,55.804392',
        'language':'ru',
        'format': 'json',
        'category': category_id,
        'page': 1,
        'count': 100}

In [169]:
response = requests.get(API_URL, params=params)
response_json = response.json()
response_df = pd.DataFrame(response_json["folder"])
int(response_json['found']), response_df.shape

(166, (100, 5))

у викимапии постраничная выгрузка объектов, на одной странице максимум 100 объектов, но  при первом запросе api можно по полю "found" узнать точное число объектов в выбранном  bbox. Значение "found" нужно разделить на 100, округлить и получится общее число страниц. после чего повторяем вызов api с нужными номерами страниц, указанные в параметре запроса

In [170]:
totalObj = int(response_json['found'])
if totalObj > 100:
    pageCount = round(totalObj/100)
    pageArray = list(range(2, pageCount + 1))
    for i in pageArray:
        params={'key': WIKIMAPIA_API_KEY,
                'bbox': '37.637723,55.740268,37.746049,55.804392',
                'language':'ru',
                'format': 'json',
                'category': category_id,
                'page': i,
                'count': 100}
        response = requests.get(API_URL, params=params)
        response_json = response.json()
        tmp_df = pd.DataFrame(response_json["folder"])
        response_df = response_df.append(tmp_df).reset_index(drop=True) 

response_df.shape

(166, 5)

In [171]:
response_df.loc[0, "polygon"]

[{'x': 37.6941695, 'y': 55.7686025},
 {'x': 37.6943493, 'y': 55.7686999},
 {'x': 37.6944461, 'y': 55.7686454},
 {'x': 37.6944716, 'y': 55.7686575},
 {'x': 37.6952709, 'y': 55.7681988},
 {'x': 37.695299, 'y': 55.7682139},
 {'x': 37.6957242, 'y': 55.7679725},
 {'x': 37.695692, 'y': 55.7679566},
 {'x': 37.6970974, 'y': 55.7671471},
 {'x': 37.6971157, 'y': 55.7671557},
 {'x': 37.6971404, 'y': 55.7671599},
 {'x': 37.6971645, 'y': 55.7671599},
 {'x': 37.6971873, 'y': 55.7671547},
 {'x': 37.698229, 'y': 55.7677583},
 {'x': 37.6953769, 'y': 55.7693722},
 {'x': 37.695589, 'y': 55.7694909},
 {'x': 37.6986138, 'y': 55.7677806},
 {'x': 37.6971338, 'y': 55.7669165},
 {'x': 37.6954948, 'y': 55.7678442},
 {'x': 37.6954707, 'y': 55.7678299},
 {'x': 37.6950415, 'y': 55.7680751},
 {'x': 37.695067, 'y': 55.7680887}]

In [172]:
def convertToWKT(x):
    '''
    Функция конвертирует полигон викимапии в формат WKT
    '''
    newArray = Polygon([list(item.values()) for item in x])
    return newArray

In [173]:
response_df['geometry'] = response_df["polygon"].apply(lambda x: convertToWKT(x))

In [174]:
response_df.head()

Unnamed: 0,id,name,url,location,polygon,geometry
0,33536587,"Госпитальная пл., 1–3 строение 1",http://wikimapia.org/33536587/ru/%D0%93%D0%BE%...,"{'north': 55.7694909, 'south': 55.7669165, 'ea...","[{'x': 37.6941695, 'y': 55.7686025}, {'x': 37....","POLYGON ((37.6941695 55.7686025, 37.6943493 55..."
1,39121011,"ул. Пруд Ключики, 2б",http://wikimapia.org/39121011/ru/%D1%83%D0%BB-...,"{'north': 55.7472352, 'south': 55.7460849, 'ea...","[{'x': 37.728534, 'y': 55.7472352}, {'x': 37.7...","POLYGON ((37.728534 55.7472352, 37.7294218 55...."
2,24180661,Гематологический корпус Главного клинического ...,http://wikimapia.org/24180661/ru/%D0%93%D0%B5%...,"{'north': 55.7696572, 'south': 55.7678527, 'ea...","[{'x': 37.6952499, 'y': 55.7696044}, {'x': 37....","POLYGON ((37.6952499 55.7696044, 37.6955127 55..."
3,31512689,"Авиамоторная ул., 57 строение 13",http://wikimapia.org/31512689/ru/%D0%90%D0%B2%...,"{'north': 55.7427848, 'south': 55.7410344, 'ea...","[{'x': 37.7235391, 'y': 55.7415546}, {'x': 37....","POLYGON ((37.7235391 55.7415546, 37.7243345 55..."
4,39121003,"ул. Пруд Ключики, 2 строение 4",http://wikimapia.org/39121003/ru/%D1%83%D0%BB-...,"{'north': 55.747198, 'south': 55.7462736, 'eas...","[{'x': 37.7273619, 'y': 55.747198}, {'x': 37.7...","POLYGON ((37.7273619 55.747198, 37.7280539 55...."


In [175]:
response_df[["id", "name", "geometry"]].to_csv("wikimapia_polygon.csv", index = False)

### Get object description by ID

In [None]:
# c этим надо поработать еще, я лимит свой израсходовала

In [None]:
API_OBJ = 'http://api.wikimapia.org/?function=object'

In [230]:
df = pd.DataFrame()

In [231]:
for i in response_df["id"]:
    params_obj = {'key': WIKIMAPIA_API_KEY,
                  'id': i,
                  'format': 'json'}
    
    response2 = requests.get(API_OBJ, params=params_obj)
    response2 = response2.json()
    obj_tmp_df = pd.DataFrame({
                    "id": [i],
                    "name": [response2["titleSuperArray"]["title"]]
                    })
    
    df = df.append(obj_tmp_df).reset_index(drop=True)


KeyError: 'titleSuperArray'

In [229]:
response2

{'debug': {'code': 1004, 'message': 'Key limit has been reached'}}

In [193]:
response2.json()

{'is_deleted': False,
 'is_protected': False,
 'is_photos': True,
 'is_building': True,
 'is_region': False,
 'title': None,
 'description': None,
 'wikipedia': None,
 'house_no': None,
 'tags_ids': [84530, 51457, 87294, 84911, 85230],
 'text_editor_user_id': None,
 'text_edit_date': None,
 'geo_status_old': 140287,
 'pl': 367.13961921863,
 'xc': 376491385,
 'yc': 557460247,
 'country_adm_id': 197704,
 'x': '376491309',
 'y': '557460336',
 'zoom': '19',
 'w': '5116',
 'h': '3996',
 'polygon_x': '376489719',
 'polygon_y': '557462334',
 'polygon': [{'x': 37.6489719, 'y': 55.7462334},
  {'x': 37.649153, 'y': 55.7462185},
  {'x': 37.649147, 'y': 55.7461948},
  {'x': 37.6493867, 'y': 55.7461749},
  {'x': 37.6493625, 'y': 55.7460755},
  {'x': 37.6492445, 'y': 55.7460844},
  {'x': 37.649215, 'y': 55.7459545},
  {'x': 37.649333, 'y': 55.745944},
  {'x': 37.6493089, 'y': 55.7458473},
  {'x': 37.6490658, 'y': 55.7458664},
  {'x': 37.6490584, 'y': 55.7458338},
  {'x': 37.6488751, 'y': 55.7458494}