# Парсинг сайта cian.ru

In [1]:
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
import time
import math

In [2]:
def html_stripper(text):
    """Return ``str(text)`` with every ``<...>`` tag removed.

    The argument is stringified first, so ``None`` yields the string ``'None'``.
    """
    tag_pattern = re.compile('<[^<]+?>')
    return tag_pattern.sub('', str(text))

# Функции, загружающие наши данные со страниц квартир:

### Цена

In [6]:
def getPrice(flat_page):
    """Return the listing price as an int.

    The markup of the ``object_descr_price`` div is split on ``<div>``,
    ``руб`` and every non-word character; the purely numeric pieces are the
    thousands groups of the price.  Only the last three groups are joined.

    Raises:
        ValueError: if the price block is missing or holds no digits.
    """
    price = flat_page.find('div', attrs={'class':'object_descr_price'})
    # Raw string: '\W' in a plain literal is an invalid escape sequence.
    groups = [part for part in re.split(r'<div>|руб|\W', str(price)) if part.isdigit()]
    if not groups:
        raise ValueError('price block is missing or contains no digits')
    # NOTE(review): keeping only the last three groups assumes a price below
    # one billion with no other numbers after it in the block - confirm.
    return int("".join(groups[-3:]))

### Координаты квартиры: широта, долгота и расстояние от центра до квартиры (Lat, lon, dist). Возьмём формулу гаверсинуса:

In [77]:
from math import radians, cos, sin, asin, sqrt
# Sphere radius used by the haversine formula (mean Earth radius,
# presumably in kilometres, which makes the distance kilometres too).
avg_radius = 6371

def getDist(flat_page):
    """Return ``(lat, lon, dist)`` for a listing page.

    ``lat`` and ``lon`` are the first two pieces starting with a digit after
    splitting the second child of the ``map_info_button_extend`` div on
    ``&amp``, ``center=`` and ``%2C``.  ``dist`` is the haversine distance
    from that point to the reference centre (the Mausoleum,
    55.753639, 37.619833), scaled by ``avg_radius``.

    Raises:
        AttributeError: if the map div is not on the page.
        IndexError: if fewer than two coordinate pieces are found.
    """
    coords = flat_page.find('div', attrs={'class':'map_info_button_extend'}).contents[1]
    pieces = re.split('&amp|center=|%2C', str(coords))
    # piece[:1] is '' for an empty piece, so empty strings produced by
    # re.split are skipped instead of raising IndexError on piece[0].
    # NOTE(review): negative coordinates start with '-' and are skipped.
    coords_list = [piece for piece in pieces if piece[:1].isdigit()]
    lat = float(coords_list[0])
    lon = float(coords_list[1])
    # Haversine distance to the reference centre point.
    lat1, lon1, latC, lonC = map(radians, (lat, lon, 55.753639, 37.619833))
    d = sin((latC - lat1) * 0.5) ** 2 + cos(lat1) * cos(latC) * sin((lonC - lon1) * 0.5) ** 2
    h = 2 * avg_radius * asin(sqrt(d))
    return lat, lon, h

###  Число комнат:

In [8]:
def getRooms(flat_page):
    """Return the room count taken from the listing title, as a string.

    The tag-stripped ``object_descr_title`` text is split on ``-`` and
    newlines.  Every fragment before the first one containing ``комн`` is
    kept, and all whitespace is removed from the result.
    """
    title = html_stripper(flat_page.find('div', attrs={'class':'object_descr_title'}))
    leading = []
    for fragment in re.split('-|\n', title):
        if 'комн' in fragment:
            break
        leading.append(fragment)
    return "".join("".join(leading).split())

### Этаж:

In [9]:
def getFloor(flat_page):
    """Return the flat's floor as a string of digits, or None if not found.

    The floor is the first whole number in the text between ``Этаж`` and
    the next ``Этаж`` / ``Тип дома`` marker of the properties table.
    """
    table = flat_page.find('table', attrs = {'class':'object_descr_props'})
    table = html_stripper(table)
    sections = re.split('Этаж|Тип дома', table)
    if len(sections) < 2:
        # No floor marker in the table at all.
        return None
    # Whole digit runs, not single characters: floor 12 must not become '1'.
    numbers = re.findall(r'\d+', sections[1])
    return numbers[0] if numbers else None

### Общее число этажей в доме:

In [10]:
def getNfloors(flat_page):
    """Return the building's total floor count as a digit string, or None.

    The count is the second whole number in the text between ``Этаж`` and
    the next ``Этаж`` / ``Тип дома`` marker of the properties table.
    """
    table = flat_page.find('table', attrs = {'class':'object_descr_props'})
    table = html_stripper(table)
    sections = re.split('Этаж|Тип дома', table)
    if len(sections) < 2:
        # No floor marker in the table at all.
        return None
    # Whole digit runs: for "12 / 17" the second number is '17', not the
    # second digit character '2'.
    numbers = re.findall(r'\d+', sections[1])
    return numbers[1] if len(numbers) > 1 else None

### Новостройка ли дом:

In [11]:
def getNew(flat_page):
    """Return 1 for a new build, 0 for a resale flat, None if unknown.

    Reads the text between ``Тип дома:`` and ``Высота потолков:`` in the
    properties table, splits it on newlines and commas, and compares the
    third piece (whitespace removed) with ``новостройка`` / ``вторичка``.
    """
    table = flat_page.find('table', attrs = {'class':'object_descr_props'})
    table = html_stripper(table)
    try:
        house_type = re.split('Тип дома:|Высота потолков:', table)[1]
        house_type = "".join(re.split('\n|,', house_type)[2].split())
    except IndexError:
        # Section or piece missing: report "unknown" instead of raising.
        return None
    return {'новостройка': 1, 'вторичка': 0}.get(house_type)

### Тип дома:

In [12]:
def getBrick(flat_page):
    """Return 1 if the building material is in the accepted list, else 0.

    Reads the text between ``Тип дома:`` and ``Высота потолков:`` in the
    properties table, splits it on newlines and commas, and takes the fifth
    piece with all whitespace removed.  Returns None when the section or
    that piece is missing.
    """
    table = flat_page.find('table', attrs = {'class':'object_descr_props'})
    table = html_stripper(table)
    try:
        house_type = re.split('Тип дома:|Высота потолков:', table)[1]
        house_type = "".join(re.split('\n|,', house_type)[4].split())
    except IndexError:
        return None
    accepted = ['кирпичный', 'кирпично-монолитный', 'монолитный', 'панельный дом', 'панельный']
    # The page value has had all whitespace removed, so the accepted names
    # are normalised the same way; otherwise 'панельный дом' never matches.
    accepted_normalised = {"".join(name.split()) for name in accepted}
    return 1 if house_type in accepted_normalised else 0

### Есть ли балкон:

In [13]:
def getBal(flat_page):
    """Return 1 if the balcony field starts with a number, else 0.

    Looks at the first whitespace-separated token on the second line of the
    text between ``Балкон:`` and ``Лифт`` in the properties table.  A
    missing section or blank line counts as no balcony (0).
    """
    table = flat_page.find('table', attrs = {'class':'object_descr_props'})
    table = html_stripper(table)
    try:
        balcony_info = re.split('Балкон:|Лифт', table)[1]
        first_token = re.split('\n', balcony_info)[1].split()[0]
    except IndexError:
        # Section absent or the value line is blank.
        return 0
    return 1 if first_token.isdigit() else 0

### Сколько балконов:

In [14]:
def getBalNum(flat_page):
    """Return the number of balconies.

    Looks at the first whitespace-separated token on the second line of the
    text between ``Балкон:`` and ``Лифт`` in the properties table.  If it
    is numeric it is returned unchanged (a string of digits); otherwise,
    or when the section or line is missing, the int ``0`` is returned.
    """
    table = flat_page.find('table', attrs = {'class':'object_descr_props'})
    table = html_stripper(table)
    try:
        balcony_info = re.split('Балкон:|Лифт', table)[1]
        first_token = re.split('\n', balcony_info)[1].split()[0]
    except IndexError:
        # Section absent or the value line is blank.
        return 0
    # NOTE(review): the str-or-int return type is kept for compatibility.
    return first_token if first_token.isdigit() else 0

### Площадь квартиры:

In [15]:
def getTotsp(flat_page):
    """Return the total area as a string with a dot as decimal separator.

    Takes the text between ``Общая площадь:`` and ``Площадь комнат`` in the
    properties table, splits it on newlines and non-breaking spaces, and
    returns the third piece with commas replaced by dots.
    """
    props = html_stripper(flat_page.find('table', attrs = {'class':'object_descr_props'}))
    after_label = re.split('Общая площадь:|Площадь комнат', props)[1]
    chunks = re.split('\n|\xa0', after_label)
    return chunks[2].replace(',', '.')

### Площадь жилых помещений:

In [16]:
def getLivesp(flat_page):
    """Return the living area as a string with a dot as decimal separator.

    Takes the text between ``Жилая площадь:`` and ``Площадь кухни`` in the
    properties table, cuts it at the first non-breaking space, and keeps
    only digits, commas and dots (commas become dots).
    """
    props = html_stripper(flat_page.find('table', attrs = {'class':'object_descr_props'}))
    section = re.split('Жилая площадь:|Площадь кухни', props)[1]
    before_unit = re.split('\xa0', section)[0]
    kept = [ch for ch in before_unit if ch.isdigit() or ch in (',', '.')]
    return ''.join(kept).replace(',', '.')

### Площадь кухни:

In [17]:
def getKitsp(flat_page):
    """Return the kitchen area as a string with a dot as decimal separator.

    Takes the text between ``Площадь кухни:`` and the next bathroom marker
    in the properties table, cuts it at the first non-breaking space, and
    keeps only digits, commas and dots (commas become dots).
    """
    table = flat_page.find('table', attrs = {'class':'object_descr_props'})
    table = html_stripper(table)
    kitsp = re.split('Площадь кухни:|Санузел|Раздельных санузлов|Совмещенных санузлов', table)[1]
    kitsp = re.split('\xa0', kitsp)[0]
    # Filter character by character, as getLivesp does.  Filtering whole
    # whitespace tokens dropped any value with a decimal part ("9,5").
    return ''.join(ch for ch in kitsp if ch.isdigit() or ch in (',', '.')).replace(',', '.')

### В шаговой ли доступности:

In [18]:
def getWalk(flat_page):
    """Return 1 if the metro is reached on foot, 0 otherwise, None if unknown.

    Reads the ``object_item_metro_comment`` span; the fifth line of its
    tag-stripped text (whitespace removed) is compared with ``пешком``.
    """
    walk_info = flat_page.find('span', attrs = {'class': 'object_item_metro_comment'})
    # Test the element itself: html_stripper always returns a str, so an
    # `is None` check after stripping can never be true.
    if walk_info is None:
        return None
    lines = re.split('\n', html_stripper(walk_info))
    if len(lines) < 5:
        return None
    walk = ''.join(lines[4].split())
    return 1 if walk == 'пешком' else 0

### Сколько минут до метро:

In [19]:
def getMetrdist(flat_page):
    """Return the time-to-metro text with whitespace removed, or None.

    Reads the ``object_item_metro_comment`` span and returns the second
    line of its tag-stripped text.  Returns None when the span is missing
    or has no second line.
    """
    metrdist = flat_page.find('span', attrs = {'class': 'object_item_metro_comment'})
    # Return a real None for a missing span rather than the string 'None'
    # that str(None) produces, so missing values are not stored as text.
    if metrdist is None:
        return None
    lines = re.split('\n', html_stripper(metrdist))
    if len(lines) < 2:
        return None
    return ''.join(lines[1].split())

### Указан ли контактный телефон (продавца/риелтора):

In [20]:
def getTel(flat_page):
    """Return 1 if the page has a ``realtor-card__phone`` div, else 0."""
    phone_block = flat_page.find('div', attrs = {'class': 'realtor-card__phone'})
    # html_stripper stringifies its argument, so a missing div reads 'None'.
    return 0 if html_stripper(phone_block) == 'None' else 1

# Само извлечение

### Извлечение кода с помощью getPage(getSubpageLink()):

In [21]:
def getPage(link, timeout=30):
    """Download *link* and return it parsed by BeautifulSoup with lxml.

    Args:
        link: URL to fetch.
        timeout: seconds passed to ``requests.get``.  Without a timeout a
            stalled connection blocks the whole scrape indefinitely.

    Raises:
        requests.exceptions.RequestException: on connection errors or timeout.
    """
    response = requests.get(link, timeout=timeout)
    # NOTE(review): the HTTP status is not checked, so error pages are
    # parsed like normal ones (unchanged from the original behaviour).
    return BeautifulSoup(response.content, 'lxml')

In [22]:
def getSubpageLink(link):
    """Build the listing page URL for the listing id *link*."""
    return ''.join(('http://www.cian.ru/sale/flat/', str(link), '/'))

### Страницы округов

In [23]:
# District labels.  NOTE(review): presumably listed in the same order as the
# URL templates in `districts` below (pairing is by position only) - confirm.
neighbourhoods = ['ЦАО','САО', 'СВАО', 'ВАО','ЮВАО', 'ЮАО','ЮЗАО','ЗАО','СЗАО','ЗелАО','НАО', 'ТАО']
# Search-result URL templates, one per list entry.  The `{}` placeholder in
# `p={}` is filled with the page number via str.format by the link collectors.
districts = ['http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=13&district%5B1%5D=14&district%5B2%5D=15&district%5B3%5D=16&district%5B4%5D=17&district%5B5%5D=18&district%5B6%5D=19&district%5B7%5D=20&district%5B8%5D=21&district%5B9%5D=22&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=23&district%5B10%5D=33&district%5B11%5D=34&district%5B12%5D=35&district%5B13%5D=36&district%5B14%5D=37&district%5B15%5D=38&district%5B1%5D=24&district%5B2%5D=25&district%5B3%5D=26&district%5B4%5D=27&district%5B5%5D=28&district%5B6%5D=29&district%5B7%5D=30&district%5B8%5D=31&district%5B9%5D=32&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=39&district%5B10%5D=49&district%5B11%5D=50&district%5B12%5D=51&district%5B13%5D=52&district%5B14%5D=53&district%5B15%5D=54&district%5B16%5D=55&district%5B1%5D=40&district%5B2%5D=41&district%5B3%5D=42&district%5B4%5D=43&district%5B5%5D=44&district%5B6%5D=45&district%5B7%5D=46&district%5B8%5D=47&district%5B9%5D=48&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=56&district%5B10%5D=66&district%5B11%5D=67&district%5B12%5D=68&district%5B13%5D=69&district%5B14%5D=70&district%5B15%5D=71&district%5B1%5D=57&district%5B2%5D=58&district%5B3%5D=59&district%5B4%5D=60&district%5B5%5D=61&district%5B6%5D=62&district%5B7%5D=63&district%5B8%5D=64&district%5B9%5D=65&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=72&district%5B10%5D=82&district%5B11%5D=83&district%5B1%5D=73&district%5B2%5D=74&district%5B3%5D=75&district%5B4%5D=76&district%5B5%5D=77&district%5B6%5D=78&district%5B7%5D=79&district%5B8%5D=80&district%5B9%5D=81&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=84&district%5B10%5D=94&district%5B11%5D=95&district%5B12%5D=96&district%5B13%5D=97&district%5B14%5D=98&district%5B15%5D=99&district%5B1%5D=85&district%5B2%5D=86&district%5B3%5D=87&district%5B4%5D=88&district%5B5%5D=89&district%5B6%5D=90&district%5B7%5D=91&district%5B8%5D=92&district%5B9%5D=93&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=100&district%5B10%5D=110&district%5B11%5D=111&district%5B1%5D=101&district%5B2%5D=102&district%5B3%5D=103&district%5B4%5D=104&district%5B5%5D=105&district%5B6%5D=106&district%5B7%5D=107&district%5B8%5D=108&district%5B9%5D=109&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=112&district%5B10%5D=122&district%5B11%5D=123&district%5B12%5D=124&district%5B13%5D=348&district%5B14%5D=349&district%5B15%5D=350&district%5B1%5D=113&district%5B2%5D=114&district%5B3%5D=115&district%5B4%5D=116&district%5B5%5D=117&district%5B6%5D=118&district%5B7%5D=119&district%5B8%5D=120&district%5B9%5D=121&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=125&district%5B1%5D=126&district%5B2%5D=127&district%5B3%5D=128&district%5B4%5D=129&district%5B5%5D=130&district%5B6%5D=131&district%5B7%5D=132&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=152&district%5B1%5D=153&district%5B2%5D=154&district%5B3%5D=355&district%5B4%5D=356&district%5B5%5D=357&district%5B6%5D=358&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=327&district%5B10%5D=337&district%5B1%5D=328&district%5B2%5D=329&district%5B3%5D=330&district%5B4%5D=331&district%5B5%5D=332&district%5B6%5D=333&district%5B7%5D=334&district%5B8%5D=335&district%5B9%5D=336&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1',
           'http://www.cian.ru/cat.php?deal_type=sale&district%5B0%5D=338&district%5B1%5D=339&district%5B2%5D=340&district%5B3%5D=341&district%5B4%5D=342&district%5B5%5D=343&district%5B6%5D=344&district%5B7%5D=345&district%5B8%5D=346&district%5B9%5D=347&engine_version=2&offer_type=flat&p={}&room1=1&room2=1&room3=1&room4=1&room5=1&room6=1']

### Соберем ссылки на страницы квартир с первых 30 страниц в поиске по каждому округу.

In [24]:
def getLinks0(district, pages):
    """Collect listing ids from the given search-result pages.

    *district* is a URL template whose ``{}`` is filled with each page
    number from *pages*.  Ids are returned in the order found; duplicates
    are kept.
    """
    item_class = "{'serp-item_removed': offer.remove.state, 'serp-item_popup-opened': isPopupOpen}"
    collected = []
    for page_number in pages:
        listing = getPage(district.format(page_number))
        cards = listing.findAll('div', attrs = {'ng-class': item_class})
        # Splitting the stringified cards around the listing URL prefix and
        # the attribute that follows it leaves the ids as all-digit pieces.
        pieces = re.split('http://www.cian.ru/sale/flat/|/" ng-class="', str(cards))
        collected.extend(piece for piece in pieces if piece.isdigit())
    return collected

Заметим, что если в округе предложений меньше, чем помещается на запрошенное число страниц, то при отсутствии следующей страницы цикл снова собирает ссылки с уже обработанных страниц. Поэтому удалим из списка ссылок дубликаты, сохранив их порядок.

In [25]:
# Demo: collect listing ids from search pages 1-3 of the URL template at
# index 9, then display the list (duplicates included).
ZelAOlinks = getLinks0(districts[9], pages = range(1, 4, 1))
ZelAOlinks

['150987610',
 '148433138',
 '150130411',
 '148505839',
 '150987610',
 '148433138',
 '150130411',
 '148505839',
 '150987610',
 '148433138',
 '150130411',
 '148505839']

Видим, что в Зеленоградском АО 4 квартиры, которые помещаются на 1 странице, а в списке данные с этой одной страницы записаны 3 раза. Удаляем повторы, сохраняя порядок. Пусть и прочитали их - это не так долго.

In [27]:
from collections import OrderedDict

# OrderedDict.fromkeys keeps the first occurrence of each id, in order.
uniquelinks = list(OrderedDict.fromkeys(ZelAOlinks))
uniquelinks

['150987610', '148433138', '150130411', '148505839']

### Функция, достающая ссылки:

In [31]:
def getLinks(district, total_pages):
    """Collect unique listing ids from the given search-result pages.

    *district* is a URL template whose ``{}`` is filled with each page
    number from *total_pages*.  Each id appears once, in first-seen order.
    """
    item_class = "{'serp-item_removed': offer.remove.state, 'serp-item_popup-opened': isPopupOpen}"
    first_seen = OrderedDict()
    for page_number in total_pages:
        listing = getPage(district.format(page_number))
        cards = listing.findAll('div', attrs = {'ng-class': item_class})
        pieces = re.split('http://www.cian.ru/sale/flat/|/" ng-class="', str(cards))
        for piece in pieces:
            if piece.isdigit():
                # setdefault keeps the position of the first occurrence.
                first_seen.setdefault(piece, None)
    return list(first_seen)

In [32]:
# Demo: same pages as before, now with duplicates removed.
getLinks(districts[9], total_pages = range(1, 4, 1))

['150987610', '148433138', '150130411', '148505839']

### Порядковый номер итерации (=номер квартиры в каждом округе, т.к. собирать будем данные отдельно по округам):

In [33]:
def getN(iter_item):
    """Turn a zero-based iteration index into a one-based ordinal."""
    ordinal = iter_item + 1
    return ordinal

### Функция, которая будет доставать данные

In [34]:
def ParseDistrict(district_number, pages_range, csv_file = False, file_name = 0):
    """Scrape every listing of one district into a DataFrame.

    Args:
        district_number: index into the module-level ``districts`` list.
        pages_range: iterable of search-result page numbers for ``getLinks``.
        csv_file: when true, also write the result to CSV.
        file_name: CSV path used when ``csv_file`` is true; a falsy value
            falls back to the original hard-coded path.

    Returns:
        A DataFrame with one row per parsed listing.  Parsing stops at the
        first listing that fails (or on a keyboard interrupt) and the rows
        collected so far are returned.
    """
    columns = ['District', 'N', 'Rooms', 'Price', 'Totsp', 'Livesp', 'Kitsp',  'Lat', 'Lon', 'Dist',
               'Metrdist', 'Walk', 'Brick', 'Tel', 'Bal', 'Floor', 'Nfloors', 'New']
    district = districts[district_number]
    links = getLinks(district, pages_range)
    # Rows go into a plain list and the frame is built once at the end:
    # DataFrame.append copied the frame per row and is gone in pandas 2.0.
    rows = []
    for index, flat_id in enumerate(links):
        try:
            flat_page = getPage(getSubpageLink(flat_id))
            # Parse the coordinates once instead of three times per listing.
            lat, lon, dist = getDist(flat_page)
            rows.append({
                'District': district_number,
                'N': getN(index),
                'Rooms': getRooms(flat_page),
                'Price': getPrice(flat_page),
                'Totsp': getTotsp(flat_page),
                'Livesp': getLivesp(flat_page),
                'Kitsp': getKitsp(flat_page),
                'Lat': lat,
                'Lon': lon,
                'Dist': dist,
                'Metrdist': getMetrdist(flat_page),
                'Walk': getWalk(flat_page),
                'Brick': getBrick(flat_page),
                'Tel': getTel(flat_page),
                'Bal': getBal(flat_page),
                'Floor': getFloor(flat_page),
                'Nfloors': getNfloors(flat_page),
                'New': getNew(flat_page),
            })
            # Progress message every 28 listings.
            # NOTE(review): 28 is presumably the page size - confirm.
            if index % 28 == 0:
                print('I`m not dead, I`m working! The page is {}'.format(index // 28 + 1))
        except (Exception, KeyboardInterrupt) as error:
            # As before, stop at the first failure and keep the partial
            # result; KeyboardInterrupt is listed so interrupting the run
            # still returns the data.  SystemExit is no longer swallowed.
            print('Something went wrong while working with link {}.'.format(index))
            print('Reason: {!r}'.format(error))
            break
    else:
        print('Finished parsing district {}.'.format(district_number))

    # dtype=object keeps each cell as the Python value the getters returned.
    CIANDATA = pd.DataFrame(rows, columns=columns, dtype=object)
    # This used to sit after the return statement and never ran.
    if csv_file:
        # NOTE(review): the fallback path is machine-specific.
        CIANDATA.to_csv(file_name if file_name else '/Users/mariago/Desktop/CIANDATA.csv')
    return CIANDATA

### Пройдемся по всем округам:

In [35]:
# Scrape district index 0; range(0, 30) requests search pages p=0..29.
# NOTE(review): the earlier demo starts at page 1 - confirm p=0 is valid.
district0 = ParseDistrict(0, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
I`m not dead, I`m working! The page is 22
I`m not dead, I`m working! The page is 23
I`m not dead, I`m working! The page is 24
I

In [36]:
# Save the district 0 rows to CSV (hard-coded, machine-specific path).
district0.to_csv('/Users/mariago/Desktop/district0.csv')

In [37]:
# Scrape district index 1 (search pages p=0..29).
district1 = ParseDistrict(1, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
I`m not dead, I`m working! The page is 22
I`m not dead, I`m working! The page is 23
I`m not dead, I`m working! The page is 24
I

In [38]:
# Save the district 1 rows to CSV (hard-coded, machine-specific path).
district1.to_csv('/Users/mariago/Desktop/district1.csv')

In [45]:
# Scrape district index 2 (search pages p=0..29).
district2 = ParseDistrict(2, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
I`m not dead, I`m working! The page is 22
I`m not dead, I`m working! The page is 23
I`m not dead, I`m working! The page is 24
I

In [46]:
# Save the district 2 rows to CSV (hard-coded, machine-specific path).
district2.to_csv('/Users/mariago/Desktop/district2.csv')

In [43]:
# Scrape district index 3 (search pages p=0..29).
district3 = ParseDistrict(3, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
I`m not dead, I`m working! The page is 22
I`m not dead, I`m working! The page is 23
I`m not dead, I`m working! The page is 24
I

In [47]:
# Save the district 3 rows to CSV (hard-coded, machine-specific path).
district3.to_csv('/Users/mariago/Desktop/district3.csv')

In [48]:
# Scrape district index 4 (search pages p=0..29).
# The captured output shows this run stopping at link index 571, so
# district4 holds only the rows collected before that failure.
district4 = ParseDistrict(4, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
Something went wrong while working with link 571.


In [49]:
# Save the district 4 rows to CSV (hard-coded, machine-specific path).
district4.to_csv('/Users/mariago/Desktop/district4.csv')

In [50]:
# Scrape district index 5 (search pages p=0..29).
district5 = ParseDistrict(5, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
I`m not dead, I`m working! The page is 22
I`m not dead, I`m working! The page is 23
I`m not dead, I`m working! The page is 24
I

In [51]:
# Save the district 5 rows to CSV (hard-coded, machine-specific path).
district5.to_csv('/Users/mariago/Desktop/district5.csv')

In [52]:
# Scrape district index 6 (search pages p=0..29).
district6 = ParseDistrict(6, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
I`m not dead, I`m working! The page is 22
I`m not dead, I`m working! The page is 23
I`m not dead, I`m working! The page is 24
I

In [53]:
# Save the district 6 rows to CSV (hard-coded, machine-specific path).
district6.to_csv('/Users/mariago/Desktop/district6.csv')

In [54]:
# Scrape district index 7 (search pages p=0..29).
district7 = ParseDistrict(7, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
I`m not dead, I`m working! The page is 22
I`m not dead, I`m working! The page is 23
I`m not dead, I`m working! The page is 24
I

In [55]:
# Save the district 7 rows to CSV (hard-coded, machine-specific path).
district7.to_csv('/Users/mariago/Desktop/district7.csv')

In [56]:
# Scrape district index 8 (search pages p=0..29).
district8 = ParseDistrict(8, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
I`m not dead, I`m working! The page is 22
I`m not dead, I`m working! The page is 23
I`m not dead, I`m working! The page is 24
I

In [57]:
# Save the district 8 rows to CSV (hard-coded, machine-specific path).
district8.to_csv('/Users/mariago/Desktop/district8.csv')

In [58]:
# Scrape district index 9 (search pages p=0..29).
district9 = ParseDistrict(9, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
Finished parsing district 9.


In [59]:
# Save the district 9 rows to CSV (hard-coded, machine-specific path).
district9.to_csv('/Users/mariago/Desktop/district9.csv')

In [60]:
# Scrape district index 10 (search pages p=0..29).
district10 = ParseDistrict(10, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
I`m not dead, I`m working! The page is 22
I`m not dead, I`m working! The page is 23
I`m not dead, I`m working! The page is 24
I

In [61]:
# Save the district 10 rows to CSV (hard-coded, machine-specific path).
district10.to_csv('/Users/mariago/Desktop/district10.csv')

In [62]:
# Scrape district index 11 (search pages p=0..29).
district11 = ParseDistrict(11, range(0, 30, 1))

I`m not dead, I`m working! The page is 1
I`m not dead, I`m working! The page is 2
I`m not dead, I`m working! The page is 3
I`m not dead, I`m working! The page is 4
I`m not dead, I`m working! The page is 5
I`m not dead, I`m working! The page is 6
I`m not dead, I`m working! The page is 7
I`m not dead, I`m working! The page is 8
I`m not dead, I`m working! The page is 9
I`m not dead, I`m working! The page is 10
I`m not dead, I`m working! The page is 11
I`m not dead, I`m working! The page is 12
I`m not dead, I`m working! The page is 13
I`m not dead, I`m working! The page is 14
I`m not dead, I`m working! The page is 15
I`m not dead, I`m working! The page is 16
I`m not dead, I`m working! The page is 17
I`m not dead, I`m working! The page is 18
I`m not dead, I`m working! The page is 19
I`m not dead, I`m working! The page is 20
I`m not dead, I`m working! The page is 21
I`m not dead, I`m working! The page is 22
I`m not dead, I`m working! The page is 23
I`m not dead, I`m working! The page is 24
I

In [63]:
# Save the district 11 rows to CSV (hard-coded, machine-specific path).
district11.to_csv('/Users/mariago/Desktop/district11.csv')

### Сведем всё в одну таблицу:

In [73]:
# Stack the twelve per-district frames row-wise (axis=0); each frame's own
# index is kept at this point.
AllDistricts = pd.concat([district0, district1, district2, district3, district4, district5, district6, district7, district8, district9, district10, district11], axis = 0)

In [75]:
# Replace the repeated per-district indices with one continuous index;
# drop=True discards the old index instead of keeping it as a column.
AllDistricts = AllDistricts.reset_index(drop=True)

In [76]:
# Save the combined table to CSV (hard-coded, machine-specific path).
AllDistricts.to_csv('/Users/mariago/Desktop/ciandata.csv')