Skip to content

Commit

Permalink
Merge d3c77ed into 83d1cf6
Browse files Browse the repository at this point in the history
  • Loading branch information
Em-jey committed Nov 15, 2017
2 parents 83d1cf6 + d3c77ed commit 352b44c
Show file tree
Hide file tree
Showing 10 changed files with 67 additions and 36 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Python scrapper for morizon
[![Build Status](https://travis-ci.org/limebrains/pymorizon.svg?branch=master)](https://travis-ci.org/limebrains/pymorizon)
[![Documentation Status](https://readthedocs.org/projects/pymorizon/badge/?version=latest)](http://pymorizon.readthedocs.io/en/latest/?badge=latest)


# Running

### Installation
Expand All @@ -26,4 +27,5 @@ tox
### Tests
```
py.test tests.py -vv --pdb
```
34 changes: 34 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
Introduction
============
pymorizon supplies two functions that can be used to scrape data from the Morizon website.

.. _categories:

======================
Scraping category data
======================
This function scrapes the URLs of available offers from Morizon search results matching the given parameters.

.. autofunction:: morizon.category.get_category

The function above can be used like this:

::

filters = {'[number_of_rooms_from]': 2}
offers_url = morizon.category.get_category('mieszkania', 'Gdańsk', 'Grunwaldzka', 'do-wynajecia', None, filters)

The code above stores a list of URLs for all apartments found in the given category in the ``offers_url`` variable.

===================
Scraping offer data
===================
This function scrapes the details of a single offer.

.. autofunction:: morizon.offer.get_offer_data

The function above can be used like this:

::

details = morizon.offer.get_offer_data(url)

The code above creates a dictionary with the details of the offer at the given URL.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
html_theme = "default"

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Welcome to pymorizon's documentation!
:maxdepth: 2
:caption: Contents:


api
category
offer
utils
Expand Down
2 changes: 1 addition & 1 deletion example.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# offers = get_category(url=url)


with open('tests/output.json', 'w') as output_file:
with open('test_data/output.json', 'w') as output_file:
output_file.write('[')
for urls_from_offers in offers:
data = get_offer_data(urls_from_offers)
Expand Down
10 changes: 4 additions & 6 deletions morizon/category.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

from bs4 import BeautifulSoup
import json
from urllib.parse import quote

from scrapper_helpers.utils import caching, key_md5

from .utils import URL, get_content_from_source, encode_text_to_url
import logging

from bs4 import BeautifulSoup

from .utils import URL, encode_text_to_url, get_content_from_source

log = logging.getLogger(__file__)
logging.basicConfig(level=logging.DEBUG)

Expand Down
14 changes: 9 additions & 5 deletions morizon/offer.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import datetime as dt
import re

from bs4 import BeautifulSoup
from scrapper_helpers.utils import replace_all
from morizon.utils import get_content_from_source, finder
from scrapper_helpers.utils import replace_all, finder

from morizon.utils import get_content_from_source


@finder(class_='paramIconPrice', many=False)
Expand Down Expand Up @@ -142,7 +144,8 @@ def get_date_for_offer(item, *args, **kwargs):
"""
date_added = re.findall(r'\d\d-\d\d-\d\d\d\d', item.get('content'))[0]
date_parts = date_added.split('-')
date_in_second = int((dt.datetime(int(date_parts[2]), int(date_parts[1]), int(date_parts[0])) - dt.datetime(1970, 1, 1)).total_seconds())
date_in_second = int((dt.datetime(int(date_parts[2]), int(date_parts[1]),
int(date_parts[0])) - dt.datetime(1970, 1, 1)).total_seconds())
return date_in_second


Expand Down Expand Up @@ -184,7 +187,8 @@ def get_gps_for_offer(item, *args, **kwargs):
:return: tuple with geographical coordinates or None if can't find
:rtype: tuple, None
"""
if not item: return None
if not item:
return None
lat = item.get('data-lat')
long = item.get('data-long')
gps = (lat, long)
Expand Down
23 changes: 5 additions & 18 deletions morizon/utils.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import logging
from urllib.parse import quote, urlparse

import requests
from urllib.parse import quote, urlparse, unquote

from bs4 import BeautifulSoup
from scrapper_helpers.utils import caching, get_random_user_agent, key_md5, replace_all

from . import BASE_URL
from scrapper_helpers.utils import replace_all, get_random_user_agent, caching, key_md5

log = logging.getLogger(__file__)
POLISH_CHARACTERS_MAPPING = {"ą": "a", "ć": "c", "ę": "e", "ł": "l", "ń": "n", "ó": "o", "ś": "s", "ż": "z", "ź": "z"}
POSSIBLE_CATEGORIES = ['mieszkania', 'domy', 'komercyjne', 'dzialki', 'garaze', 'pokoje']
POSSIBLE_TRANSACTIONS = ['do-wynajecia']


def get_max_page(url):
""" Reads total page number on Morizon search page
Expand All @@ -33,7 +32,8 @@ def get_max_page(url):


def encode_text_to_url(text):
""" Change text to lower cases, gets rid of polish characters replacing them with simplified version, replaces spaces with dashes
""" Change text to lower cases, gets rid of polish characters replacing them with simplified version,
replaces spaces with dashes
:param text: raw text
:type text: str
Expand Down Expand Up @@ -134,16 +134,3 @@ def get_content_from_source(url):
log.warning('Request for {0} failed. Error: {1}'.format(url, e))
return None
return response.content


def finder(many=True, *finder_args, **finder_kwargs):
    """ Builds a decorator that looks up elements in markup before calling the decorated function

    The decorated function is called as ``fun(items, *args, markup=markup, **kwargs)`` where
    ``items`` is the result of ``markup.find_all(...)`` when ``many`` is true and of
    ``markup.find(...)`` otherwise.

    :param many: use ``find_all`` when true, ``find`` when false
    :param finder_args: positional arguments forwarded to the lookup call
    :param finder_kwargs: keyword arguments forwarded to the lookup call
    :type many: bool
    :return: decorator wrapping a function that takes the lookup result as its first argument
    :rtype: callable
    """
    # Imported locally so the block does not depend on a module-level import.
    from functools import wraps

    def decorator(fun):
        # wraps() keeps the decorated function's name and docstring, which
        # documentation tools and debuggers read from the wrapper.
        @wraps(fun)
        def wrapper(markup, *args, **kwargs):
            lookup = markup.find_all if many else markup.find
            items = lookup(*finder_args, **finder_kwargs)
            # The original markup is always handed on under the 'markup' keyword.
            kwargs['markup'] = markup
            return fun(items, *args, **kwargs)
        return wrapper
    return decorator
2 changes: 2 additions & 0 deletions test_data/output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[{"price": 3900.0, "surface": 112.0, "rooms": 3, "floor": 0, "voivodeship": "pomorskie", "city": "Sopot", "street": "REYMONTA W\u0141ADYS\u0141AWA", "phone": "503 109 450", "date_added": 1473379200, "poster_name": "Urszula Strawi\u0144ska", "gps": ["54.4354", "18.557"], "description": "\nSopot, G\u00f3rny Sopot, Luksusowy Apartament 112m2Komfortowy apartament o powierzchni 112 m2 usytuowany na parterze przedwojennej willi w urokliwej, spokojnej i bardzo cichej cz\u0119\u015bci G\u00f3rnego Sopotu.Na powierzchni\u0119 apartamentu sk\u0142adaj\u0105 si\u0119:salon ok. 30 m2 wyposa\u017cony w kino domowekuchnia z jadalni\u0105 ok. 30 m2 i niewielkim wykuszem przeznaczonym na k\u0105cik kawowysypialnia ok 27 m2hallpok\u00f3j k\u0105pielowy z wann\u0105 i kabin\u0105 prysznicow\u0105osobne WC z bidetemgarderobaDodatkowym atutem jest mo\u017cliwo\u015b\u0107 korzystania z wyj\u015bcia na taras i do niewielkiego, zadbanego ogrodu oraz ze znajduj\u0105cej si\u0119 w cz\u0119\u015bci piwnicznej sauny na podczerwie\u0144.Do wy\u0142\u0105cznego korzystania jest r\u00f3wnie\u017c usytuowany w budynku gara\u017c o powierzchni ok. 18 m2 z bram\u0105 otwieran\u0105 na pilotaApartament bardzo zadbany, ca\u0142kowicie umeblowany i wyposa\u017cony we wszystkie niezb\u0119dne do komfortowego \u017cycia rzeczy. Wysoko\u015b\u0107 mieszkania 3,20 m.Otoczony zieleni\u0105 budynek usytuowany jest w\u015br\u00f3d przedwojennej, niskiej zabudowy w niewielkiej odleg\u0142o\u015bci od Tr\u00f3jmiejskiego Parku Krajobrazowego.Idealne miejsce dla wymagaj\u0105cego businessmana lub dyplomaty poszukuj\u0105cego na d\u0142u\u017cej wyj\u0105tkowego apartamentu w Sopocie.Miesi\u0119czny czynsz najmu - 3 900,- PLN bruttoDodatkowo 1 100,- PLN miesi\u0119cznie -zrycza\u0142towane koszty ogrzewania, ciep\u0142ej i zimnej wody, wywozu nieczysto\u015bci, ogrodnika, sprz\u0105tania klatki schodowej i posesjiWymagana kaucja w wysoko\u015bci jedno miesi\u0119cznego czynszu najmu tj. 3 900,- PLN. 
", "images": ["https://img3.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxL2EvZzQ5OTFfbXdfUFUwOTNfMS5qcGcjdj0xXzYxNzg3NjU2NA==/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img3.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxLzMvZzQ5OTFfbXdfUFUwOTNfMi5qcGcjdj0xXzYxNzg3NjU2OA==/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img1.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxLzYvZzQ5OTFfbXdfUFUwOTNfMy5qcGcjdj0xXzYxNzg3NjU2Ng==/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img1.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxLzcvZzQ5OTFfbXdfUFUwOTNfNC5qcGcjdj0xXzYxNzg3NjU2MA==/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img2.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxL2QvZzQ5OTFfbXdfUFUwOTNfNS5qcGcjdj0xXzYxNzg3NjU1Mg==/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img3.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxL2YvZzQ5OTFfbXdfUFUwOTNfNi5qcGcjdj0xXzYxNzg3NjU1Nw==/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img1.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxL2IvZzQ5OTFfbXdfUFUwOTNfNy5qcGcjdj0xXzYxNzg3NjU1Mw==/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img1.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxLzQvZzQ5OTFfbXdfUFUwOTNfOC5qcGcjdj0xXzYxNzg3NjU0OQ==/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img2.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxLzAvZzQ5OTFfbXdfUFUwOTNfOS5qcGcjdj0xXzYxNzg3NjU0Nw==/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", 
"https://img2.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxLzcvZzQ5OTFfbXdfUFUwOTNfMTAuanBnI3Y9MV82MTc4NzY1NjE=/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img2.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxLzcvZzQ5OTFfbXdfUFUwOTNfMTEuanBnI3Y9MV82MTc4NzY1NDY=/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img2.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxL2YvZzQ5OTFfbXdfUFUwOTNfMTIuanBnI3Y9MV82MTc4NzY1NTE=/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg", "https://img2.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2c0OTkxL2YvZzQ5OTFfbXdfUFUwOTNfMTMuanBnI3Y9MV82MTc4NzY1NTA=/1280/768/16/mieszkanie-do-wynajecia-sopot-gorny-112-m-morizon-pl-6106.jpg"], "url": "https://www.morizon.pl/oferta/wynajem-mieszkanie-sopot-gorny-reymonta-wladyslawa-112m2-mzn2024686106"},
{"price": 1750.0, "surface": 46.5, "rooms": 2, "floor": 0, "voivodeship": "pomorskie", "city": "Sopot", "street": "Kr\u00f3la Jana Kazimierza", "phone": "+48 58 301 77 77", "date_added": 1509062400, "poster_name": "Jaros\u0142aw Niemczyk", "gps": ["54.44", "18.5625"], "description": "\nDwupokojowe mieszkanie w Sopocie.LOKALIZACJA: Nieruchomo\u015b\u0107 po\u0142o\u017cona przy ulicy Kr\u00f3la Jana Kazimierza, w pobli\u017cu pasa nadmorskiego, ERGO Areny, punkt\u00f3w us\u0142ugowych, przystank\u00f3w komunikacji miejskiej (niedaleko przystanek autobusowy, stacja SKM w odleg\u0142o\u015bci ok. 15 min), zaplecza gastronomicznego i \u015bcie\u017cek rekreacyjnych. Wniewielkiej odleg\u0142o\u015bci ods\u0142ynnej ulicy Boh. Monte Cassino. Do morza ok. 800 m.BUDYNEK: Kamienica z lat 50 - tych, II pi\u0119trowa. W bradzo dobrym stanie technicznym, po remoncie. Wej\u015bcie do budynku zabezpieczone domofonem. Przed budynkiem mo\u017cliwo\u015b\u0107 parkowania na og\u0142onodost\u0119pnych miejscach tj. wzd\u0142u\u017c ulicy. NIERUCHOMO\u015a\u0106: Mieszkanie o powierzchni 46,5 m2 po\u0142o\u017cone na parterze. Sk\u0142ada si\u0119 z salonu, sypialni, \u0142azienki wraz z WC, niezale\u017cnej kuchni i przedpokoju. Ca\u0142o\u015b\u0107 wykonana i urz\u0105dzona w bardzo \u0142adnym stylu, nowocze\u015bnie, w stonowanych i spokojnych kolorach. Pomieszczenia przystosowane do pod\u0142\u0105czenia Internetu oraz TV. Ogrzewanie gazowe.POMIESZCZENIA PRZYNALE\u017bNE: Do mieszkania przynale\u017cy du\u017ca piwnica o pow. 4m2.OP\u0141ATY: Cena wynajmu 1750,00 z\u0142 + op\u0142aty eksploatacyjne ok. 310,00 z\u0142 + energia elektryczna, gaz + woda wg zu\u017cycia. Obligatoryjna kaucja w wysoko\u015bci 1750,00 z\u0142.Serdecznie zapraszamy na prezentacj\u0119!!Podobne oferty naPrezentowana oferta ma charakter informacyjny, nie stanowi oferty handlowej w rozumieniu Art. 66 par. 1 Kodeksu Cywilnego. 
", "images": ["https://img3.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2hhbmQvZS9oYW5kX213X0gwMDQ1OTlfMS5qcGcjdj0xXzg3Njk4MzUxNg==/1280/768/16/mieszkanie-do-wynajecia-sopot-wyscigi-47-m-morizon-pl-3694.jpg", "https://img3.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2hhbmQvOS9oYW5kX213X0gwMDQ1OTlfMi5qcGcjdj0xXzg3Njk4MzUzMA==/1280/768/16/mieszkanie-do-wynajecia-sopot-wyscigi-47-m-morizon-pl-3694.jpg", "https://img1.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2hhbmQvOC9oYW5kX213X0gwMDQ1OTlfMy5qcGcjdj0xXzg3Njk4MzUyNQ==/1280/768/16/mieszkanie-do-wynajecia-sopot-wyscigi-47-m-morizon-pl-3694.jpg", "https://img2.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2hhbmQvOC9oYW5kX213X0gwMDQ1OTlfNC5qcGcjdj0xXzg3Njk4MzUwNA==/1280/768/16/mieszkanie-do-wynajecia-sopot-wyscigi-47-m-morizon-pl-3694.jpg", "https://img2.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2hhbmQvNy9oYW5kX213X0gwMDQ1OTlfNS5qcGcjdj0xXzg3Njk4MzUyOQ==/1280/768/16/mieszkanie-do-wynajecia-sopot-wyscigi-47-m-morizon-pl-3694.jpg", "https://img1.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2hhbmQvNS9oYW5kX213X0gwMDQ1OTlfNi5qcGcjdj0xXzg3Njk4MzUxNw==/1280/768/16/mieszkanie-do-wynajecia-sopot-wyscigi-47-m-morizon-pl-3694.jpg", "https://img2.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2hhbmQvZC9oYW5kX213X0gwMDQ1OTlfNy5qcGcjdj0xXzg3Njk4MzUxOA==/1280/768/16/mieszkanie-do-wynajecia-sopot-wyscigi-47-m-morizon-pl-3694.jpg", "https://img1.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2hhbmQvZi9oYW5kX213X0gwMDQ1OTlfOC5qcGcjdj0xXzg3Njk4MzUwOA==/1280/768/16/mieszkanie-do-wynajecia-sopot-wyscigi-47-m-morizon-pl-3694.jpg", "https://img1.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2hhbmQvMi9oYW5kX213X0gwMDQ1OTlfOS5qcGcjdj0xXzg3Njk4MzUxMQ==/1280/768/16/mieszkanie-do-wynajecia-sopot-wyscigi-47-m-morizon-pl-3694.jpg", 
"https://img2.staticmorizon.com.pl/thumbnail/aHR0cDovL2ltZy5tb3Jpem9uLnBsL2hhbmQvOC9oYW5kX213X0gwMDQ1OTlfMTAuanBnI3Y9MV84NzY5ODM1MjA=/1280/768/16/mieszkanie-do-wynajecia-sopot-wyscigi-47-m-morizon-pl-3694.jpg"], "url": "https://www.morizon.pl/oferta/wynajem-mieszkanie-sopot-wyscigi-krola-jana-kazimierza-46m2-mzn2028653694"},
12 changes: 8 additions & 4 deletions tests.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# pytest tests.py -vv --pdb --cov=morizon --cov-report=term-missing
import pytest
import sys
from bs4 import BeautifulSoup

import morizon
import morizon.category
import morizon.offer
import morizon.utils
import pytest
from bs4 import BeautifulSoup

if sys.version_info < (3, 3):
from mock import mock
Expand Down Expand Up @@ -38,7 +37,6 @@ def test_get_url(args, filter, expected_value):
def test_url_parsing(url, city):
url_obj = morizon.utils.URL.from_string(url)
assert url_obj.city == city
assert url_obj.get_url() == url


@pytest.mark.parametrize('text, expected_value', [
Expand Down Expand Up @@ -103,3 +101,9 @@ def test_get_offers_from_page(offers_gdynia):
with mock.patch("morizon.utils.get_content_from_source") as get_content:
get_content.return_value = offers_gdynia
assert type(morizon.category.get_offers_from_page('https://www.morizon.pl/do-wynajecia/mieszkania/gdynia/witomino-lesniczowka/?page=1')) == type([])


def test_get_offer_data(offer_markup):
    """ Checks that get_offer_data returns a dict when the page download is mocked """
    offer_url = 'https://www.morizon.pl/oferta/wynajem-mieszkanie-szczecin-pogodno-somosierry-51m2-mzn2028886916'
    # morizon/offer.py binds the downloader with ``from morizon.utils import
    # get_content_from_source``; a patch on morizon.utils alone does not replace
    # that already-imported name, so it is patched in both modules.
    # NOTE(review): assumes get_offer_data calls the name bound in morizon.offer - confirm.
    with mock.patch("morizon.utils.get_content_from_source", return_value=offer_markup), \
            mock.patch("morizon.offer.get_content_from_source", return_value=offer_markup):
        offer = morizon.offer.get_offer_data(offer_url)
    assert isinstance(offer, dict)

0 comments on commit 352b44c

Please sign in to comment.