Skip to content

Commit

Permalink
added new columns, improved readme
Browse files Browse the repository at this point in the history
  • Loading branch information
yaksvk committed Jul 13, 2014
1 parent 1bf4d4f commit 59c7f21
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 4 deletions.
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
Evidencia nehnuteľného majetku štátu v správe riadených organizácií
kapitoly MF SR.
========================================================================
Evidencia nehnuteľného majetku štátu v správe riadených organizácií kapitoly MF SR.

This scraper scrapes the data from the Ministry of Finance of the
Slovak Republic. It processes the PDF list of the real-estate
Expand Down
26 changes: 25 additions & 1 deletion scraper.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def process_columns(row):

# specify a standard list of columns for every row in the final result set
item = collections.OrderedDict()
cols = 'id organizacia zariadenie typ druh_1 druh_2'.split(' ')
cols = 'id organizacia zariadenie typ druh_1 druh_2 inventarne_cislo rok_nadobudnutia kraj okres obec krajsky_urad'.split(' ')

for col in cols:
item[col] = None
Expand All @@ -77,15 +77,39 @@ def process_columns(row):
item['organizacia'] = results[0][1]
else:
return None

# zariadenie
item['zariadenie'] = row.get('Zariadenie', None)
if item['zariadenie'] == '-':
item['zariadenie'] = None

# typ
item['typ'] = row.get('Typ', None)

# druh
item['druh_1'] = row.get('Druh', None)
item['druh_2'] = row.get('Druh2', None)

# inventarne cislo
item['inventarne_cislo'] = row.get('Inventárne číslo', None)

# rok nadobudnutia a kraj
rok_kraj = row.get('Rok nadobudnutia a kraj', None)
if rok_kraj is not None:
results = re.findall('^(\d{4}) (.*)$', rok_kraj)
if results:
item['rok_nadobudnutia'] = int(results[0][0])
item['kraj'] = results[0][1]
else:
if re.match('^\d{4}$', rok_kraj):
item['rok_nadobudnutia'] = int(rok_kraj)
else:
item['kraj'] = rok_kraj

# okres, obec, krajsky_urad
item['okres'] = row.get('Názov okresu', None)
item['obec'] = row.get('Názov obce', None)
item['krajsky_urad'] = row.get('Názov KÚ', None)

return item

Expand Down

0 comments on commit 59c7f21

Please sign in to comment.