Skip to content

Commit

Permalink
Merge branch 'feature/new-models' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
m4droid committed Jun 26, 2016
2 parents 7b27b9b + a0c3b69 commit 0191838
Show file tree
Hide file tree
Showing 12 changed files with 349 additions and 155 deletions.
7 changes: 7 additions & 0 deletions nosetests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Run nosetests with coverage for the `restriccion` package, pointing
# RESTRICCION_CONFIG at configs/tests.json next to this script.
# Any arguments given to this script are forwarded to nosetests.

# Abort on the first failing command.
set -e

# Absolute path of the directory containing this script.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# "$@" is quoted so forwarded arguments that contain spaces or glob
# characters reach nosetests unchanged instead of being re-split.
RESTRICCION_CONFIG="$DIR/configs/tests.json" nosetests --with-coverage --cover-package=restriccion "$@"
72 changes: 53 additions & 19 deletions restriccion/crawlers/uoct.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pyquery import PyQuery as pq

from restriccion import CONFIG
from ..models.air_quality import AirQuality
from ..models.restriction import Restriction


Expand All @@ -15,6 +16,8 @@ def __init__(self):
self.url = UOCT_Crawler.url

def parse(self):
reports = {'restrictions': [], 'air_quality': []}

if self.url.startswith('file://'):
document = pq(filename=self.url.replace('file://', ''))
else:
Expand All @@ -23,36 +26,67 @@ def parse(self):
current_year = moment.utcnow().timezone(CONFIG['moment']['timezone']).format('YYYY')
rows = document('.selecthistory #table-%s tbody tr' % current_year)

raw_data = []
for row in rows[2:]:
raw_data.append(Restriction.dict(
moment.date(row.find('td[3]').text.strip(), '%d-%m-%Y').format('YYYY-M-D'),
self.clean_digits_string(row.find('td[4]').text),
self.clean_digits_string(row.find('td[5]').text),
UOCT_Crawler.url
date_ = moment.date(row.find('td[3]').text.strip(), '%d-%m-%Y').format('YYYY-M-D')
reports['air_quality'].append(AirQuality.dict(
UOCT_Crawler.url,
{
'ciudad': 'Santiago',
'fecha': date_,
'estado': row.find('td[1]').text.strip()
}
))

reports['restrictions'].append(Restriction.dict(
UOCT_Crawler.url,
{
'ciudad': 'Santiago',
'fecha': date_,
'sin_sello_verde': self.clean_digits_string(row.find('td[4]').text),
'con_sello_verde': self.clean_digits_string(row.find('td[5]').text),
}
))

raw_data.sort(key=lambda r: r['fecha'], reverse=True)
reports['restrictions'].sort(key=lambda r: r['fecha'], reverse=True)

# Current day info
info = document('.eventslist .restriction h3')
if len(info) != 2:
return raw_data
return reports

date_ = moment.utcnow().timezone(CONFIG['moment']['timezone']).format('YYYY-M-D')

data = Restriction.dict(
moment.utcnow().timezone(CONFIG['moment']['timezone']).format('YYYY-M-D'),
self.clean_digits_string(info[0].text),
self.clean_digits_string(info[1].text),
UOCT_Crawler.url
air_quality = AirQuality.dict(
UOCT_Crawler.url,
{
'ciudad': 'Santiago',
'fecha': date_,
'estado': 'Normal'
}
)
self.insert_report_in_position(reports['air_quality'], air_quality)

restriction = Restriction.dict(
UOCT_Crawler.url,
{
'ciudad': 'Santiago',
'fecha': date_,
'sin_sello_verde': self.clean_digits_string(info[0].text),
'con_sello_verde': self.clean_digits_string(info[1].text),
}
)
self.insert_report_in_position(reports['restrictions'], restriction)

if len([r for r in raw_data if r['fecha'] == data['fecha']]) == 0:
for i in range(len(raw_data)):
if raw_data[i]['fecha'] < data['fecha']:
raw_data.insert(i, data)
break
return reports

return raw_data
@staticmethod
def insert_report_in_position(reports_list, report):
    """Insert ``report`` into ``reports_list`` unless its date is already there.

    The report is placed before the first entry whose ``'fecha'`` compares
    lower than the report's, which keeps a list that is ordered by
    descending ``'fecha'`` in order. If no such entry exists (the list is
    empty, or every entry is newer) the report is appended at the end
    instead of being silently dropped.

    Args:
        reports_list: list of report dicts, each with a ``'fecha'`` key.
            Modified in place.
        report: report dict with a ``'fecha'`` key.

    Returns:
        None.
    """
    fecha = report['fecha']

    # A report for this date already exists: leave the list untouched.
    if any(existing['fecha'] == fecha for existing in reports_list):
        return

    for position, existing in enumerate(reports_list):
        if existing['fecha'] < fecha:
            reports_list.insert(position, report)
            return

    # Nothing older was found, so the report belongs at the tail.
    reports_list.append(report)

@staticmethod
def clean_digits_string(string):
Expand Down
17 changes: 17 additions & 0 deletions restriccion/models/air_quality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
from .base_report import BaseReport


class AirQuality(BaseReport):
    """Air quality report model.

    The hooks below are class methods so that they match the signatures
    declared on ``BaseReport``.
    """

    @classmethod
    def get_mongo_collection(cls):
        """Return the name of the collection that stores these reports."""
        return 'reports_air_quality'

    @classmethod
    def get_fields(cls):
        """Return the names of the data fields that make up a report."""
        return ['ciudad', 'fecha', 'estado']

    @classmethod
    def get_unique_fields(cls):
        """Return the names of the fields that together identify a report."""
        return ['ciudad', 'fecha']
101 changes: 101 additions & 0 deletions restriccion/models/base_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
import collections
import hashlib

import moment

from restriccion import CONFIG


class BaseReport(object):
    """Shared behaviour for report models stored in a Mongo database.

    Subclasses provide the collection name, the list of data fields and the
    list of fields that together identify one report; this class builds
    report dicts, reads them back and inserts or updates them.
    """

    @classmethod
    def get_mongo_collection(cls):
        """Return the collection name. Must be overridden."""
        raise NotImplementedError()

    @classmethod
    def get_fields(cls):
        """Return the list of data field names. Must be overridden."""
        raise NotImplementedError()

    @classmethod
    def get_unique_fields(cls):
        """Return the list of field names identifying a report. Must be overridden."""
        raise NotImplementedError()

    @classmethod
    def dict(cls, source, data):
        """Build a report dict from ``data``.

        Args:
            source: value stored under the ``'fuente'`` key.
            data: mapping that contains every name in ``get_fields()``;
                other keys are ignored.

        Returns:
            A new dict with the model fields copied from ``data``, a
            ``'hash'`` key (SHA-1 hex digest of the field values, in
            ``get_fields()`` order) and the ``'fuente'`` key.

        Raises:
            KeyError: if ``data`` lacks one of the model fields.
        """
        fields = cls.get_fields()
        new_dict = {field: data[field] for field in fields}

        # Hash data to detect changes. Strings are hashed as they are and
        # every other value (lists, numbers, None, ...) through str(), so
        # hashable non-string values no longer fail on ``.encode``.
        # NOTE: values are concatenated without a separator; kept that way
        # so hashes of previously built reports stay the same.
        sha1_message = hashlib.sha1()
        for field in fields:
            value = data[field]
            text = value if isinstance(value, str) else str(value)
            sha1_message.update(text.encode('utf-8'))

        new_dict['hash'] = sha1_message.hexdigest()
        new_dict['fuente'] = source

        return new_dict

    @classmethod
    def get(cls, mongo_db, query=None, limit=10):
        """Return up to ``limit`` reports matching ``query``.

        Results are sorted descending by each unique field, in the order
        given by ``get_unique_fields()``, and exclude ``'_id'``.

        Args:
            mongo_db: database object indexable by collection name.
            query: filter document; ``None`` means no filter.
            limit: maximum number of rows requested from the collection.

        Returns:
            A list with the rows returned by the collection.
        """
        if query is None:
            query = {}

        # An explicit list of pairs keeps the sort priority well defined,
        # which a ``$orderby`` dict does not guarantee.
        sort = [(field, -1) for field in cls.get_unique_fields()]

        rows = mongo_db[cls.get_mongo_collection()].find(
            query,
            {'_id': 0},
            limit=limit,
            sort=sort
        )
        return list(rows)

    @classmethod
    def insert_many(cls, mongo_db, reports):
        """Insert new reports and update the ones whose content changed.

        For each report the stored row with the same unique-field values is
        looked up. Identical rows are skipped; otherwise the report gets an
        ``'actualizacion'`` timestamp and is inserted (no stored row) or
        written over the stored row with ``$set``.

        Args:
            mongo_db: database object indexable by collection name.
            reports: iterable of report dicts; processed dicts are
                modified in place (``'actualizacion'`` is added and any
                ``'_id'`` key is removed).

        Returns:
            None.
        """
        update_time = moment.utcnow().timezone(CONFIG['moment']['timezone']).isoformat()

        # Fetch exactly the keys a freshly built report has, so that the
        # stored row can be compared to the report with ``==``.
        projection = {'_id': 0}
        for field in ['hash', 'fuente'] + list(cls.get_fields()):
            projection[field] = 1

        class_collection = cls.get_mongo_collection()
        unique_fields = cls.get_unique_fields()

        for report in reports:
            find_query = {field: report[field] for field in unique_fields}
            row = mongo_db[class_collection].find_one(find_query, projection)

            if row == report:
                continue

            report['actualizacion'] = update_time

            if row is None:
                mongo_db[class_collection].insert_one(report)
            else:
                update_query = {field: row[field] for field in unique_fields}

                mongo_db[class_collection].update_one(
                    update_query,
                    {'$set': report}
                )

            # Drop any '_id' left on the caller's dict after the write.
            report.pop('_id', None)
79 changes: 11 additions & 68 deletions restriccion/models/restriction.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,17 @@
# -*- coding: utf-8 -*-
import hashlib
from .base_report import BaseReport

import moment

from restriccion import CONFIG
class Restriction(BaseReport):

@classmethod
def get_mongo_collection(cls):
return 'reports_restriction'

class Restriction(object):
@classmethod
def get_fields(cls):
return ['ciudad', 'fecha', 'sin_sello_verde', 'con_sello_verde']

@staticmethod
def dict(date, with_green_seal, without_green_seal, source):
data = {
'fecha': date,
'sin_sello_verde': with_green_seal,
'con_sello_verde': without_green_seal,
'fuente': source,
}

# Clear empty data
for key in ['sin_sello_verde', 'con_sello_verde']:
if data[key] is None or data[key] == '':
data[key] = []

# Hash data to detect changes
sha1_message = hashlib.sha1()
sha1_message.update(data['fecha'].encode('utf-8'))
sha1_message.update('-'.join(data['sin_sello_verde']).encode('utf-8'))
sha1_message.update('-'.join(data['con_sello_verde']).encode('utf-8'))

data['hash'] = sha1_message.hexdigest()
return data

@staticmethod
def get(mongo_db, query=None, limit=10):
if query is None:
query = {}

mongo_query = {'$query': query, '$orderby': {'fecha': -1}}

restrictions = []
rows = mongo_db.restrictions.find(mongo_query, {'_id': 0}, limit=limit)
for row in rows:
restrictions.append(row)
return restrictions

@staticmethod
def insert_many(mongo_db, restrictions_list):
update_time = moment.utcnow().timezone(CONFIG['moment']['timezone']).isoformat()

projection = {
'_id': 0,
'fecha': 1,
'sin_sello_verde': 1,
'con_sello_verde': 1,
'hash': 1,
'fuente': 1,
}

for restriction in restrictions_list:
row = mongo_db.restrictions.find_one({'fecha': restriction['fecha']}, projection)

if row == restriction:
continue

restriction['actualizacion'] = update_time

if row is None:
mongo_db.restrictions.insert_one(restriction)
else:
mongo_db.restrictions.update_one({'fecha': row['fecha']}, {'$set': restriction})

if '_id' in restriction:
del restriction['_id']
@classmethod
def get_unique_fields(cls):
return ['ciudad', 'fecha']
18 changes: 14 additions & 4 deletions restriccion/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
import json

from flask import Flask, request, Response
from flask.ext.cors import CORS
from flask_cors import CORS
import moment
import pymongo
from validate_email import validate_email

from restriccion import CONFIG
from restriccion.models.device import Device
from restriccion.models.air_quality import AirQuality
from restriccion.models.restriction import Restriction


Expand All @@ -28,8 +29,7 @@ def json_response(data, status_code=200):
return response


@app.route("/0/restricciones", methods=['GET'])
def restrictions_get():
def get_model_list(model):
date = request.args.get('fecha', None)

data = []
Expand All @@ -41,7 +41,17 @@ def restrictions_get():
except ValueError:
return json_response(data, status_code=400)

return json_response(Restriction.get(mongo_db, query))
return json_response(model.get(mongo_db, query))


@app.route("/0/restricciones", methods=['GET'])
def restrictions_get():
    """Handle GET /0/restricciones by delegating to get_model_list with Restriction."""
    response = get_model_list(Restriction)
    return response


@app.route("/0/calidad_aire", methods=['GET'])
def air_quality_get():
    """Handle GET /0/calidad_aire by delegating to get_model_list with AirQuality."""
    response = get_model_list(AirQuality)
    return response


@app.route("/0/dispositivos", methods=['GET'])
Expand Down
4 changes: 1 addition & 3 deletions runtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,4 @@ set -e

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

export RESTRICCION_CONFIG="$DIR/configs/tests.json"

coverage run --source=restriccion setup.py test
RESTRICCION_CONFIG="$DIR/configs/tests.json" coverage run --source=restriccion setup.py test
Loading

0 comments on commit 0191838

Please sign in to comment.