Skip to content

Commit

Permalink
Merge pull request #105 from MrLokans/master
Browse files Browse the repository at this point in the history
PEP8 styling fixes. Extra type annotations and docstrings added.
  • Loading branch information
axsapronov committed May 12, 2016
2 parents 7151f65 + 4e04330 commit ad18777
Show file tree
Hide file tree
Showing 18 changed files with 106 additions and 46 deletions.
1 change: 1 addition & 0 deletions advertising/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- encoding: utf-8 -*-
import datetime

from django.core.exceptions import ValidationError
Expand Down
5 changes: 2 additions & 3 deletions digest/dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ def values(self):
queryset = self.get_queryset()
date_field = 'related_to_date' if settings.DEPLOY else 'DATE(related_to_date)'
queryset = (queryset.filter(status='active')
.extra({'baked':
date_field})
.extra({'baked': date_field})
.select_related('section')
.values_list('section__title', 'baked')
.order_by('-baked')
Expand Down Expand Up @@ -92,7 +91,7 @@ def values(self):
queryset = self.get_queryset()

date_field = 'related_to_date' if settings.DEPLOY else 'DATE(related_to_date)'
return (queryset.extra({'baked':date_field})
return (queryset.extra({'baked': date_field})
.values_list('baked')
.order_by('-baked')
.annotate(ocount=Count('pk'))[:self.limit_to])
Expand Down
1 change: 0 additions & 1 deletion digest/management/commands/cls_split_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import os
import random

from django.conf import settings
from django.core.management.base import BaseCommand

from digest.management.commands.cls_create_dataset import save_dataset
Expand Down
6 changes: 3 additions & 3 deletions digest/management/commands/create_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ def create_dataset(queryset_items, name):
if not queryset_items:
return
out_filepath = os.path.join(settings.DATASET_FOLDER, name)
data = {'links': [
x.get_data4cls(status=True) for x in queryset_items
]}
data = {
'links': [x.get_data4cls(status=True) for x in queryset_items]
}

if not os.path.exists(os.path.dirname(out_filepath)):
os.makedirs(os.path.dirname(out_filepath))
Expand Down
3 changes: 2 additions & 1 deletion digest/management/commands/create_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def handle(self, *args, **options):
Основной метод - точка входа
"""
api = AlchemyAPI(settings.ALCHEMY_KEY)
for item in Item.objects.filter(pk__range=(options['start'], options['end']), keywords=None):
pk_limits = (options['start'], options['end'])
for item in Item.objects.filter(pk__range=pk_limits, keywords=None):
# create_keywords(api, item)
async(create_keywords, api, item)
6 changes: 4 additions & 2 deletions digest/management/commands/download_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def get_article(item):
text = item.text
except Exception as e:
text = ''

fio.write(text)
item.article_path = path
item.save()
Expand All @@ -35,6 +35,8 @@ def handle(self, *args, **options):
os.makedirs(settings.DATASET_ROOT)

for item in Item.objects.all():
if item.article_path is None or not item.article_path or not os.path.exists(item.article_path):
path_incorrect = item.article_path is None or not item.article_path
path_exists = os.path.exists(item.article_path)
if path_incorrect or not path_exists:
async(get_article, item)
# get_article(item)
12 changes: 9 additions & 3 deletions digest/management/commands/import_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@
from django.core.management.base import BaseCommand
from typing import List, Dict

from digest.management.commands import apply_parsing_rules, get_tweets_by_url, \
save_item, is_weekly_digest, \
parse_weekly_digest, _get_http_data_of_url, apply_video_rules
from digest.management.commands import (
apply_parsing_rules,
apply_video_rules,
get_tweets_by_url,
parse_weekly_digest,
save_item,
is_weekly_digest,
_get_http_data_of_url,
)
from digest.models import ITEM_STATUS_CHOICES, \
AutoImportResource, Item, ParsingRules, Section

Expand Down
40 changes: 31 additions & 9 deletions digest/management/commands/import_python_weekly.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,26 @@

from urllib.error import URLError
from urllib.request import urlopen
from typing import Sequence, Dict, Union

import lxml.html as html
from bs4 import BeautifulSoup
from bs4.element import Tag
from django.core.management.base import BaseCommand
from lxml import etree

from digest.management.commands import apply_parsing_rules, apply_video_rules, save_item
from digest.management.commands import (
apply_parsing_rules,
apply_video_rules,
save_item
)
from digest.models import ParsingRules, Section, ITEM_STATUS_CHOICES, Resource

Parseble = Union[BeautifulSoup, html.HtmlElement]


def _get_content(url: str) -> str:
"""Gets text from URL's response"""
try:
result = urlopen(url, timeout=10).read()
except URLError:
Expand All @@ -23,37 +31,50 @@ def _get_content(url: str) -> str:
return result


def _get_blocks(url) -> list:
def _get_blocks(url: str) -> Sequence[BeautifulSoup]:
"""
Grab all blocks containing news titles and links
from URL
"""
result = []
content = _get_content(url)
if content:
try:
page = html.parse(content)
result = page.getroot().find_class('bodyTable')[0].xpath('//span[@style="font-size:14px"]')
page = html.fromstring(content)
result = page.find_class('bodyTable')[0]
result = result.xpath('//span[@style="font-size:14px"]')
except OSError:
page = BeautifulSoup(content, 'lxml')
result = page.findAll('table', {'class': 'bodyTable'})[0].findAll('span', {'style': "font-size:14px"})
result = page.findAll('table', {'class': 'bodyTable'})[0]
result = result.findAll('span', {'style': "font-size:14px"})
return result


def _get_block_item(block) -> dict:
def _get_block_item(block: Parseble) -> Dict[str, Union[str, int, Resource]]:
"""Extract all data (link, title, description) from block"""
resource = Resource.objects.get(title='PythonWeekly')

# Handle BeautifulSoup element
if isinstance(block, Tag):
link = block.findAll('a')[0]
url = link['href']
title = link.string
try:
text = str(block.nextSibling.nextSibling).replace('<br/>', '').strip()
text = str(block.nextSibling.nextSibling)
text = text.replace('<br/>', '').strip()
except AttributeError:
return {}

# Handle BeautifulSoup element
else:
link = block.cssselect('a')[0]
url = link.attrib['href']
title = link.text
_text = block.getnext()
if _text is None:
return {}
text = etree.tostring(block.getnext()).decode('utf-8').replace('<br/>', '').strip()
text = etree.tostring(block.getnext()).decode('utf-8')
text = text.replace('<br/>', '').strip()

return {
'title': title,
Expand Down Expand Up @@ -88,7 +109,8 @@ def main(url):
}
_apply_rules = _apply_rules_wrap(**data)

list(map(save_item, map(_apply_rules, map(_get_block_item, _get_blocks(url)))))
block_items = map(_get_block_item, _get_blocks(url))
list(map(save_item, map(_apply_rules, block_items)))


# Написать тест с использованием ссылки
Expand Down
1 change: 1 addition & 0 deletions digest/management/commands/import_release_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def check_previous_news_of_package(news, package_data):
assert items.count() <= 1, 'Many items for library'
return items.count() != 0


def parse_rss():

url = 'https://allmychanges.com/rss/03afbe621916b2f2145f111075db0759/'
Expand Down
10 changes: 6 additions & 4 deletions digest/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@
def build_url(*args, **kwargs):
params = kwargs.pop('params', {})
url = reverse(*args, **kwargs)
if not params: return url
if not params:
return url

query_dict = QueryDict('', mutable=True)
for k, v in params.items():
Expand Down Expand Up @@ -282,7 +283,8 @@ def link_type(self):

@property
def text(self):
if self.article_path is not None and self.article_path and os.path.exists(
nonempty_path = self.article_path is not None and self.article_path
if nonempty_path and os.path.exists(
self.article_path):
with open(self.article_path, 'r') as fio:
result = fio.read()
Expand Down Expand Up @@ -335,8 +337,8 @@ def internal_link(self):

@property
def tags_as_links(self):
return [(x.name, build_url('digest:feed', params={'tag': x.name})) for x
in self.tags.all()]
return [(x.name, build_url('digest:feed', params={'tag': x.name}))
for x in self.tags.all()]

@property
def tags_as_str(self):
Expand Down
15 changes: 11 additions & 4 deletions digest/pub_digest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
from twx.botapi import TelegramBot


def init_auth(consumer_key, consumer_secret, access_token, access_token_secret):
def init_auth(consumer_key,
consumer_secret,
access_token,
access_token_secret):
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
Expand Down Expand Up @@ -90,7 +93,8 @@ def send_message(self, room, text):
'Authorization': 'Bearer {0}'.format(self.token),
}
room_id = self.room_id_dict.get(room)
url = 'https://api.gitter.im/v1/rooms/{room_id}/chatMessages'.format(room_id=room_id)
url = 'https://api.gitter.im/v1/rooms/{room_id}/chatMessages'
url = url.format(room_id=room_id)
payload = {'text': text}
r = requests.post(url, data=json.dumps(payload), headers=headers)

Expand Down Expand Up @@ -175,7 +179,9 @@ def pub_to_vk_users(text, api):
def pub_to_vk_groups(text, attachments, api):
for groupd_id, from_group in get_pydigest_groups():
print(groupd_id, from_group)
res = post_to_wall(api, groupd_id, text, **{'attachments': attachments, 'from_group': from_group})
res = post_to_wall(api, groupd_id, text,
**{'attachments': attachments,
'from_group': from_group})
print(res)
time.sleep(1)

Expand All @@ -192,7 +198,8 @@ def pub_to_telegram(text, bot_token, tg_channel):


def pub_to_slack(text, digest_url, digest_image_url, ifttt_key):
url = 'https://maker.ifttt.com/trigger/pub_digest/with/key/{0}'.format(ifttt_key)
url = 'https://maker.ifttt.com/trigger/pub_digest/with/key/{0}'
url = url.format(ifttt_key)

data = {
'value1': text,
Expand Down
10 changes: 8 additions & 2 deletions digest/urls.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from django.conf.urls import url

from .views import IssuesList, \
IssueView, ItemView, AddNews, NewsList, get_items_json, ItemsByTagView
from .views import (
IssuesList,
IssueView,
ItemView,
AddNews,
NewsList,
get_items_json,
)

app_name = 'digest'
urlpatterns = [
Expand Down
6 changes: 4 additions & 2 deletions digest/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@


def write_fixture(name: str, data: Any, mode='wb') -> None:
with open(os.path.join(os.path.dirname(__file__), 'tests', name), mode) as fio:
path = os.path.join(os.path.dirname(__file__), 'tests', name)
with open(path, mode) as fio:
fio.write(data)


def read_fixture(name: str, mode='rb'):
with open(os.path.join(os.path.dirname(__file__), 'tests', name), mode) as fio:
path = os.path.join(os.path.dirname(__file__), 'tests', name)
with open(path, mode) as fio:
return fio.read()


Expand Down
5 changes: 4 additions & 1 deletion jobs/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@ class JobFeedAdmin(admin.ModelAdmin):
link_html.allow_tags = True
link_html.short_description = 'Ссылка'


class RejectedListAdmin(admin.ModelAdmin):
    """Admin for RejectedList entries; relies entirely on the default
    ModelAdmin behavior (no custom list display, filters, or actions)."""
    pass


class AcceptedListAdmin(admin.ModelAdmin):
    """Admin for AcceptedList entries; relies entirely on the default
    ModelAdmin behavior (no custom list display, filters, or actions)."""
    pass


class JobItemAdmin(admin.ModelAdmin):
list_display = (
'title',
Expand All @@ -43,4 +46,4 @@ class JobItemAdmin(admin.ModelAdmin):
admin.site.register(JobItem, JobItemAdmin)
admin.site.register(JobFeed, JobFeedAdmin)
admin.site.register(RejectedList, RejectedListAdmin)
admin.site.register(AcceptedList, AcceptedListAdmin)
admin.site.register(AcceptedList, AcceptedListAdmin)
6 changes: 3 additions & 3 deletions jobs/management/commands/import_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ def import_jobs_rss():
filter(excl_filter,
map(make_validate_dict,
map(prepare_link_title,
filter(is_new_job,
join(
map(get_rss_items, job_feeds)))))))
filter(is_new_job,
join(
map(get_rss_items, job_feeds)))))))
for x in items:
save_job(x)

Expand Down
20 changes: 14 additions & 6 deletions jobs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class Meta:
verbose_name = 'Источник импорта вакансий'
verbose_name_plural = 'Источники импорта вакансий'


class RejectedList(models.Model):
title = models.CharField('Строка', max_length=255)

Expand All @@ -42,6 +43,7 @@ class Meta:
verbose_name = 'Слисок исключения'
verbose_name_plural = 'Строки для исключения'


class AcceptedList(models.Model):
title = models.CharField('Строка', max_length=255)

Expand All @@ -52,6 +54,7 @@ class Meta:
verbose_name = 'Слисок одобрения'
verbose_name_plural = 'Строки для одобрения'


class JobItem(models.Model):
title = models.CharField('Название', max_length=255)
link = models.URLField('Ссылка')
Expand All @@ -60,9 +63,12 @@ class JobItem(models.Model):
null=True,
blank=True)

created_at = models.DateTimeField('Дата создания', auto_now_add=True, null=True, blank=True)
updated_at = models.DateTimeField('Дата обновления', auto_now=True, null=True, blank=True)
published_at = models.DateTimeField('Дата публикации', null=True, editable=False)
created_at = models.DateTimeField('Дата создания', auto_now_add=True,
null=True, blank=True)
updated_at = models.DateTimeField('Дата обновления', auto_now=True,
null=True, blank=True)
published_at = models.DateTimeField('Дата публикации', null=True,
editable=False)

src_id = models.CharField('ID в источнике', max_length=50, null=True,
blank=True)
Expand All @@ -83,10 +89,12 @@ class JobItem(models.Model):
salary_currency = models.CharField('Валюта', max_length=255, null=True,
blank=True)

def get_salary_str(self):
def get_salary_str(self) -> str:
result = ''
result += ' от %s' % format_currency(self.salary_from) if self.salary_from else ''
result += ' до %s' % format_currency(self.salary_till) if self.salary_till else ''
low_limit = format_currency(self.salary_from) if self.salary_from else ''
high_limit = format_currency(self.salary_till) if self.salary_till else ''
result += ' от {low}'.format(low=low_limit)
result += ' до {high}'.format(high=high_limit)
result += ' ' + self.salary_currency if self.salary_currency else ''
return result

Expand Down
Loading

0 comments on commit ad18777

Please sign in to comment.