diff --git a/apps/grepo_base/backends/__init__.py b/apps/grepo_base/backends/__init__.py
index bde31ea..47196cf 100644
--- a/apps/grepo_base/backends/__init__.py
+++ b/apps/grepo_base/backends/__init__.py
@@ -15,11 +15,6 @@
 
         Updates a given :class:`~grepo_base.models.Repository` object,
         **without** saving it to the database.
-
-    .. function:: needs_update(repository)
-
-        Returns ``True`` if a given :class:`~grepo_base.models.Repository`
-        instance needs to be updated and ``False`` otherwise.
 """
 
 from django.conf import settings
diff --git a/apps/grepo_base/backends/github.py b/apps/grepo_base/backends/github.py
index 0c26ba8..072b816 100644
--- a/apps/grepo_base/backends/github.py
+++ b/apps/grepo_base/backends/github.py
@@ -3,57 +3,118 @@
     grepo_base.github
     ~~~~~~~~~~~~~~~~~
 
+    Github.com backend for `grepo`.
 """
 
-from datetime import datetime
-
-from json import loads
-from itertools import count
+import itertools
+import math
 from httplib import HTTPConnection
+from datetime import datetime, timedelta
 
 from django.conf import settings
+from django.utils import simplejson as json
+
+from grepo_base.models import Repository, Language
 
-from grepo_base.models import Repository
 
 
 #: Github api search path
 SEARCH_PATH = "/api/v2/json/repos/search/language:{lang}?start_page={page}"
 #: Github host, used for httplib for connect to api
 GITHUB = "github.com"
-#: List of languages that we will be processed
-LANGUAGES = [lang[0].upper() + lang[1:] for lang in settings.GREPO_LANGUAGES]
 
 
-def get_page(lang, page):
+def fetch(language, page):
+    """Returns the raw JSON body of one page of Github search results.
+
+    :param language: language name substituted into :data:`SEARCH_PATH`.
+    :param page: number of the results page to request.
+    """
     connection = HTTPConnection(GITHUB)
-    connection.request("GET", SEARCH_PATH.format(lang=lang, page=page))
-    response = connection.getresponse()
-    return response
+    try:
+        connection.request(
+            "GET", SEARCH_PATH.format(lang=language, page=page))
+        return connection.getresponse().read()
+    finally:
+        # Close the socket even if the request or the read fails.
+        connection.close()
+
+
+def list():
+    """Yields all repositories one by one.
+
+    Walks every :class:`~grepo_base.models.Language`, requesting result
+    pages until Github returns an empty one, and yields a dict of field
+    values for each repository that reports a language.
+    """
+    for language in Language.objects.all():
+        for page in itertools.count(1):
+            data = fetch(language.name, page)
+            repositories = json.loads(data)["repositories"]
+
+            if not repositories:
+                break
+
+            for repository in repositories:
+                if not repository.get("language"):
+                    continue
+
+                now = datetime.utcnow()
+                # Note: `source` and `language` field should be handled
+                # by the caller.
+                yield {
+                    "url": repository["url"],
+                    "name": repository["name"],
+                    "language": repository["language"],
+                    # `description` is treated as optional.
+                    "summary": repository.get("description"),
+                    "score": calculate_repository_score(repository),
+                    "updated_at": now,
+                    "created_at": now
+                }
 
 
-def fetch_by_lang(lang):
-    for page in count(1):
-        page = get_page(lang, page)
-        repositories = loads(page.read())["repositories"]
-        if not repositories:
-            break
-        for repository in repositories:
-            yield repository
+def update(repository):
+    """Returns `repository` as is."""
+    return repository  # A simple pass-through for now.
 
 
-def fetch_repositories():
-    for lang in LANGUAGES:
-        for repository in fetch_by_lang(lang):
-            yield repository
+def _parse_timestamp(value):
+    """Parses ``<date>T <time> <+-HHMM>`` into a naive UTC datetime.
+
+    ``strptime`` rejects the ``%z`` directive on Python 2, so the UTC
+    offset is split off and applied by hand.
+    """
+    # NOTE(review): the date layout is an assumption -- confirm it
+    # against a real API response.
+    stamp, offset = value.rsplit(" ", 1)
+    moment = datetime.strptime(stamp, "%Y/%m/%dT %H:%M:%S")
+    sign = -1 if offset.startswith("-") else 1
+    minutes = int(offset[-4:-2]) * 60 + int(offset[-2:])
+    return moment - sign * timedelta(minutes=minutes)
+
+
+def calculate_repository_score(data):
+    """Calculates and returns Grepo-score for a given repository.
 
-def save_repository(repository):
-    obj = Repository(url=repository["url"], name=repository["name"],
-                     source=0,
-                     language=LANGUAGES.index(repository["language"]),
-                     summary=repository.get("description"),
-                     updated_at=datetime.utcnow(), created_at=datetime.utcnow())
-    obj.save()
+    .. todo:: query for pull requests and add them to the exponent
+              argument.
+
+    :param data: repository dict with ``created_at``, ``pushed_at``,
+        ``open_issues``, ``watchers`` and ``forks`` keys; left unmodified.
+    :returns: the score as a float.
+    """
+    created_at = _parse_timestamp(data["created_at"])
+    pushed_at = _parse_timestamp(data["pushed_at"])
+    # NOTE(review): `created_at - pushed_at` is negative whenever the last
+    # push follows creation -- confirm the intended operand order.
+    age = created_at - pushed_at
+    age_seconds = age.days * 86400 + age.seconds
 
-def rescan_github():
-    for repository in fetch_repositories():
-        save_repository(repository)
+    # Float maths on purpose: `/` truncates ints on Python 2.  A zero
+    # divisor falls back to a neutral value instead of raising.
+    forks = float(data["forks"])
+    ratio = data["watchers"] / forks if forks else float(data["watchers"])
+    denominator = data["open_issues"] + ratio
+    exponent = 1.0 / denominator if denominator else 0.0
+    return age_seconds * math.exp(exponent)
diff --git a/apps/grepo_base/backends/lp.py b/apps/grepo_base/backends/lp.py
index 68d96ba..99510f7 100644
--- a/apps/grepo_base/backends/lp.py
+++ b/apps/grepo_base/backends/lp.py
@@ -55,7 +55,7 @@ def get_repos():
         yield get_project_info(project.name, language_list)
 
 def get_project_info(name, language_list):
-    project = launchpad.projects[name] 
+    project = launchpad.projects[name]
     info = {}
     info['name'] = name
     info['created_at'] = project.date_created
@@ -102,7 +102,7 @@ def get_project_languages(language_string):
     languages = language_string.split(r'/')
     if len(languages) == 1:
         languages = language_string.split(r' ')
-    
+
     language_list = []
     for l in languages:
         language = guess_language(l)
@@ -110,4 +110,3 @@ def get_project_languages(language_string):
             language_list.append(language)
 
     return set(language_list)
-
diff --git a/apps/grepo_base/models.py b/apps/grepo_base/models.py
index f517b00..038e368 100644
--- a/apps/grepo_base/models.py
+++ b/apps/grepo_base/models.py
@@ -64,8 +64,3 @@ def __unicode__(self):
     def save(self):
         self.updated_at = datetime.now()
         super(Repository, self).save()
-
-
-def calculate_grepo_score():
-    """Returns `grepo` score for a bunch of repository metadata."""
-    return 0