Skip to content

Commit

Permalink
Merge pull request #36 from justjasongreen/seed
Browse files Browse the repository at this point in the history
Implement Seed Command (closes #25)
  • Loading branch information
justjasongreen committed Jul 29, 2016
2 parents 1bbd855 + 5190f55 commit 98c19e6
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
- Add the Sample class (from @justjasongreen)
- Implement Provider.get_sample_by_runner method (from @justjasongreen)
- Extend racing_data.Race objects with a sample property (from @justjasongreen)
- Implement seed command line utility (from @justjasongreen)

### Changed
- Log the item associated with an exception in process_collection (from @justjasongreen)
Expand Down
10 changes: 10 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,16 @@ The -r (or --redis-uri=) option can be used to specify a URI for a redis server
The -q and -v (or --quiet and --verbose) options can be used to control the logging output generated by the scrape command. When the -q option is used, the logging level will be set to logging.WARNING. When the -v option is used, the logging level will be set to logging.DEBUG. By default, the logging level will be set to logging.INFO.


Seed
====

The 'seed' command line utility can be used to pre-seed query data for runners in the database. The syntax of the seed command is:

seed [-b] [-d <database_uri>] [-q] [-r <redis_uri>] [-v] date_from [date_to]

The application of the various command line options and arguments is the same as for the 'scrape' command described above.


***********************
Development and Testing
***********************
Expand Down
3 changes: 1 addition & 2 deletions predictive_punter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@

from . import race
from . import runner

from .sample import Sample

from .provider import Provider

from .command import Command
from .scrape import ScrapeCommand
from .seed import SeedCommand
4 changes: 2 additions & 2 deletions predictive_punter/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
from lxml import html
import punters_client
import pymongo
import racing_data
import redis
import requests

from . import Provider
from .date_utils import *
from .profiling_utils import *

Expand Down Expand Up @@ -87,7 +87,7 @@ def __init__(self, *args, **kwargs):

scraper = punters_client.Scraper(http_client, html_parser)

self.provider = racing_data.Provider(self.database, scraper)
self.provider = Provider(self.database, scraper)

def backup_database(self):
"""Backup the database if backup_database is available"""
Expand Down
25 changes: 12 additions & 13 deletions predictive_punter/race.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ def get_combinations(results):
for next_combination in next_combinations:
combinations.append([item] + next_combination)

dupes = []
for index in range(len(combinations)):
for item in combinations[index]:
if len([combo_item for combo_item in combinations[index] if combo_item == item]) > 1:
dupes.append(index)
break
for index in sorted(dupes, reverse=True):
del combinations[index]

return combinations

results = []
Expand All @@ -44,19 +53,9 @@ def get_combinations(results):

for index in range(len(results) - 1):
if len(results[index + 1]) < 1:
results[index + 1] = list(*results[index])

combinations = get_combinations(results)
dupes = []
for index in range(len(combinations)):
for item in combinations[index]:
if len([combo_item for combo_item in combinations[index] if combo_item == item]) > 1:
dupes.append(index)
break
for index in sorted(dupes, reverse=True):
combinations.removeAt(index)

return [tuple(combination) for combination in combinations]
results[index + 1] = list(results[index])

return [tuple(combination) for combination in get_combinations(results)]

racing_data.Race.get_winning_combinations = get_winning_combinations

Expand Down
5 changes: 3 additions & 2 deletions predictive_punter/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def calculate_expected_time(momentum):
if momentum is not None:
return self.actual_distance / (momentum / self.actual_weight)

return tuple([calculate_expected_time(momentum) for momentum in getattr(self, performance_list_name).momentums])
return tuple([calculate_expected_time(momentum) for momentum in getattr(self, performance_list_name).momentums]) if self.actual_distance is not None else tuple([None, None, None])

racing_data.Runner.calculate_expected_times = calculate_expected_times

Expand All @@ -17,7 +17,8 @@ def calculate_expected_time(momentum):
def races_per_year(self):
"""Return total number of career starts for the horse divded by the horse's age as at the race date"""

return self.career.starts / self.age
if self.age is not None and self.age > 0:
return self.career.starts / self.age

racing_data.Runner.races_per_year = races_per_year

Expand Down
21 changes: 21 additions & 0 deletions predictive_punter/seed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import sys

from . import Command


class SeedCommand(Command):
    """Console command that pre-seeds query data for the active runners in a given date range"""

    def process_runner(self, runner):
        """Run the inherited runner processing, then touch the normalized query data of unscratched runners"""

        super().process_runner(runner)

        # Only runners explicitly flagged as not scratched are seeded
        is_active = runner['is_scratched'] == False
        if not is_active:
            return

        # The attribute access itself is the work being done here; presumably the
        # sample is built and stored lazily on first access -- confirm against Sample
        runner.sample.normalized_query_data


def main():
    """Main entry point for seed console script

    Forwards the command line arguments (without the program name) to
    SeedCommand.main.
    """

    # Must dispatch to SeedCommand (defined in this module): ScrapeCommand is
    # not imported here, and would only scrape without seeding in any case.
    SeedCommand.main(sys.argv[1:])
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@
data_files=[],
entry_points={
'console_scripts': [
'scrape=predictive_punter.scrape:main'
'scrape=predictive_punter.scrape:main',
'seed=predictive_punter.seed:main'
]
}
)
21 changes: 21 additions & 0 deletions tests/seed_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import predictive_punter
import pytest


@pytest.fixture(scope='module')
def seed_command(database_uri):
    """Run the seed command once per test module against the database at database_uri"""

    arguments = ['-d', database_uri, '2016-2-1', '2016-2-2']
    predictive_punter.SeedCommand.main(arguments)


def test_samples(database, seed_command):
    """After seeding, there should be exactly one sample per unscratched runner in the database"""

    sample_count = database['samples'].count()
    active_runner_count = database['runners'].count({'is_scratched': False})

    assert sample_count == active_runner_count


def test_values(database, seed_command):
    """After seeding, every stored sample should carry normalized query data"""

    stored_samples = database['samples'].find()
    for stored_sample in stored_samples:
        query_data = stored_sample['normalized_query_data']
        assert query_data is not None

0 comments on commit 98c19e6

Please sign in to comment.