Skip to content

Commit

Permalink
Updated parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
sidneycadot committed Sep 27, 2015
1 parent ca663e4 commit 565db52
Show file tree
Hide file tree
Showing 4 changed files with 347 additions and 265 deletions.
27 changes: 20 additions & 7 deletions OeisEntry.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@

class OeisEntry:
def __init__(self, oeis_id, identification, values, name, offset, keywords):
self.oeis_id = oeis_id
self.identification = identification
self.values = values
self.name = name
self.offset = offset
self.keywords = keywords
def __init__(self, oeis_id, identification, values, name, comments, detailed_references, links, formulas, examples,
             maple_programs, mathematica_programs, other_programs, cross_references, keywords, offset_a, offset_b, author, extensions_and_errors):
    """Store the parsed fields of a single OEIS database entry.

    All arguments are kept verbatim as same-named instance attributes; no
    validation or conversion is performed here.

    Parameters:
        oeis_id                -- integer entry number (rendered as 'A%06d' by __str__)
        identification         -- identification field of the entry
        values                 -- the sequence terms
        name                   -- the entry's name/title
        comments               -- comment lines  (presumably the OEIS %C directives — TODO confirm against the parser)
        detailed_references    -- reference lines
        links                  -- link lines
        formulas               -- formula lines
        examples               -- example lines
        maple_programs         -- Maple program lines
        mathematica_programs   -- Mathematica program lines
        other_programs         -- program lines in other languages
        cross_references       -- cross-reference lines
        keywords               -- keyword list for the entry
        offset_a, offset_b     -- the two components of the entry's offset field
        author                 -- author field
        extensions_and_errors  -- extensions/errors field
    """
    self.oeis_id = oeis_id
    self.identification = identification
    self.values = values
    self.name = name
    self.comments = comments
    self.detailed_references = detailed_references
    self.links = links
    self.formulas = formulas
    self.examples = examples
    self.maple_programs = maple_programs
    self.mathematica_programs = mathematica_programs
    self.other_programs = other_programs
    self.cross_references = cross_references
    self.keywords = keywords
    self.offset_a = offset_a
    self.offset_b = offset_b
    self.author = author
    self.extensions_and_errors = extensions_and_errors
def __str__(self):
return "A{:06d}".format(self.oeis_id)
1 change: 1 addition & 0 deletions check_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def main():
filename = "oeis_with_bfile.pickle"
filename = "oeis_with_bfile-10000.pickle"
filename = "oeis_v20150919.pickle"
filename = "oeis.pickle"

with open(filename, "rb") as f:
oeis_entries = pickle.load(f)
Expand Down
44 changes: 11 additions & 33 deletions fetch_oeis_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
import lzma
import concurrent.futures

from timer import start_timer
from fetch_remote_oeis_entry import fetch_remote_oeis_entry, BadOeisResponse
from timer import start_timer
from exit_scope import close_when_done, shutdown_when_done

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -117,8 +118,8 @@ def process_responses(dbconn, responses):

processed_entries = set()

dbcursor = dbconn.cursor()
try:
with close_when_done(dbconn.cursor()) as dbcursor:

for response in responses:

if response is None:
Expand Down Expand Up @@ -156,9 +157,6 @@ def process_responses(dbconn, responses):

processed_entries.add(response.oeis_id)

finally:
dbcursor.close()

dbconn.commit()

logger.info("Processed {} responses (failures: {}, new: {}, identical: {}, updated: {}).".format(len(responses), countFailures, countNewEntries, countIdenticalEntries, countUpdatedEntries))
Expand Down Expand Up @@ -222,12 +220,9 @@ def fetch_entries_into_database(dbconn, entries):
def make_database_complete(dbconn, highest_oeis_id):
"""Fetch all entries from the remote OEIS database that are not yet present in the local SQLite database."""

dbcursor = dbconn.cursor()
try:
with close_when_done(dbconn.cursor()) as dbcursor:
dbcursor.execute("SELECT oeis_id FROM oeis_entries;")
present_entries = dbcursor.fetchall()
finally:
dbcursor.close()

present_entries = [oeis_id for (oeis_id, ) in present_entries]
logger.info("Entries present in local database: {}.".format(len(present_entries)))
Expand All @@ -242,12 +237,9 @@ def make_database_complete(dbconn, highest_oeis_id):
def update_database_entries_randomly(dbconn, howmany):
"""Re-fetch (update) a random subset of entries that are already present in the local SQLite database."""

dbcursor = dbconn.cursor()
try:
with close_when_done(dbconn.cursor()) as dbcursor:
dbcursor.execute("SELECT oeis_id FROM oeis_entries;")
present_entries = dbcursor.fetchall()
finally:
dbcursor.close()

present_entries = [oeis_id for (oeis_id, ) in present_entries]

Expand All @@ -274,13 +266,10 @@ def update_database_entries_by_priority(dbconn, howmany):

t_current = time.time()

dbcursor = dbconn.cursor()
try:
with close_when_done(dbconn.cursor()) as dbcursor:
query = "SELECT oeis_id FROM oeis_entries ORDER BY (? - t2) / max(t2 - t1, 1e-6) DESC LIMIT ?;"
dbcursor.execute(query, (t_current, howmany))
highest_priority_entries = dbcursor.fetchall()
finally:
dbcursor.close()

highest_priority_entries = [oeis_id for (oeis_id, ) in highest_priority_entries]

Expand All @@ -291,12 +280,9 @@ def update_database_entries_by_priority(dbconn, howmany):
def update_database_entries_for_nonzero_time_window(dbconn):
""" Re-fetch entries in the database that have a 0-second time window. These are entries that have been fetched only once."""

dbcursor = dbconn.cursor()
try:
with close_when_done(dbconn.cursor()) as dbcursor:
dbcursor.execute("SELECT oeis_id FROM oeis_entries WHERE t1 = t2;")
zero_timewindow_entries = dbcursor.fetchall()
finally:
dbcursor.close()

zero_timewindow_entries = [oeis_id for (oeis_id, ) in zero_timewindow_entries]

Expand Down Expand Up @@ -349,11 +335,8 @@ def consolidate_database_daily(database_filename, remove_stale_files_flag):
logger.info("Consolidating database to '{}' ...".format(xz_filename))

# Vacuum the database
dbconn = sqlite3.connect(database_filename)
try:
with close_when_done(sqlite3.connect(database_filename)) as dbconn:
vacuum_database(dbconn)
finally:
dbconn.close()

# Create the xz file.
compress_file(database_filename, xz_filename)
Expand All @@ -374,15 +357,12 @@ def database_update_cycle(database_filename):

highest_oeis_id = find_highest_oeis_id() # Check OEIS server for highest entry ID.

dbconn = sqlite3.connect(database_filename)
try:
with close_when_done(sqlite3.connect(database_filename)) as dbconn:
ensure_database_schema_created(dbconn)
make_database_complete(dbconn, highest_oeis_id) # Make sure we have all entries (full fetch on first run).
update_database_entries_randomly(dbconn, highest_oeis_id // 1000) # Refresh 0.1 % of entries randomly.
update_database_entries_by_priority(dbconn, highest_oeis_id // 200) # Refresh 0.5 % of entries by priority.
update_database_entries_for_nonzero_time_window(dbconn) # Make sure we have t1 != t2 for all entries (full fetch on first run).
finally:
dbconn.close()

consolidate_database_daily(database_filename, remove_stale_files_flag = False)

Expand All @@ -408,10 +388,8 @@ def main():
FORMAT = "%(asctime)-15s | %(levelname)-8s | %(message)s"
logging.basicConfig(format = FORMAT, level = logging.DEBUG)

try:
with shutdown_when_done(logging):
database_update_cycle_loop(database_filename)
finally:
logging.shutdown()

if __name__ == "__main__":
main()
Loading

0 comments on commit 565db52

Please sign in to comment.