Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 29 additions & 10 deletions nowcasting_dataset/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,18 +245,37 @@ def sample_spatial_and_temporal_locations_for_examples(
columns: 't0_datetime_UTC', 'x_center_OSGB', 'y_center_OSGB'.
"""
shuffled_t0_datetimes = np.random.choice(t0_datetimes, size=n_examples)
# TODO: Issue #304. Speed this up by splitting the shuffled_t0_datetimes across
# multiple processors. Currently takes about half an hour for 25,000 batches.
# But wait until we've implemented issue #305, as that is likely to be sufficient!
(
x_locations,
y_locations,
) = self.data_source_which_defines_geospatial_locations.get_locations(shuffled_t0_datetimes)

with futures.ThreadPoolExecutor() as executor:
tasks = []

# loop over t0 datetimes
for t0_datetime_UTC in shuffled_t0_datetimes:

# Submit one get_locations call for this single t0 datetime to the thread pool.
locations_task = executor.submit(
self.data_source_which_defines_geospatial_locations.get_locations,
t0_datetimes=[t0_datetime_UTC],
)
tasks.append([t0_datetime_UTC, locations_task])

# Collect results from each thread.
x_centers_osgb = []
y_centers_osgb = []
t0_datetimes_utc = []
for t0_datetime_UTC, locations_task in tasks:
x_center_OSGB, y_center_OSGB = locations_task.result()

# 'x_center_OSGB' and 'y_center_OSGB' are lists; only one t0 datetime was passed, so take the first element of each.
x_centers_osgb.append(x_center_OSGB[0])
y_centers_osgb.append(y_center_OSGB[0])
t0_datetimes_utc.append(t0_datetime_UTC)

return pd.DataFrame(
{
"t0_datetime_UTC": shuffled_t0_datetimes,
"x_center_OSGB": x_locations,
"y_center_OSGB": y_locations,
"t0_datetime_UTC": t0_datetimes_utc,
"x_center_OSGB": x_centers_osgb,
"y_center_OSGB": y_centers_osgb,
}
)

Expand Down