Skip to content

Commit

Permalink
#19 related. Fixed: ignore case of clustering methods for dataset WIT…
Browse files Browse the repository at this point in the history
…HOUT HLA
  • Loading branch information
nicolay-r committed Jul 27, 2023
1 parent 410b030 commit fa604e4
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions my_s5_parlai_dataset_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,13 @@ def iter_dataset_lines(dataset_source, traits_func, candidates_provider, candida
else traits_provider["original"](your_id, partner_id)
}

dataset_lines_iter = MyAPI.read_dataset(
keep_usep=False, split_meta=True, dataset_filepath=MyAPI.dataset_filepath)

candidates_provider = {
#"_no-cands": None,

"": SameBookRandomCandidatesProvider(
iter_dialogs=MyAPI.iter_dataset_as_dialogs(dataset_lines_iter),
iter_dialogs=MyAPI.iter_dataset_as_dialogs(
MyAPI.read_dataset(keep_usep=False, split_meta=True, dataset_filepath=MyAPI.dataset_filepath)
),
candidates_per_book=1000,
candidates_limit=MyAPI.dataset_candidates_limit),

Expand All @@ -99,6 +98,11 @@ def iter_dataset_lines(dataset_source, traits_func, candidates_provider, candida
for data_fold_type, data_fold_source in dataset_filepaths.items():
for trait_type, traits_func in traits_provider.items():
for candidates_type, candidates_dict in candidates_provider.items():

# This type does not make sense, so we skip such formatting.
if trait_type == "spectrum" and candidates_type == "":
continue

filename = '{}_{}{}.txt'.format(data_fold_type, trait_type, candidates_type)

data_it = iter_dataset_lines(
Expand Down

0 comments on commit fa604e4

Please sign in to comment.