Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions official/recommendation/movielens.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@


def _download_and_clean(dataset, data_dir):
"""Download MovieLens dataset in a standard format.
"""Download the MovieLens dataset in a standard format.

This function downloads the specified MovieLens format and coerces it into a
standard format. The only difference between the ml-1m and ml-20m datasets
Expand Down Expand Up @@ -148,10 +148,10 @@ def _transform_csv(input_path, output_path, names, skip_first, separator=","):

Args:
input_path: The path of the raw csv.
output_path: The path of the cleaned csv.
names: The csv column names.
skip_first: Boolean of whether to skip the first line of the raw csv.
separator: Character used to separate fields in the raw csv.
output_path: The location of the cleaned csv file.
names: The names of the csv columns.
skip_first: Boolean indicating whether the first line of the raw csv should be skipped.
separator: A character used in raw csv to separate fields.
"""
if six.PY2:
names = [six.ensure_text(n, "utf-8") for n in names]
Expand Down Expand Up @@ -179,17 +179,17 @@ def _regularize_1m_dataset(temp_dir):
ratings.dat
The file has no header row, and each line is in the following format:
UserID::MovieID::Rating::Timestamp
- UserIDs range from 1 and 6040
- MovieIDs range from 1 and 3952
- UserIDs range between 1 and 6040
- MovieIDs can range between 1 and 3952
- Ratings are made on a 5-star scale (whole-star ratings only)
- Timestamp is represented in seconds since midnight Coordinated Universal
- Timestamp is represented in seconds since midnight Coordinated Universal
Time (UTC) of January 1, 1970.
- Each user has at least 20 ratings

movies.dat
Each line has the following format:
MovieID::Title::Genres
- MovieIDs range from 1 and 3952
- MovieIDs can range between 1 and 3952
"""
working_dir = os.path.join(temp_dir, ML_1M)

Expand Down Expand Up @@ -223,7 +223,7 @@ def _regularize_20m_dataset(temp_dir):
movies.csv
Each line has the following format:
MovieID,Title,Genres
- MovieIDs range from 1 and 3952
- MovieIDs can range between 1 and 3952
"""
working_dir = os.path.join(temp_dir, ML_20M)

Expand Down Expand Up @@ -265,7 +265,7 @@ def csv_to_joint_dataframe(data_dir, dataset):


def integerize_genres(dataframe):
"""Replace genre string with a binary vector.
"""Replace the genre string with a binary vector.

Args:
dataframe: a pandas dataframe of movie data.
Expand Down Expand Up @@ -308,7 +308,7 @@ def define_data_download_flags():


def main(_):
"""Download and extract the data from GroupLens website."""
"""Download and extract the data from the GroupLens website."""
download(flags.FLAGS.dataset, flags.FLAGS.data_dir)


Expand Down
6 changes: 3 additions & 3 deletions official/recommendation/ncf_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def define_ncf_flags():
default=None,
help=flags_core.help_wrap(
"The batch size used for evaluation. This should generally be larger"
"than the training batch size as the lack of back propagation during"
"than the training batch size, as the lack of back propagation during"
"evaluation can allow for larger batch sizes to fit in memory. If not"
"specified, the training batch size (--batch_size) will be used."))

Expand Down Expand Up @@ -257,7 +257,7 @@ def define_ncf_flags():
"If passed, training will stop when the evaluation metric HR is "
"greater than or equal to hr_threshold. For dataset ml-1m, the "
"desired hr_threshold is 0.68 which is the result from the paper; "
"For dataset ml-20m, the threshold can be set as 0.95 which is "
"For the dataset ml-20m, the threshold can be set as 0.95 which is "
"achieved by MLPerf implementation."))

flags.DEFINE_enum(
Expand Down Expand Up @@ -308,7 +308,7 @@ def define_ncf_flags():
"If set, output the MLPerf compliance logging. This is only useful "
"if one is running the model for MLPerf. See "
"https://github.com/mlperf/policies/blob/master/training_rules.adoc"
"#submission-compliance-logs for details. This uses sudo and so may "
"#submission-compliance-logs for details. This uses sudo, and so it may "
"ask for your password, as root access is needed to clear the system "
"caches, which is required for MLPerf compliance."))

Expand Down