tensorflow · laxmareddyp · Dec 6, 2023 · Feb 28, 2023
@@ -89,7 +89,7 @@
 
 
 def _download_and_clean(dataset, data_dir):
-  """Download MovieLens dataset in a standard format.
+  """Download the MovieLens dataset in a standard format.
 
   This function downloads the specified MovieLens format and coerces it into a
   standard format. The only difference between the ml-1m and ml-20m datasets
@@ -148,10 +148,10 @@ def _transform_csv(input_path, output_path, names, skip_first, separator=","):
 
   Args:
     input_path: The path of the raw csv.
-    output_path: The path of the cleaned csv.
-    names: The csv column names.
-    skip_first: Boolean of whether to skip the first line of the raw csv.
-    separator: Character used to separate fields in the raw csv.
+    output_path: The location of the cleaned csv file.
+    names: The names of the csv columns.
+    skip_first: Boolean; if true, the first line of the raw csv is skipped.
+    separator: The character used to separate fields in the raw csv.
   """
   if six.PY2:
     names = [six.ensure_text(n, "utf-8") for n in names]
@@ -179,17 +179,17 @@ def _regularize_1m_dataset(temp_dir):
   ratings.dat
     The file has no header row, and each line is in the following format:
     UserID::MovieID::Rating::Timestamp
-      - UserIDs range from 1 and 6040
-      - MovieIDs range from 1 and 3952
+      - UserIDs range between 1 and 6040
+      - MovieIDs range between 1 and 3952
       - Ratings are made on a 5-star scale (whole-star ratings only)
-      - Timestamp is represented in seconds since midnight Coordinated Universal
+      - Timestamp is represented in seconds since midnight Coordinated Universal
         Time (UTC) of January 1, 1970.
       - Each user has at least 20 ratings
 
   movies.dat
     Each line has the following format:
     MovieID::Title::Genres
-      - MovieIDs range from 1 and 3952
+      - MovieIDs range between 1 and 3952
   """
   working_dir = os.path.join(temp_dir, ML_1M)
 
@@ -223,7 +223,7 @@ def _regularize_20m_dataset(temp_dir):
   movies.csv
     Each line has the following format:
     MovieID,Title,Genres
-      - MovieIDs range from 1 and 3952
+      - MovieIDs range between 1 and 3952
   """
   working_dir = os.path.join(temp_dir, ML_20M)
 
@@ -265,7 +265,7 @@ def csv_to_joint_dataframe(data_dir, dataset):
 
 
 def integerize_genres(dataframe):
-  """Replace genre string with a binary vector.
+  """Replace the genre string with a binary vector.
 
   Args:
     dataframe: a pandas dataframe of movie data.
@@ -308,7 +308,7 @@ def define_data_download_flags():
 
 
 def main(_):
-  """Download and extract the data from GroupLens website."""
+  """Download and extract the data from the GroupLens website."""
   download(flags.FLAGS.dataset, flags.FLAGS.data_dir)
 
 

@@ -191,7 +191,7 @@ def define_ncf_flags():
       default=None,
       help=flags_core.help_wrap(
           "The batch size used for evaluation. This should generally be larger"
-          "than the training batch size as the lack of back propagation during"
+          "than the training batch size, as the lack of back propagation during "
           "evaluation can allow for larger batch sizes to fit in memory. If not"
           "specified, the training batch size (--batch_size) will be used."))
 
@@ -257,7 +257,7 @@ def define_ncf_flags():
           "If passed, training will stop when the evaluation metric HR is "
           "greater than or equal to hr_threshold. For dataset ml-1m, the "
           "desired hr_threshold is 0.68 which is the result from the paper; "
-          "For dataset ml-20m, the threshold can be set as 0.95 which is "
+          "For the dataset ml-20m, the threshold can be set as 0.95 which is "
           "achieved by MLPerf implementation."))
 
   flags.DEFINE_enum(
@@ -308,7 +308,7 @@ def define_ncf_flags():
           "If set, output the MLPerf compliance logging. This is only useful "
           "if one is running the model for MLPerf. See "
           "https://github.com/mlperf/policies/blob/master/training_rules.adoc"
-          "#submission-compliance-logs for details. This uses sudo and so may "
+          "#submission-compliance-logs for details. This uses sudo, and so it may "
           "ask for your password, as root access is needed to clear the system "
           "caches, which is required for MLPerf compliance."))