Format with black

myslak71 · Nov 2, 2019 · feefe3d
2 parents 936fccd + d10a971
commit feefe3d
Show file tree

Hide file tree

Showing 2 changed files with 88 additions and 30 deletions.
diff --git a/csv_report_processer/cli.py b/csv_report_processer/cli.py
@@ -26,7 +26,9 @@
 
 
 def get_parser():
-    parser = ArgumentParser(description=description, formatter_class=RawDescriptionHelpFormatter)
+    parser = ArgumentParser(
+        description=description, formatter_class=RawDescriptionHelpFormatter
+    )
     required = parser.add_argument_group('required arguments')
     required.add_argument('-i', '--input', help='Input CSV file path', required=True)
     required.add_argument('-o', '--output', help='Output CSV file path', required=True)

diff --git a/csv_report_processer/report_processer.py b/csv_report_processer/report_processer.py
@@ -12,13 +12,16 @@ class ReportProcesser(object):
     Has one class attribute: _columns which contains column names to
     be used in pandas.DataFrame
     """
+
     _columns = ('date', 'country_code', 'impressions', 'clicks')
 
     def __init__(self):
         """Initialization of the object's DataFrame"""
         self.df = pd.DataFrame()
 
-    def process_csv_report(self, input_path: str, output_path: str, error_path: str = None):
+    def process_csv_report(
+        self, input_path: str, output_path: str, error_path: str = None
+    ):
         """Report processing function.
 
         If possible, converts input file data to specific format and saves to
@@ -43,31 +46,60 @@ def process_csv_report(self, input_path: str, output_path: str, error_path: str
         try:
             self._open_report(input_path)
         except UnicodeError:
-            LOGGER.error('Invalid file encoding - supported encoding: UTF-8, UTF-16\nCould not process the file.')
+            LOGGER.error(
+                'Invalid file encoding - supported encoding: UTF-8, UTF-16\nCould not process the file.'
+            )
         except FileNotFoundError:
-            LOGGER.error(f'Input file {input_path} does not exist\nCould not process the file.')
+            LOGGER.error(
+                f'Input file {input_path} does not exist\nCould not process the file.'
+            )
         else:
             self._convert_data()
 
             df_error = self.df[self.df['error'] == 1]
-            df_valid = self.df[self.df['error'] != 1].groupby(['date', 'country_code'], as_index=False) \
-                                                     .agg(self._aggregate_function)
+            df_valid = (
+                self.df[self.df['error'] != 1]
+                .groupby(['date', 'country_code'], as_index=False)
+                .agg(self._aggregate_function)
+            )
 
             # concatenate valid data frame with error data frame and save it as CSV file
             if df_error.empty or not error_path:
-                pd.concat([df_valid, df_error]).sort_values(by=['date', 'country_code']) \
-                                            .to_csv(output_path, index=False, header=False,
-                                                    columns=self._columns, line_terminator='\n')
+                pd.concat([df_valid, df_error]).sort_values(
+                    by=['date', 'country_code']
+                ).to_csv(
+                    output_path,
+                    index=False,
+                    header=False,
+                    columns=self._columns,
+                    line_terminator='\n',
+                )
                 word = 'out' if df_error.empty else ''
-                LOGGER.info(f'File has been converted with{word} errors and saved at {output_path}')
+                LOGGER.info(
+                    f'File has been converted with{word} errors and saved at {output_path}'
+                )
 
             else:
-                df_valid.to_csv(output_path, index=False, header=False,
-                                columns=self._columns, line_terminator='\n')
-                df_error.to_csv(error_path, index=False, header=False,
-                                columns=self._columns, line_terminator='\n')
-                LOGGER.info(f'File has been converted with errors and saved at {output_path}')
-                LOGGER.info(f'Invalid data has been excluded from the result and saved at {error_path}')
+                df_valid.to_csv(
+                    output_path,
+                    index=False,
+                    header=False,
+                    columns=self._columns,
+                    line_terminator='\n',
+                )
+                df_error.to_csv(
+                    error_path,
+                    index=False,
+                    header=False,
+                    columns=self._columns,
+                    line_terminator='\n',
+                )
+                LOGGER.info(
+                    f'File has been converted with errors and saved at {output_path}'
+                )
+                LOGGER.info(
+                    f'Invalid data has been excluded from the result and saved at {error_path}'
+                )
 
     @staticmethod
     def _aggregate_function(cell: pd.Series) -> np.int64:
@@ -96,11 +128,22 @@ def _open_report(self, input_path: str):
         """
 
         try:
-            self.df = pd.read_csv(input_path, names=self._columns, index_col=False,
-                                  keep_default_na=False, sep=',')
+            self.df = pd.read_csv(
+                input_path,
+                names=self._columns,
+                index_col=False,
+                keep_default_na=False,
+                sep=',',
+            )
         except UnicodeDecodeError:
-            self.df = pd.read_csv(input_path, names=self._columns, index_col=False,
-                                  keep_default_na=False, sep=',', encoding='utf-16')
+            self.df = pd.read_csv(
+                input_path,
+                names=self._columns,
+                index_col=False,
+                keep_default_na=False,
+                sep=',',
+                encoding='utf-16',
+            )
 
     def _convert_data(self):
         """
@@ -109,32 +152,45 @@ def _convert_data(self):
         Tries to convert each cell to corresponding format. If it fails,
         changes row 'error' flag to 1.
         """
-        self.df['country_code'] = self.df['country_code'].apply(self._convert_state_to_country)
+        self.df['country_code'] = self.df['country_code'].apply(
+            self._convert_state_to_country
+        )
 
         self.df['error'] = 0
 
         for row in self.df.itertuples():
             # convert date
             try:
-                self.df.at[row.Index, 'date'] = pd.to_datetime(row.date).strftime('%Y-%m-%d')
+                self.df.at[row.Index, 'date'] = pd.to_datetime(row.date).strftime(
+                    '%Y-%m-%d'
+                )
             except ValueError:
                 LOGGER.error(
-                    f'Row {row.Index}: Following date could not be converted: {self.df.at[row.Index, "date"]}\n')
+                    f'Row {row.Index}: Following date could not be converted: {self.df.at[row.Index, "date"]}\n'
+                )
                 self.df.at[row.Index, 'error'] = 1
 
             # convert impressions and clicks
             try:
                 self.df.at[row.Index, 'impressions'] = int(row.impressions)
-                self.df.at[row.Index, 'clicks'] = float(str(row.clicks).rstrip('%')) / 100
-                self.df.at[row.Index, 'clicks'] = round(self.df.at[row.Index, 'clicks'] * int(row.impressions))
+                self.df.at[row.Index, 'clicks'] = (
+                    float(str(row.clicks).rstrip('%')) / 100
+                )
+                self.df.at[row.Index, 'clicks'] = round(
+                    self.df.at[row.Index, 'clicks'] * int(row.impressions)
+                )
             except Exception as e:
                 if str(e).startswith('invalid literal for int() with base 10: '):
-                    error_message = str(e).replace('invalid literal for int() with base 10: ',
-                                                   f'Row {row.Index}: Following impression number '
-                                                   f'could not be converted: ')
+                    error_message = str(e).replace(
+                        'invalid literal for int() with base 10: ',
+                        f'Row {row.Index}: Following impression number '
+                        f'could not be converted: ',
+                    )
                 else:
-                    error_message = str(e).replace('could not convert string to float: ',
-                                                   f'Row {row.Index}: Following CTR could not be converted: ')
+                    error_message = str(e).replace(
+                        'could not convert string to float: ',
+                        f'Row {row.Index}: Following CTR could not be converted: ',
+                    )
                 self.df.at[row.Index, 'error'] = 1
                 LOGGER.error(error_message)