Skip to content

Commit

Permalink
fix: fix error in value selector
Browse files (browse the repository at this point in the history)
  • Loading branch information
MarcoBousseau committed Aug 1, 2022
1 parent debdae8 commit b44c7f1
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
29 changes: 20 additions & 9 deletions src/data_selector/selector.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import Any
import pandas as pd
from pandas import DataFrame as df
import os
Expand Down Expand Up @@ -27,11 +28,13 @@ def select(
This function handles the interaction with the
user for the choices.
"""

if data_frame is None:
data_frame = pd.read_csv(input_file, nrows=nb_rows, engine='python', sep=file_sep)

if len(data_frame) < nb_rows and (len(data_frame) - nb_rows) > 0:
logging.warning(str(nb_rows - len(data_frame)) + " rows were lost during file reading.")
logging.warning(str(nb_rows - len(data_frame))
+ " rows were lost during file reading.")
cols_number: int = len(data_frame.columns)

if path_columns_to_keep is not None:
Expand All @@ -43,7 +46,10 @@ def select(
col_size_change: int = len(param_dict['column_names'].keys())
kpi = (len(data_frame.columns) / (col_size_change)) * 100
if kpi < 100.0:
logging.warning(str(100.0 - kpi)[:6] + "% of the data was lost.")
logging.warning(str(100.0 - kpi)[:6]
+ "% of the data was lost. Ignore "
+ "this warnings if you have "
+ "truncated the dataset.")

if path_columns_to_delete is not None:

Expand Down Expand Up @@ -174,14 +180,19 @@ def select_data_and_column(

data_frame = data_frame.reindex(columns=param_dict["column_names"].keys())
try:
df_res: df = pd.DataFrame()
list_inter_value = []
list_inter_column = []
list_of_delt_values: list[Any] = []
df_res: pd.DataFrame = pd.DataFrame()
list_inter_value: list[Any] = []
list_inter_column: list[Any] = []

for column in param_dict['column_names'].keys():
for val in param_dict["column_names"][column]['value']:
list_inter_value.append(data_frame[data_frame[column] == val])
list_inter_column.append(pd.concat(list_inter_value))
list_inter_value = []
for val in param_dict['column_names'][column]['value']:
if val not in list_of_delt_values:
list_inter_value.append(data_frame[data_frame[column] == val])
list_of_delt_values.append(val)
list_of_delt_values = []
list_inter_column.append(pd.concat(list_inter_value).drop_duplicates())
list_inter_value = []

df_res = pd.concat(list_inter_column)
return df_res
Expand Down
2 changes: 1 addition & 1 deletion tests/test_data_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_select_data_and_column():
# assert
assert str(list_of_names) != str(data_frame.columns.to_list())
assert data_frame.columns.to_list() == ['SONG_RAW', 'Song_Clean', 'First?', 'temp_test']
assert number_of_rows_control <= len(data_frame.index)
assert number_of_rows_control >= len(data_frame.index)


def test_version_displays_library_version():
Expand Down

0 comments on commit b44c7f1

Please sign in to comment.