From ed14b558a4a114beafff955b40f1d5787744a931 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Wed, 10 Jan 2024 20:05:28 +0800 Subject: [PATCH] fix csv nan bug --- swift/llm/utils/dataset.py | 2 +- tests/llm/data/alpaca.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/swift/llm/utils/dataset.py b/swift/llm/utils/dataset.py index cb3b55331f..ac93b279cb 100644 --- a/swift/llm/utils/dataset.py +++ b/swift/llm/utils/dataset.py @@ -1106,7 +1106,7 @@ def load_dataset_from_local( assert isinstance(dataset_path, str) df: DataFrame if dataset_path.endswith('.csv'): - df = pd.read_csv(dataset_path) + df = pd.read_csv(dataset_path, na_filter=False) elif dataset_path.endswith('.jsonl'): df = transform_jsonl_to_df(read_from_jsonl(dataset_path)) elif dataset_path.endswith('.json'): diff --git a/tests/llm/data/alpaca.csv b/tests/llm/data/alpaca.csv index 9db400b039..d24d1f963c 100644 --- a/tests/llm/data/alpaca.csv +++ b/tests/llm/data/alpaca.csv @@ -1,4 +1,4 @@ instruction,input,output 11111,22222,33333 -aaaaa,bbbbb,ccccc +aaaaa,,ccccc AAAAA,BBBBB,CCCCC