Skip to content

Commit

Permalink
feat(clean): add clean_text function
Browse files Browse the repository at this point in the history
  • Loading branch information
atol committed Apr 21, 2021
1 parent 654ce0d commit 55d3ae9
Show file tree
Hide file tree
Showing 5 changed files with 1,245 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dataprep/assets/english_stopwords.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-english_stopwords = [
+english_stopwords = {
"i",
"me",
"my",
@@ -178,4 +178,4 @@
"won't",
"wouldn",
"wouldn't",
-]
+}
4 changes: 4 additions & 0 deletions dataprep/clean/__init__.py
Original file line number Diff line number Diff line change
@@ -27,6 +27,8 @@

from .clean_df import clean_df

+from .clean_text import clean_text, default_text_pipeline
+

__all__ = [
"clean_lat_long",
@@ -50,4 +52,6 @@
"clean_currency",
"validate_currency",
"clean_df",
+"clean_text",
+"default_text_pipeline",
]

0 comments on commit 55d3ae9

Please sign in to comment.