Skip to content

Commit

Permalink
remove dep on ydata-profiling
Browse files Browse the repository at this point in the history
  • Loading branch information
dclong committed Mar 11, 2024
1 parent de1d7bb commit a488d7a
Show file tree
Hide file tree
Showing 3 changed files with 1,114 additions and 2,085 deletions.
36 changes: 0 additions & 36 deletions aiutil/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,39 +45,3 @@ def read_csv(path: str | Path, **kwargs) -> pd.DataFrame:
if path.is_file():
return pd.read_csv(path, **kwargs)
return pd.concat(pd.read_csv(csv, **kwargs) for csv in path.glob("*.csv"))


def dump_profile(
    df: pd.DataFrame | str | Path, title: str, output_dir: str | Path
) -> None:
    """Run ydata-profiling on a DataFrame and dump the report into files.

    Three files are written into ``output_dir``: ``report.html``,
    ``report.json`` and ``report.pickle``.

    :param df: A pandas DataFrame, or the path (str or Path) of a
        Parquet (.parquet), Pickle (.pickle) or CSV (.csv) file to load
        the DataFrame from. The file type is decided by the
        (case-insensitive) file extension.
    :param title: The title of the generated report.
    :param output_dir: The output directory for reports.
        It is created (including missing parents) if it does not exist.
    :raises ValueError: If an input file other than Parquet/Pickle/CSV is provided.
    """
    if isinstance(df, (str, Path)):
        df = Path(df)
        logger.info("Reading the DataFrame from {}...", df)
        # NOTE: unpickling executes arbitrary code; only pass .pickle files
        # that come from a trusted source.
        readers = {
            ".parquet": pd.read_parquet,
            ".pickle": pd.read_pickle,
            ".csv": pd.read_csv,
        }
        reader = readers.get(df.suffix.lower())
        if reader is None:
            raise ValueError("Only Parquet, Pickle and CSV files are supported!")
        df = reader(df)
    logger.info("Shape of the DataFrame: {}", df.shape)
    # Prepare the output directory before the (potentially slow) profiling
    # step so that an unusable output location is reported right away.
    # parents=True allows nested directories that do not exist yet.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    logger.info("Profiling the DataFrame...")
    report = ProfileReport(df, title=title, minimal=True, explorative=True)
    # dump report
    logger.info("Dumping the report to HTML...")
    report.to_file(output_dir / "report.html")
    logger.info("Dumping the report to JSON...")
    report.to_file(output_dir / "report.json")
    logger.info("Dumping the report to Pickle...")
    report.dump(output_dir / "report.pickle")
Loading

0 comments on commit a488d7a

Please sign in to comment.