Skip to content

Commit

Permalink
Add serialization for DatasetInfo and round avg_words to int (#1294)
Browse files (browse the repository at this point in the history)
  • Loading branch information
hungcs authored and ShreyaR committed Sep 17, 2021
1 parent 29cdc9b commit 3ac82ea
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 1 deletion.
2 changes: 2 additions & 0 deletions ludwig/automl/base_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import os
from dataclasses import dataclass
+ from dataclasses_json import LetterCase, dataclass_json
from typing import List, Union

import pandas as pd
Expand Down Expand Up @@ -45,6 +46,7 @@
}


+ @dataclass_json(letter_case=LetterCase.CAMEL)
@dataclass
class DatasetInfo:
fields: List[FieldInfo]
Expand Down
2 changes: 1 addition & 1 deletion ludwig/automl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def avg_num_tokens(field: Series) -> int:
if len(field) > 5000:
field = field.sample(n=5000, random_state=40)
unique_entries = field.unique()
- avg_words = Series(unique_entries).str.split().str.len().mean()
+ avg_words = round(Series(unique_entries).str.split().str.len().mean())
return avg_words


Expand Down

0 comments on commit 3ac82ea

Please sign in to comment.