In [6]:
import great_expectations as ge
import statsmodels.api as sm
import pandas as pd

In [32]:
# Load the iris dataset through statsmodels' Rdatasets helper and keep only its
# `.data` attribute as `df`.
# NOTE(review): get_rdataset presumably fetches the data over the network —
# confirm before relying on this cell offline.
df = sm.datasets.get_rdataset('iris').data
# Bare last expression so the frame is rendered as the cell output.
df

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [18]:
# Wrap the pandas DataFrame in a Great Expectations PandasDataset. The wrapped
# object can still be used like a regular pd.DataFrame, but it additionally
# exposes Great Expectations' `expect_*` validation methods used below.
df_ge = ge.from_pandas(df)

## Tests

### Expect column minimum to be between ...

In [22]:
# Validate that the minimum of Sepal.Length lies between 0 and 5; the full
# validation result object is shown as the cell output.
df_ge.expect_column_min_to_be_between(
    column='Sepal.Length',
    min_value=0,
    max_value=5,
)

{
  "success": true,
  "result": {
    "observed_value": 4.3,
    "element_count": 150,
    "missing_count": null,
    "missing_percent": null
  },
  "meta": {},
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [31]:
# Same expectation as above, reduced to its boolean pass/fail flag.
df_ge.expect_column_min_to_be_between(
    column='Sepal.Length',
    min_value=0,
    max_value=5,
).success

True

In [30]:
# Same expectation again, showing only the `result` details
# (observed value and element/missing counts).
df_ge.expect_column_min_to_be_between(
    column='Sepal.Length',
    min_value=0,
    max_value=5,
).result

{'observed_value': 4.3,
 'element_count': 150,
 'missing_count': None,
 'missing_percent': None}

### Expect column values to be in set

In [46]:
# Distinct species labels present in the data, as a plain Python list.
df['Species'].unique().tolist()

['setosa', 'versicolor', 'virginica']

In [47]:
# Every species label observed above is in the allowed set, so this passes.
df_ge.expect_column_values_to_be_in_set(
    column='Species',
    value_set=['setosa', 'versicolor', 'virginica'],
).success

True

In [48]:
# Counter-example: 'virginica' is left out of the allowed set, so the
# expectation reports failure (success is False).
df_ge.expect_column_values_to_be_in_set(
    column='Species',
    value_set=['setosa', 'versicolor'],
).success

False