In [8]:
import polars as pl

In [9]:
def read_dataset(file_path: str) -> pl.DataFrame:
    if file_path.endswith('.csv'):
        data = pl.read_csv(file_path, infer_schema_length=10000)
    elif file_path.endswith('.xlsx'):
        data = pl.read_excel(file_path, infer_schema_length=10000)
    else:
        raise ValueError("Unsupported file type")
    
    return data

In [10]:
def generate_summary_statistics(data: pl.DataFrame) -> dict:
    if data is None or data.shape[0] == 0:
        raise ValueError("Data cannot be None or empty")

    summary = {
        "mean": data.mean().to_dict(),
        "median": data.median().to_dict(),
        "std_dev": data.std().to_dict()
    }
    
    return summary

In [11]:
try:
    data = read_dataset("winequality-red.csv")
    summary_statistics = generate_summary_statistics(data)
    print("Test passed")
    print(f"Summary Statistics: {summary_statistics}")
except Exception as e:
    print(f"Test failed: {e}")

Test passed
Summary Statistics: {'mean': {'fixed acidity': shape: (1,)
Series: 'fixed acidity' [f64]
[
	8.319637
], 'volatile acidity': shape: (1,)
Series: 'volatile acidity' [f64]
[
	0.527821
], 'citric acid': shape: (1,)
Series: 'citric acid' [f64]
[
	0.270976
], 'residual sugar': shape: (1,)
Series: 'residual sugar' [f64]
[
	2.538806
], 'chlorides': shape: (1,)
Series: 'chlorides' [f64]
[
	0.087467
], 'free sulfur dioxide': shape: (1,)
Series: 'free sulfur dioxide' [f64]
[
	15.874922
], 'total sulfur dioxide': shape: (1,)
Series: 'total sulfur dioxide' [f64]
[
	46.467792
], 'density': shape: (1,)
Series: 'density' [f64]
[
	0.996747
], 'pH': shape: (1,)
Series: 'pH' [f64]
[
	3.311113
], 'sulphates': shape: (1,)
Series: 'sulphates' [f64]
[
	0.658149
], 'alcohol': shape: (1,)
Series: 'alcohol' [f64]
[
	10.422983
], 'quality': shape: (1,)
Series: 'quality' [f64]
[
	5.636023
]}, 'median': {'fixed acidity': shape: (1,)
Series: 'fixed acidity' [f64]
[
	7.9
], 'volatile acidity': shape: (1,