In [None]:
# Question: Data Quality Automation Tools - Introduction to Great Expectations
# Description: Set up a simple Great Expectations check for missing values in a numeric column.



In [4]:
pip install great_expectations

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [6]:
def check_missing_numeric(df, column_name):
  """
  Report whether a numeric DataFrame column is completely populated.

  The column is looked up once, validated as numeric, and then scanned for
  null entries (pandas treats both None and NaN as null).

  Args:
    df (pd.DataFrame): Frame that holds the column to inspect.
    column_name (str): Label of the numeric column to inspect.

  Returns:
    bool: True when every entry of the column is non-null, False as soon as
      at least one entry is missing.

  Raises:
    ValueError: If `column_name` is not among `df.columns`, or if the
      selected column does not have a numeric dtype.
  """
  # Guard clause: fail early with a readable message for unknown columns.
  if column_name not in df.columns:
    raise ValueError(f"Column '{column_name}' not found in the DataFrame.")

  selected = df[column_name]

  # Guard clause: the check is only defined for numeric dtypes.
  if not pd.api.types.is_numeric_dtype(selected):
    raise ValueError(f"Column '{column_name}' is not numeric.")

  # "No entry is null" expressed as "all entries are non-null".
  return bool(selected.notna().all())

import pandas as pd
import numpy as np

# Demonstration: run check_missing_numeric against two small frames.

# First frame: the numeric column contains one NaN and one None entry.
data = {
    'numeric_column': [1.0, 2.5, np.nan, 4.0, None, 6.7],
    'another_column': ['a', 'b', 'c', 'd', 'e', 'f'],
}
df_with_missing = pd.DataFrame(data)

# The same column label is inspected in both frames.
column_to_check = 'numeric_column'
no_missing = check_missing_numeric(df_with_missing.copy(), column_to_check)

print(
    f"The column '{column_to_check}' has no missing values."
    if no_missing
    else f"The column '{column_to_check}' contains missing values."
)

# Second frame: the numeric column is fully populated with integers.
data_no_missing = {
    'numeric_column': [10, 20, 30, 40, 50],
    'another_column': ['p', 'q', 'r', 's', 't'],
}
df_without_missing = pd.DataFrame(data_no_missing)

no_missing_second = check_missing_numeric(df_without_missing.copy(), column_to_check)

print(
    f"The column '{column_to_check}' in the second DataFrame has no missing values."
    if no_missing_second
    else f"The column '{column_to_check}' in the second DataFrame contains missing values."
)

The column 'numeric_column' contains missing values.
The column 'numeric_column' in the second DataFrame has no missing values.
