# Testing fnGetFields
- Description: Based on pre-defined list schemas (MainListTest and LookupListTest), test the output of fnGetFields
- Version: v1
- Author: John Kerski

In [24]:
from pyspark.sql.types import *
from pyspark.sql.functions import *
from great_expectations.dataset import SparkDFDataset
import pandas as pd
# using time module
import time
 
# One wall-clock timestamp (seconds since the epoch) stamped on every recorded test result
ts = time.time()

# Load Data: read the Delta table under test, then expose it both as a
# Great Expectations dataset and as a pandas DataFrame for the checks below
fields_table_reader = spark.read.format("delta")
delta_df = fields_table_reader.load('Tables/Fields_To_Test_2')
raw_test_df = SparkDFDataset(delta_df)
raw_test_pd_df = delta_df.toPandas()

# Define the record type used to collect the outcome of each test
class test_result:
    """Outcome of a single check.

    Attributes:
        test_name: Human-readable description of the check.
        expected_value: The value the check required.
        actual_value: The value the check observed.
        timestamp: Timestamp supplied by the caller for the run.
    """

    def __init__(self, test_name, expected_value, actual_value, timestamp):
        # Store the four fields as given; no validation or conversion is applied
        (
            self.test_name,
            self.expected_value,
            self.actual_value,
            self.timestamp,
        ) = (test_name, expected_value, actual_value, timestamp)

# Define Test Schema: column names applied to the test-results DataFrame.
# NOTE(review): the names are in alphabetical order, which presumably matches the
# order Spark infers from the result objects' attributes -- confirm before reordering.
columns = [
    "actual_value",
    "expected_value",
    "test_name",
    "timestamp",
]

# Define test results: accumulator that every test cell appends to
test_results = list()

StatementMeta(, f947ecf1-dc1a-48a9-bc4c-78e2a057f87d, 28, Finished, Available)





## Test the Schema of the Output of fnGetFields

In [25]:
# Columns that must be present in the table under test
MANDATORY_COLUMNS = [
    "Title",
    "Description",
    "Type_As_String",
    "Internal_Name",
    "Select_Parameter",
    "Expand_Parameter",
    "Table_Expand_Argument_-_Display_Name",
    "Table_Expand_Argument_-_Internal_Name",
    "List_Name"
]

# Check each mandatory column in turn: record the outcome, then report PASSED or FAILED
for column in MANDATORY_COLUMNS:
    try:
        # Add Test Results
        column_found = raw_test_df.expect_column_to_exist(column).success
        should_exist = True
        result_label = "Mandatory column " + column + " should exist"
        test_results.append(
            test_result(result_label, str(should_exist), str(column_found), ts)
        )

        # Run Assertion
        assert column_found == should_exist, f"FAILED: Mandatory column {column} does not exist."
    except AssertionError as failure:
        # A failed check is reported but does not stop the remaining checks
        print(failure)
    else:
        print(f"PASSED: Column {column} exists")

StatementMeta(, f947ecf1-dc1a-48a9-bc4c-78e2a057f87d, 29, Finished, Available)

PASSED: Column Title exists
PASSED: Column Description exists
PASSED: Column Type_As_String exists
PASSED: Column Internal_Name exists
PASSED: Column Select_Parameter exists
PASSED: Column Expand_Parameter exists
PASSED: Column Table_Expand_Argument_-_Display_Name exists
PASSED: Column Table_Expand_Argument_-_Internal_Name exists
PASSED: Column List_Name exists


## List_Name should contain only one distinct value

In [26]:
# Count the distinct values held in the List_Name column
list_name_count = len(pd.unique(raw_test_pd_df['List_Name']))

try:
    # Add Test Results
    actual_value = list_name_count
    expected_value = 1
    test_results.append(test_result("List Name should have only 1 value", str(expected_value), str(actual_value), ts))

    # Run Asserts
    assert actual_value == expected_value, "FAILED: List_Name should have only 1 value."
    print("PASSED: List_Name contained only 1 value.")
except AssertionError as e:
    # A failed check is reported but does not stop the notebook
    print(e)

StatementMeta(, f947ecf1-dc1a-48a9-bc4c-78e2a057f87d, 30, Finished, Available)

PASSED: List_Name contained only 1 value.


## Based on upstream list, Type_As_String should have all the expected field types

In [27]:
# Field types that must each appear at least once in the Type_As_String column
MANDATORY_COLUMN_VALUES = [
    "Boolean",
    "Choice",
    "Counter",
    "Currency",
    "DateTime",
    "Location",
    "Lookup",
    "LookupMulti",
    "MultiChoice",
    "Note",
    "Number",
    "Text",
    "Thumbnail",
    "URL",
    "User",
    "UserMulti",
    "Attachments"
]

# Compute the distinct field types once instead of on every loop iteration
observed_field_types = set(raw_test_pd_df['Type_As_String'].unique())

for column_value in MANDATORY_COLUMN_VALUES:
    try:
        # Add Test Results
        # actual_value is what the data shows; expected_value is the requirement.
        # Keeping them this way round makes a failure record expected=True / actual=False.
        actual_value = column_value in observed_field_types
        expected_value = True
        test_name = "Type_As_String has all the expected values: " + column_value
        test_results.append(test_result(test_name, str(expected_value), str(actual_value), ts))

        # Run Assertion (f-string so the missing type is named in the failure message)
        assert actual_value == expected_value, f"FAILED: Type_As_String has all the expected values: {column_value}"
        print(f"PASSED: Type_As_String has all the expected values: {column_value}")
    except AssertionError as e:
        # A failed check is reported but does not stop the remaining checks
        print(e)



StatementMeta(, f947ecf1-dc1a-48a9-bc4c-78e2a057f87d, 31, Finished, Available)

PASSED: Type_As_String has all the expected values: Boolean
PASSED: Type_As_String has all the expected values: Choice
PASSED: Type_As_String has all the expected values: Counter
PASSED: Type_As_String has all the expected values: Currency
PASSED: Type_As_String has all the expected values: DateTime
PASSED: Type_As_String has all the expected values: Location
PASSED: Type_As_String has all the expected values: Lookup
PASSED: Type_As_String has all the expected values: LookupMulti
PASSED: Type_As_String has all the expected values: MultiChoice
PASSED: Type_As_String has all the expected values: Note
PASSED: Type_As_String has all the expected values: Number
PASSED: Type_As_String has all the expected values: Text
PASSED: Type_As_String has all the expected values: Thumbnail
PASSED: Type_As_String has all the expected values: URL
PASSED: Type_As_String has all the expected values: User
PASSED: Type_As_String has all the expected values: UserMulti
PASSED: Type_As_String has all the expect

## UserMulti has '/' in Select_Parameter and Expand_Parameter is not null

In [28]:
# Restrict the checks to the rows whose field type is UserMulti
column_check = 'UserMulti'
user_multi_df = raw_test_pd_df[raw_test_pd_df["Type_As_String"] == column_check]

try:
    # Add Test Results
    # Literal (non-regex) match per row; a null Select_Parameter counts as "no slash".
    select_has_slash = user_multi_df["Select_Parameter"].str.contains("/", regex=False, na=False)
    # Every UserMulti row must contain '/'. Having no UserMulti rows at all is treated
    # as a failure rather than a vacuous pass. Series.all() works for any number of rows,
    # unlike Series.bool(), which raises unless there is exactly one.
    actual_value = (not user_multi_df.empty) and bool(select_has_slash.all())
    expected_value = True
    test_name = "Select_Parameter has '/' to indicated correct odata projection."
    test_results.append(test_result(test_name, str(expected_value), str(actual_value), ts))

    # Run Assertion
    assert actual_value == expected_value, "FAILED: Select_Parameter has '/' to indicated correct odata projection."
    print("PASSED: Select_Parameter has '/' to indicated correct odata projection.")
except AssertionError as e:
    # A failed check is reported but does not stop the remaining checks
    print(e)

try:
    # Add Test Results
    # True when at least one UserMulti row has a null Expand_Parameter
    actual_value = bool(user_multi_df["Expand_Parameter"].isnull().any())
    expected_value = False
    test_name = "Expand Parameter should not be null"
    test_results.append(test_result(test_name, str(expected_value), str(actual_value), ts))

    # Run Assertion against the same value that was recorded above
    assert actual_value == expected_value, "FAILED: Expand Parameter should not be null"
    print("PASSED: Expand Parameter should not be null")
except AssertionError as e:
    print(e)


StatementMeta(, f947ecf1-dc1a-48a9-bc4c-78e2a057f87d, 32, Finished, Available)

PASSED: Select_Parameter has '/' to indicated correct odata projection.
PASSED: Expand Parameter should not be null


## Output Test Results

In [29]:
# Turn the collected results into a Spark DataFrame and render it in the notebook
test_results_df = spark.createDataFrame(data=test_results, schema=columns)
display(test_results_df)

# Serialize the results as a JSON array of records and pass it out as the notebook's
# exit value. The variable is deliberately not called `json`, so the standard-library
# module name is not shadowed.
test_results_pd_df = test_results_df.toPandas()
test_results_json = test_results_pd_df.to_json(orient='records')
mssparkutils.notebook.exit(test_results_json)

StatementMeta(, f947ecf1-dc1a-48a9-bc4c-78e2a057f87d, 33, Finished, Available)

SynapseWidget(Synapse.DataFrame, 68d06149-0cf2-41e6-9a63-09872a8785fa)



ExitValue: [{"actual_value":"True","expected_value":"True","test_name":"Mandatory column Title should exist","timestamp":1692965447.0993933678},{"actual_value":"True","expected_value":"True","test_name":"Mandatory column Description should exist","timestamp":1692965447.0993933678},{"actual_value":"True","expected_value":"True","test_name":"Mandatory column Type_As_String should exist","timestamp":1692965447.0993933678},{"actual_value":"True","expected_value":"True","test_name":"Mandatory column Internal_Name should exist","timestamp":1692965447.0993933678},{"actual_value":"True","expected_value":"True","test_name":"Mandatory column Select_Parameter should exist","timestamp":1692965447.0993933678},{"actual_value":"True","expected_value":"True","test_name":"Mandatory column Expand_Parameter should exist","timestamp":1692965447.0993933678},{"actual_value":"True","expected_value":"True","test_name":"Mandatory column Table_Expand_Argument_-_Display_Name should exist","timestamp":169296544