# Integrity Constraint Violations

In this report, the total number of integrity constraint violations is calculated for each data table. 

The overall number of integrity constraint violations across all data tables is calculated at the end.

In [76]:
import pandas as pd
from datetime import datetime

In [77]:
# Load the four cleaned tables that the integrity checks below operate on
menu_df = pd.read_feather("../Cleaned Data Files/Menu-cleaned.feather")
menupage_df = pd.read_feather("../Cleaned Data Files/MenuPage-cleaned.feather")
menuitem_df = pd.read_feather("../Cleaned Data Files/MenuItem-cleaned.feather")
dish_df = pd.read_feather("../Cleaned Data Files/Dish-cleaned.feather")

# Start every tally (the grand total plus one per table) at zero, so that
# re-running the notebook from this cell does not carry over earlier counts
overall_violations = menu_violations = menupage_violations = 0
menuitem_violations = dish_violations = 0

## 1. Menu Table

IC 1.1: No menus should have a null "id"

In [78]:
# IC 1.1: select the Menu rows whose "id" is missing, then add them to the
# Menu tally and to the grand total.
ic_1_1_violations = menu_df.loc[menu_df['id'].isna()]
menu_violations += len(ic_1_1_violations)
overall_violations += len(ic_1_1_violations)
print(f'Number of violations: {len(ic_1_1_violations)} violations')

Number of violations: 0 violations


IC 1.2: No menus should have the same "id"

In [79]:
# IC 1.2: select every Menu row whose "id" occurs more than once.
# keep=False marks all members of a duplicate group, not only the repeats.
ic_1_2_violations = menu_df.loc[menu_df.duplicated(subset='id', keep=False)]
menu_violations += len(ic_1_2_violations)
overall_violations += len(ic_1_2_violations)
print(f'Number of violations: {len(ic_1_2_violations)} violations')

Number of violations: 0 violations


IC 1.3: No menus should have a null "sponsor"

In [80]:
# IC 1.3: select the Menu rows whose "sponsor" is missing, then add them to
# the Menu tally and to the grand total.
ic_1_3_violations = menu_df.loc[menu_df['sponsor'].isna()]
menu_violations += len(ic_1_3_violations)
overall_violations += len(ic_1_3_violations)
print(f'Number of violations: {len(ic_1_3_violations)} violations')

Number of violations: 0 violations


## 2. MenuPage Table

IC 2.1: No menu pages should have a null "id"

In [81]:
# IC 2.1: select the MenuPage rows whose "id" is missing, then add them to
# the MenuPage tally and to the grand total.
ic_2_1_violations = menupage_df.loc[menupage_df['id'].isna()]
menupage_violations += len(ic_2_1_violations)
overall_violations += len(ic_2_1_violations)
print(f'Number of violations: {len(ic_2_1_violations)} violations')

Number of violations: 0 violations


IC 2.2: No menu pages should have the same "id"

In [82]:
# IC 2.2: select every MenuPage row whose "id" occurs more than once.
# keep=False marks all members of a duplicate group, not only the repeats.
ic_2_2_violations = menupage_df.loc[
    menupage_df.duplicated(subset='id', keep=False)
]
menupage_violations += len(ic_2_2_violations)
overall_violations += len(ic_2_2_violations)
print(f'Number of violations: {len(ic_2_2_violations)} violations')

Number of violations: 0 violations


IC 2.3: No menu pages should have a null "menu_id"

In [83]:
# IC 2.3: select the MenuPage rows whose "menu_id" is missing, then add them
# to the MenuPage tally and to the grand total.
ic_2_3_violations = menupage_df.loc[menupage_df['menu_id'].isna()]
menupage_violations += len(ic_2_3_violations)
overall_violations += len(ic_2_3_violations)
print(f'Number of violations: {len(ic_2_3_violations)} violations')

Number of violations: 0 violations


IC 2.4: Each menu page should reference an existing "menu_id" -> Menu(id)

In [84]:
# IC 2.4: foreign key MenuPage.menu_id -> Menu.id.
#
# Violations are counted per offending *row*, like every other check in this
# report. A set difference of the two id columns would instead count distinct
# dangling ids, so several pages pointing at the same missing menu would be
# reported as one violation.
#
# Rows with a null "menu_id" are skipped here because IC 2.3 already reports
# them. A set built from a float column can also hold more than one NaN
# entry, so a set-based count may tally those nulls again.
valid_menu_ids = set(menu_df['id'])
has_menu_id = menupage_df['menu_id'].notna()
references_known_menu = menupage_df['menu_id'].isin(valid_menu_ids)
ic_2_4_violations = menupage_df[has_menu_id & ~references_known_menu]

overall_violations += len(ic_2_4_violations)
menupage_violations += len(ic_2_4_violations)
print(f'Number of violations: {len(ic_2_4_violations)} violations')

Number of violations: 0 violations


## 3. MenuItem Table

IC 3.1: No menu items should have a null "id"

In [85]:
# IC 3.1: select the MenuItem rows whose "id" is missing, then add them to
# the MenuItem tally and to the grand total.
ic_3_1_violations = menuitem_df.loc[menuitem_df['id'].isna()]
menuitem_violations += len(ic_3_1_violations)
overall_violations += len(ic_3_1_violations)
print(f'Number of violations: {len(ic_3_1_violations)} violations')

Number of violations: 0 violations


IC 3.2: No menu items should have the same "id"

In [86]:
# IC 3.2: select every MenuItem row whose "id" occurs more than once.
# keep=False marks all members of a duplicate group, not only the repeats.
ic_3_2_violations = menuitem_df.loc[
    menuitem_df.duplicated(subset='id', keep=False)
]
menuitem_violations += len(ic_3_2_violations)
overall_violations += len(ic_3_2_violations)
print(f'Number of violations: {len(ic_3_2_violations)} violations')

Number of violations: 0 violations


IC 3.3: No menu items should have a null "menu_page_id"

In [87]:
# IC 3.3: select the MenuItem rows whose "menu_page_id" is missing, then add
# them to the MenuItem tally and to the grand total.
ic_3_3_violations = menuitem_df.loc[menuitem_df['menu_page_id'].isna()]
menuitem_violations += len(ic_3_3_violations)
overall_violations += len(ic_3_3_violations)
print(f'Number of violations: {len(ic_3_3_violations)} violations')

Number of violations: 0 violations


IC 3.4: Each menu item should reference an existing "menu_page_id" -> MenuPage(id)

In [88]:
# IC 3.4: foreign key MenuItem.menu_page_id -> MenuPage.id.
#
# Violations are counted per offending *row*, like every other check in this
# report. A set difference of the two id columns would instead count distinct
# dangling ids, so several items pointing at the same missing page would be
# reported as one violation.
#
# Rows with a null "menu_page_id" are skipped here because IC 3.3 already
# reports them. A set built from a float column can also hold more than one
# NaN entry, so a set-based count may tally those nulls again.
valid_menupage_ids = set(menupage_df['id'])
has_menu_page_id = menuitem_df['menu_page_id'].notna()
references_known_page = menuitem_df['menu_page_id'].isin(valid_menupage_ids)
ic_3_4_violations = menuitem_df[has_menu_page_id & ~references_known_page]

overall_violations += len(ic_3_4_violations)
menuitem_violations += len(ic_3_4_violations)
print(f'Number of violations: {len(ic_3_4_violations)} violations')

Number of violations: 0 violations


IC 3.5: No menu items should have a null "dish_id"

In [89]:
# IC 3.5: select the MenuItem rows whose "dish_id" is missing, then add them
# to the MenuItem tally and to the grand total.
ic_3_5_violations = menuitem_df.loc[menuitem_df['dish_id'].isna()]
menuitem_violations += len(ic_3_5_violations)
overall_violations += len(ic_3_5_violations)
print(f'Number of violations: {len(ic_3_5_violations)} violations')

Number of violations: 0 violations


IC 3.6: Each menu item should reference an existing "dish_id" -> Dish(id)

In [90]:
# IC 3.6: foreign key MenuItem.dish_id -> Dish.id.
#
# Violations are counted per offending *row*, like every other check in this
# report. A set difference of the two id columns would instead count distinct
# dangling ids, so several items pointing at the same missing dish would be
# reported as one violation.
#
# Rows with a null "dish_id" are skipped here because IC 3.5 already reports
# them. A set built from a float column can also hold more than one NaN
# entry, so a set-based count may tally those nulls again.
valid_dish_ids = set(dish_df['id'])
has_dish_id = menuitem_df['dish_id'].notna()
references_known_dish = menuitem_df['dish_id'].isin(valid_dish_ids)
ic_3_6_violations = menuitem_df[has_dish_id & ~references_known_dish]

overall_violations += len(ic_3_6_violations)
menuitem_violations += len(ic_3_6_violations)
print(f'Number of violations: {len(ic_3_6_violations)} violations')

Number of violations: 0 violations


## 4. Dish Table

IC 4.1: No dishes should have a null "id"

In [91]:
# IC 4.1: select the Dish rows whose "id" is missing, then add them to the
# Dish tally and to the grand total.
ic_4_1_violations = dish_df.loc[dish_df['id'].isna()]
dish_violations += len(ic_4_1_violations)
overall_violations += len(ic_4_1_violations)
print(f'Number of violations: {len(ic_4_1_violations)} violations')

Number of violations: 0 violations


IC 4.2: No dishes should have the same "id"

In [92]:
# IC 4.2: select every Dish row whose "id" occurs more than once.
# keep=False marks all members of a duplicate group, not only the repeats.
ic_4_2_violations = dish_df.loc[dish_df.duplicated(subset='id', keep=False)]
dish_violations += len(ic_4_2_violations)
overall_violations += len(ic_4_2_violations)
print(f'Number of violations: {len(ic_4_2_violations)} violations')

Number of violations: 0 violations


IC 4.3: Dishes should have a valid value (non-negative) for "menus_appeared"

In [93]:
# IC 4.3: select the Dish rows with a negative "menus_appeared" count.
negative_menus_appeared = dish_df['menus_appeared'].lt(0)
ic_4_3_violations = dish_df.loc[negative_menus_appeared]
dish_violations += len(ic_4_3_violations)
overall_violations += len(ic_4_3_violations)
print(f'Number of violations: {len(ic_4_3_violations)} violations')

Number of violations: 0 violations


IC 4.4: Dishes should have a valid value (non-negative) for "times_appeared"

In [94]:
# IC 4.4: select the Dish rows with a negative "times_appeared" count.
negative_times_appeared = dish_df['times_appeared'].lt(0)
ic_4_4_violations = dish_df.loc[negative_times_appeared]
dish_violations += len(ic_4_4_violations)
overall_violations += len(ic_4_4_violations)
print(f'Number of violations: {len(ic_4_4_violations)} violations')

Number of violations: 0 violations


IC 4.5: Dishes should have a valid year value for "first_appeared"

In [95]:
# IC 4.5: a "first_appeared" year is invalid when it is zero or negative, or
# when it lies after the year in which this notebook is run.
current_year = datetime.now().year
first_year = dish_df['first_appeared']
first_year_not_positive = first_year.le(0)
first_year_in_future = first_year.gt(current_year)
ic_4_5_violations = dish_df.loc[first_year_not_positive | first_year_in_future]
dish_violations += len(ic_4_5_violations)
overall_violations += len(ic_4_5_violations)
print(f'Number of violations: {len(ic_4_5_violations)} violations')

Number of violations: 0 violations


IC 4.6: Dishes should have a valid year value for "last_appeared"

In [96]:
# IC 4.6: a "last_appeared" year is invalid when it is zero or negative, or
# when it lies after the year in which this notebook is run.
current_year = datetime.now().year
last_year = dish_df['last_appeared']
last_year_not_positive = last_year.le(0)
last_year_in_future = last_year.gt(current_year)
ic_4_6_violations = dish_df.loc[last_year_not_positive | last_year_in_future]
dish_violations += len(ic_4_6_violations)
overall_violations += len(ic_4_6_violations)
print(f'Number of violations: {len(ic_4_6_violations)} violations')

Number of violations: 0 violations


IC 4.7: Dishes should not have a year value for last_appeared that occurs before first_appeared

In [97]:
# IC 4.7: select the Dish rows whose "first_appeared" year is later than
# their "last_appeared" year.
first_after_last = dish_df['first_appeared'] > dish_df['last_appeared']
ic_4_7_violations = dish_df.loc[first_after_last]
dish_violations += len(ic_4_7_violations)
overall_violations += len(ic_4_7_violations)
print(f'Number of violations: {len(ic_4_7_violations)} violations')

Number of violations: 0 violations


IC 4.8: Dishes should have a valid value (non-negative) for "lowest_price"

In [98]:
# IC 4.8: select the Dish rows with a negative "lowest_price".
negative_lowest_price = dish_df['lowest_price'].lt(0)
ic_4_8_violations = dish_df.loc[negative_lowest_price]
dish_violations += len(ic_4_8_violations)
overall_violations += len(ic_4_8_violations)
print(f'Number of violations: {len(ic_4_8_violations)} violations')

Number of violations: 0 violations


IC 4.9: Dishes should have a valid value (non-negative) for "highest_price"

In [99]:
# IC 4.9: select the Dish rows with a negative "highest_price".
negative_highest_price = dish_df['highest_price'].lt(0)
ic_4_9_violations = dish_df.loc[negative_highest_price]
dish_violations += len(ic_4_9_violations)
overall_violations += len(ic_4_9_violations)
print(f'Number of violations: {len(ic_4_9_violations)} violations')

Number of violations: 0 violations


IC 4.10: Dishes should not have a "highest_price" less than the "lowest_price"

In [100]:
# IC 4.10: select the Dish rows whose "lowest_price" exceeds their
# "highest_price".
lowest_above_highest = dish_df['lowest_price'] > dish_df['highest_price']
ic_4_10_violations = dish_df.loc[lowest_above_highest]
dish_violations += len(ic_4_10_violations)
overall_violations += len(ic_4_10_violations)
print(f'Number of violations: {len(ic_4_10_violations)} violations')

Number of violations: 0 violations


## Overall

In [101]:
# Grand total: every check above added its violation count to this tally.
print(f'Overall number of violations: {overall_violations} violations')

Overall number of violations: 0 violations


In [102]:
# Subtotal accumulated by the Menu checks (IC 1.x).
print(f'Total number of violations for Menu Table: {menu_violations} violations')

Total number of violations for Menu Table: 0 violations


In [103]:
# Subtotal accumulated by the MenuPage checks (IC 2.x).
print(f'Total number of violations for MenuPage Table: {menupage_violations} violations')

Total number of violations for MenuPage Table: 0 violations


In [104]:
# Subtotal accumulated by the MenuItem checks (IC 3.x).
print(f'Total number of violations for MenuItem Table: {menuitem_violations} violations')

Total number of violations for MenuItem Table: 0 violations


In [105]:
# Subtotal accumulated by the Dish checks (IC 4.x).
print(f'Total number of violations for Dish Table: {dish_violations} violations')

Total number of violations for Dish Table: 0 violations
