## Evaluating Data Quality Issues in the Data Provided

In [1]:
# These are the required imports.

import pandas as pd
import numpy as np
from datetime import datetime
from ast import literal_eval

### Summary of issues with users.json
1. 495 user records contain significant duplicates, reducing unique data to 212 records after deduplication
2. 48 records have incorrect "nan" value for signUpSource, dropping to 5 post-deduplication
3. 56 records have incorrect "nan" value for state, reducing to 6 unique records with this issue
4. Incorrect "nan" values in signUpSource and state fields undermine data quality and analytics

Recommend deduplicating then fixing remaining nan values to improve master user data integrity for reporting.

In [2]:
df_users = pd.read_json('data/users.json')

In [3]:
# Viewing the columns in users df.

df_users.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 495 entries, 0 to 494
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   _id           495 non-null    object
 1   active        495 non-null    bool  
 2   createdDate   495 non-null    object
 3   lastLogin     433 non-null    object
 4   role          495 non-null    object
 5   signUpSource  447 non-null    object
 6   state         439 non-null    object
dtypes: bool(1), object(6)
memory usage: 23.8+ KB


In [4]:
# Looking at if column values are Nan

df_users.isnull().any()

_id             False
active          False
createdDate     False
lastLogin        True
role            False
signUpSource     True
state            True
dtype: bool

In [5]:
# Let's unfold the _id map. 

df_users['_id'] = df_users['_id'].apply(
            lambda x: x['$oid'])

In [6]:
# Let's unfold and convert createdDate and lastLogin timestamp to datetime.

df_users['createdDate'] = df_users['createdDate'].apply(
            lambda x: datetime.fromtimestamp(int(x["$date"]/1000)) if not pd.isna(x) else np.nan)

df_users['lastLogin'] = df_users['lastLogin'].apply(
            lambda x: datetime.fromtimestamp(int(x["$date"]/1000)) if not pd.isna(x) else np.nan)

In [7]:
# Checking for duplicates, first create a deep copy of df_users.

df_temp = df_users.copy()

df_temp = df_temp.drop_duplicates()

In [8]:
print(f"Original data length: {len(df_users)}")
print(f"Length after removing duplicates: {len(df_temp)}")

Original data length: 495
Length after removing duplicates: 212


In [9]:
# Checking for Nan values in signUpSource and state.

df_users.isna().sum()

_id              0
active           0
createdDate      0
lastLogin       62
role             0
signUpSource    48
state           56
dtype: int64

In [10]:
print(f'For signUpSource:')
print(f'Nan count in original dataframe: {df_users.signUpSource.isna().sum()}/{len(df_users)}')
print(f'Nan count after dropping duplicates: {df_temp.signUpSource.isna().sum()}/{len(df_temp)}')

print(f'For state:')
print(f'Nan count in original dataframe: {df_users.state.isna().sum()}/{len(df_users)}')
print(f'Nan count after dropping duplicates: {df_temp.state.isna().sum()}/{len(df_temp)}')

For signUpSource:
Nan count in original dataframe: 48/495
Nan count after dropping duplicates: 5/212
For state:
Nan count in original dataframe: 56/495
Nan count after dropping duplicates: 6/212


In [11]:
print(f"Unique `signUpSource` values in users.json: {df_users['signUpSource'].unique()}")

Unique `signUpSource` values in users.json: ['Email' 'Google' nan]


In [12]:
print(f"Unique `state` values in users.json: {df_users['state'].unique()}")

Unique `state` values in users.json: ['WI' 'KY' 'AL' 'CO' 'IL' nan 'OH' 'SC' 'NH']


---

### Summary of issues with brands.json
1. Critical attributes like category, categoryCode, brandCode missing for several brands 
2. 155/1167 brands missing category, 650/1167 missing categoryCode, 234/1167 missing brandCode
3. Missing brand codes in brands.json cripples receipt item to brand affiliation 
4. Populating missing category, categoryCode, brandCode attributes crucial for accurate brand analytics
5. Current brand data gaps impact receipt parsing and item-to-brand mapping

In [13]:
df_brands = pd.read_json('data/brands.json')

In [14]:
# Viewing the columns in users df.

df_brands.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1167 entries, 0 to 1166
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   _id           1167 non-null   object 
 1   barcode       1167 non-null   int64  
 2   category      1012 non-null   object 
 3   categoryCode  517 non-null    object 
 4   cpg           1167 non-null   object 
 5   name          1167 non-null   object 
 6   topBrand      555 non-null    float64
 7   brandCode     933 non-null    object 
dtypes: float64(1), int64(1), object(6)
memory usage: 73.1+ KB


In [15]:
# Looking at if column values are Nan

df_brands.isnull().any()

_id             False
barcode         False
category         True
categoryCode     True
cpg             False
name            False
topBrand         True
brandCode        True
dtype: bool

In [16]:
df_brands.head(10)

Unnamed: 0,_id,barcode,category,categoryCode,cpg,name,topBrand,brandCode
0,{'$oid': '601ac115be37ce2ead437551'},511111019862,Baking,BAKING,"{'$id': {'$oid': '601ac114be37ce2ead437550'}, ...",test brand @1612366101024,0.0,
1,{'$oid': '601c5460be37ce2ead43755f'},511111519928,Beverages,BEVERAGES,"{'$id': {'$oid': '5332f5fbe4b03c9a25efd0ba'}, ...",Starbucks,0.0,STARBUCKS
2,{'$oid': '601ac142be37ce2ead43755d'},511111819905,Baking,BAKING,"{'$id': {'$oid': '601ac142be37ce2ead437559'}, ...",test brand @1612366146176,0.0,TEST BRANDCODE @1612366146176
3,{'$oid': '601ac142be37ce2ead43755a'},511111519874,Baking,BAKING,"{'$id': {'$oid': '601ac142be37ce2ead437559'}, ...",test brand @1612366146051,0.0,TEST BRANDCODE @1612366146051
4,{'$oid': '601ac142be37ce2ead43755e'},511111319917,Candy & Sweets,CANDY_AND_SWEETS,"{'$id': {'$oid': '5332fa12e4b03c9a25efd1e7'}, ...",test brand @1612366146827,0.0,TEST BRANDCODE @1612366146827
5,{'$oid': '601ac142be37ce2ead43755b'},511111719885,Baking,BAKING,"{'$id': {'$oid': '601ac142be37ce2ead437559'}, ...",test brand @1612366146091,0.0,TEST BRANDCODE @1612366146091
6,{'$oid': '601ac142be37ce2ead43755c'},511111219897,Baking,BAKING,"{'$id': {'$oid': '601ac142be37ce2ead437559'}, ...",test brand @1612366146133,0.0,TEST BRANDCODE @1612366146133
7,{'$oid': '5cdad0f5166eb33eb7ce0faa'},511111104810,Condiments & Sauces,,"{'$ref': 'Cogs', '$id': {'$oid': '559c2234e4b0...",J.L. Kraft,,J.L. KRAFT
8,{'$oid': '5ab15636e4b0be0a89bb0b07'},511111504412,Canned Goods & Soups,,"{'$ref': 'Cogs', '$id': {'$oid': '5a734034e4b0...",Campbell's Home Style,0.0,CAMPBELLS HOME STYLE
9,{'$oid': '5c408e8bcd244a1fdb47aee7'},511111504788,Baking,,"{'$ref': 'Cogs', '$id': {'$oid': '59ba6f1ce4b0...",test,,TEST


In [17]:
# Let's unfold the _id map. 

df_brands['_id'] = df_brands['_id'].apply(
            lambda x: x['$oid'])

In [18]:
# Let's unfold cpg.

df_brands['cpg'] = df_brands['cpg'].apply(
            lambda x: x['$ref'] + "<###>" + x['$id']['$oid'] if x.get('$ref') else x['$id']['$oid'])

In [19]:
# Checking for duplicates, first create a deep copy of df_brands.

df_brands_cp = df_brands.copy()

df_brands_cp = df_brands_cp.drop_duplicates()

In [20]:
# The length before and after removing duplicates is the same.

print(f"Brands:")
print(f"Original data length: {len(df_brands)}")
print(f"Length after removing duplicates: {len(df_brands_cp)}")

Brands:
Original data length: 1167
Length after removing duplicates: 1167


In [21]:
print(f'For category:')
print(f'Nan count in original dataframe: {df_brands.category.isna().sum()}/{len(df_brands)}')

print(f'For categoryCode:')
print(f'Nan count in original dataframe: {df_brands.categoryCode.isna().sum()}/{len(df_brands)}')

print(f'For brandCode:')
print(f'Nan count in original dataframe: {df_brands.brandCode.isna().sum()}/{len(df_brands)}')

For category:
Nan count in original dataframe: 155/1167
For categoryCode:
Nan count in original dataframe: 650/1167
For brandCode:
Nan count in original dataframe: 234/1167


---

### Summary of issues with receipts.json
1. Inconsistency between Receipts and Brands - brandCodes in Receipts missing from Brands
2. Analysis shows brandCodes in Receipts not defined in Brands master list
3. Receipts reference orphan brandCodes not present in Brands

Enriching Brands to include all brandCodes in Receipts needed for accurate analytics

In [22]:
# Reading Receipts.json

df_receipts = pd.read_json('data/receipts.json')

In [23]:
# Checking column names and types.

df_receipts.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1119 entries, 0 to 1118
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   _id                      1119 non-null   object 
 1   bonusPointsEarned        544 non-null    float64
 2   bonusPointsEarnedReason  544 non-null    object 
 3   createDate               1119 non-null   object 
 4   dateScanned              1119 non-null   object 
 5   finishedDate             568 non-null    object 
 6   modifyDate               1119 non-null   object 
 7   pointsAwardedDate        537 non-null    object 
 8   pointsEarned             609 non-null    float64
 9   purchaseDate             671 non-null    object 
 10  purchasedItemCount       635 non-null    float64
 11  rewardsReceiptItemList   679 non-null    object 
 12  rewardsReceiptStatus     1119 non-null   object 
 13  totalSpent               684 non-null    float64
 14  userId                  

In [24]:
# Looking at if column values are Nan

df_receipts.isnull().any()

_id                        False
bonusPointsEarned           True
bonusPointsEarnedReason     True
createDate                 False
dateScanned                False
finishedDate                True
modifyDate                 False
pointsAwardedDate           True
pointsEarned                True
purchaseDate                True
purchasedItemCount          True
rewardsReceiptItemList      True
rewardsReceiptStatus       False
totalSpent                  True
userId                     False
dtype: bool

In [25]:
# Let's unfold the _id map. 

df_receipts['_id'] = df_receipts['_id'].apply(
            lambda x: x['$oid'])

In [26]:
# Let's unfold and convert createDate, dateScanned, finishedDate, modifyDate, pointsAwardedDate and purchaseDate timestamp to datetime.
# Here, we need to handle NaN for finishedDate, pointsAwardedDate and purchaseDate

df_receipts['createDate'] = df_receipts['createDate'].apply(
            lambda x: datetime.fromtimestamp(int(x["$date"]/1000)))

df_receipts['dateScanned'] = df_receipts['dateScanned'].apply(
            lambda x: datetime.fromtimestamp(int(x["$date"]/1000)))

df_receipts['finishedDate'] = df_receipts['finishedDate'].apply(
            lambda x: datetime.fromtimestamp(int(x["$date"]/1000)) if not pd.isna(x) else np.nan)

df_receipts['modifyDate'] = df_receipts['modifyDate'].apply(
            lambda x: datetime.fromtimestamp(int(x["$date"]/1000)))

df_receipts['pointsAwardedDate'] = df_receipts['pointsAwardedDate'].apply(
            lambda x: datetime.fromtimestamp(int(x["$date"]/1000)) if not pd.isna(x) else np.nan)

df_receipts['purchaseDate'] = df_receipts['purchaseDate'].apply(
            lambda x: datetime.fromtimestamp(int(x["$date"]/1000)) if not pd.isna(x) else np.nan)

In [27]:
# Unnesting the rewardsReceiptItemList column

df_receipts = df_receipts.explode('rewardsReceiptItemList')
df_receipts.reset_index(inplace=True)

In [28]:
# Some rewardsReceiptItemList are Nan so handle that case

df_receipts = df_receipts.fillna({'rewardsReceiptItemList':'{}'})

df_receipts['rewardsReceiptItemList'] = df_receipts['rewardsReceiptItemList'].apply(lambda x:str(x))
df_receipts['rewardsReceiptItemList'] = df_receipts['rewardsReceiptItemList'].apply(literal_eval)

In [29]:
df_receipts_normalized = pd.json_normalize(df_receipts['rewardsReceiptItemList'],errors='ignore',record_prefix='rewardsReceiptItemList')\
.add_prefix('rewardsReceiptItemList_')

In [30]:
df_receipts_final = pd.merge(df_receipts, df_receipts_normalized, left_index=True, right_index=True, how='outer')

In [31]:
df_receipts_final.head(10)

Unnamed: 0,index,_id,bonusPointsEarned,bonusPointsEarnedReason,createDate,dateScanned,finishedDate,modifyDate,pointsAwardedDate,pointsEarned,...,rewardsReceiptItemList_itemNumber,rewardsReceiptItemList_originalMetaBriteQuantityPurchased,rewardsReceiptItemList_pointsEarned,rewardsReceiptItemList_targetPrice,rewardsReceiptItemList_competitiveProduct,rewardsReceiptItemList_originalFinalPrice,rewardsReceiptItemList_originalMetaBriteItemPrice,rewardsReceiptItemList_deleted,rewardsReceiptItemList_priceAfterCoupon,rewardsReceiptItemList_metabriteCampaignId
0,0,5ff1e1eb0a720f0523000575,500.0,"Receipt number 2 completed, bonus point schedu...",2021-01-03 09:25:31,2021-01-03 09:25:31,2021-01-03 09:25:31,2021-01-03 09:25:36,2021-01-03 09:25:31,500.0,...,,,,,,,,,,
1,1,5ff1e1bb0a720f052300056b,150.0,"Receipt number 5 completed, bonus point schedu...",2021-01-03 09:24:43,2021-01-03 09:24:43,2021-01-03 09:24:43,2021-01-03 09:24:48,2021-01-03 09:24:43,150.0,...,,,,,,,,,,
2,1,5ff1e1bb0a720f052300056b,150.0,"Receipt number 5 completed, bonus point schedu...",2021-01-03 09:24:43,2021-01-03 09:24:43,2021-01-03 09:24:43,2021-01-03 09:24:48,2021-01-03 09:24:43,150.0,...,,,,,,,,,,
3,2,5ff1e1f10a720f052300057a,5.0,All-receipts receipt bonus,2021-01-03 09:25:37,2021-01-03 09:25:37,NaT,2021-01-03 09:25:42,NaT,5.0,...,,,,,,,,,,
4,3,5ff1e1ee0a7214ada100056f,5.0,All-receipts receipt bonus,2021-01-03 09:25:34,2021-01-03 09:25:34,2021-01-03 09:25:34,2021-01-03 09:25:39,2021-01-03 09:25:34,5.0,...,,,,,,,,,,
5,4,5ff1e1d20a7214ada1000561,5.0,All-receipts receipt bonus,2021-01-03 09:25:06,2021-01-03 09:25:06,2021-01-03 09:25:11,2021-01-03 09:25:11,2021-01-03 09:25:06,5.0,...,,,,,,,,,,
6,4,5ff1e1d20a7214ada1000561,5.0,All-receipts receipt bonus,2021-01-03 09:25:06,2021-01-03 09:25:06,2021-01-03 09:25:11,2021-01-03 09:25:11,2021-01-03 09:25:06,5.0,...,,,,,,,,,,
7,5,5ff1e1e40a7214ada1000566,750.0,"Receipt number 1 completed, bonus point schedu...",2021-01-03 09:25:24,2021-01-03 09:25:24,2021-01-03 09:25:25,2021-01-03 09:25:30,2021-01-03 09:25:25,750.0,...,,,,,,,,,,
8,6,5ff1e1cd0a720f052300056f,5.0,All-receipts receipt bonus,2021-01-03 09:25:01,2021-01-03 09:25:01,2021-01-03 09:25:02,2021-01-03 09:25:02,2021-01-03 09:25:02,5.0,...,,,,,,,,,,
9,7,5ff1e1a40a720f0523000569,500.0,"Receipt number 2 completed, bonus point schedu...",2021-01-03 09:24:20,2021-01-03 09:24:20,2021-01-03 09:24:21,2021-01-03 09:24:21,2021-01-03 09:24:21,500.0,...,,,,,,,,,,


In [32]:
df_receipts_final.columns.tolist()

['index',
 '_id',
 'bonusPointsEarned',
 'bonusPointsEarnedReason',
 'createDate',
 'dateScanned',
 'finishedDate',
 'modifyDate',
 'pointsAwardedDate',
 'pointsEarned',
 'purchaseDate',
 'purchasedItemCount',
 'rewardsReceiptItemList',
 'rewardsReceiptStatus',
 'totalSpent',
 'userId',
 'rewardsReceiptItemList_barcode',
 'rewardsReceiptItemList_description',
 'rewardsReceiptItemList_finalPrice',
 'rewardsReceiptItemList_itemPrice',
 'rewardsReceiptItemList_needsFetchReview',
 'rewardsReceiptItemList_partnerItemId',
 'rewardsReceiptItemList_preventTargetGapPoints',
 'rewardsReceiptItemList_quantityPurchased',
 'rewardsReceiptItemList_userFlaggedBarcode',
 'rewardsReceiptItemList_userFlaggedNewItem',
 'rewardsReceiptItemList_userFlaggedPrice',
 'rewardsReceiptItemList_userFlaggedQuantity',
 'rewardsReceiptItemList_needsFetchReviewReason',
 'rewardsReceiptItemList_pointsNotAwardedReason',
 'rewardsReceiptItemList_pointsPayerId',
 'rewardsReceiptItemList_rewardsGroup',
 'rewardsReceiptIte

In [33]:
# df_receipts_final.rewardsReceiptItemList_brandCode = df_receipts_final.rewardsReceiptItemList_brandCode.fillna('')

In [34]:
df_receipts_final.rewardsReceiptItemList_brandCode.unique()

array([nan, 'MISSION', 'BRAND', 'KRAFT EASY CHEESE', 'PEPSI', 'DORITOS',
       'KLEENEX', 'WINGSTOP', 'GERM-X', 'BEN AND JERRYS', 'BORDEN',
       'KNORR', 'KLARBRUNN', 'HY-VEE', 'LIGHT & FIT GREEK',
       "CONNIE'S PIZZA", "VAN DE KAMP'S", 'HATCH FARMS', "KELLOGG'S",
       'TEMPTATIONS', "NATURE'S PATH ORGANIC", 'DOLE', 'EL MONTEREY',
       'BIGELOW', 'HY-VEE SELECT', 'KIKKOMAN', 'SPECIAL K', 'SWANSON',
       'YUBAN', 'HILLSHIRE FARM', 'JUST BARE', "LAURA'S LEAN BEEF",
       'CAL-ORGANIC FARMS', 'DOLE CHILLED FRUIT JUICES', "BUSH'S BEST",
       'FOLGERS', 'KASHI', 'LIPTON', 'KRAFT', 'GREEN GIANT',
       'HARVEST SNAPS', "THAT'S SMART!", 'TOSTITOS', 'ADVIL',
       'CHICKEN OF THE SEA', 'RICE-A-RONI', 'STARKIST', 'TIC TAC',
       'SO DELICIOUS', 'WONDERFUL', 'LIGHT & FIT', 'HANOVER',
       'HIDDEN VALLEY', 'DANNON', 'KETTLE BRAND', 'FAGE', 'ORAL-B GLIDE',
       "CAMPBELL'S", "FRENCH'S", 'CRISPIX', 'KING ARTHUR FLOUR',
       'KITCHEN BASICS', 'MCCORMICK', 'OLD EL PASO', 'PEP

In [35]:
# df_brands.brandCode = df_brands.brandCode.fillna('')

In [36]:
df_brands.brandCode.unique()

array([nan, 'STARBUCKS', 'TEST BRANDCODE @1612366146176',
       'TEST BRANDCODE @1612366146051', 'TEST BRANDCODE @1612366146827',
       'TEST BRANDCODE @1612366146091', 'TEST BRANDCODE @1612366146133',
       'J.L. KRAFT', 'CAMPBELLS HOME STYLE', 'TEST',
       'TEST BRANDCODE @1598813526777', 'CALUMET', '511111205012',
       'AUNT JEMIMA SYRUP', 'MOLSON', 'LOTRIMIN',
       'TEST BRANDCODE @1597342520277', 'ST IVES', 'CHRISIMAGE',
       'ALKA SELTZER', "JACK DANIEL'S BARBECUE", 'MAGNUM Ice Cream',
       '511111105329', 'TEST BRANDCODE @1598635634882', 'TACO BELL',
       'FROSTED CHEERIOS', 'TEST BRANDCODE @1598639199674',
       'GODIVA DRY PACKAGED DESSERTS', 'LARABAR',
       'TEST BRANDCODE @1597350074333', 'TEST BRANDCODE @1607636368717',
       'TEST BRANDCODE @1607707830095', 'COTTONELLE', 'IZZE', 'MIO',
       '511111505365', 'QUILTING SPECIAL EDITION',
       'TEST BRANDCODE @1604437351617', 'HERMAN', 'KEVITA', 'DELIMEX',
       'THE RIGHT TO SHOWER', 'CARESS', 'TEST BRA

In [74]:
# Lookup brandCode in brands and receipts

# Remove all the Nan values in Receipts' brandCode and convert all brandCodes to string from object
df_rec_2 = df_receipts_final.dropna(subset=['rewardsReceiptItemList_brandCode'])
df_rec_2 = df_rec_2['rewardsReceiptItemList_brandCode'].apply(lambda x: str(x))

# Finding unique brandCodes from df_brands
unique_brands = set(df_brands.dropna(subset=['brandCode'])['brandCode'])

In [80]:
# Take left join between Receipts and Brands on brandCode

df_rec_brands = pd.merge(df_receipts_final, df_brands_2, left_on='rewardsReceiptItemList_brandCode', right_on='brandCode', how='left')

In [81]:
# Get unique brandCodes from the joined results: df_rec_brands

brand_code_join = set(df_rec_brands['rewardsReceiptItemList_brandCode'].unique())

In [82]:
# Basic set difference can help us find the which brands are present in Receipts but not in Brands json.

brand_code_join - unique_brands

{'7UP',
 'ADVIL',
 'AMERICAN BEAUTY',
 'ARROWHEAD',
 'AZTECA',
 'BANZA',
 'BEAR CREEK COUNTRY KITCHENS',
 'BEN AND JERRYS',
 'BETTY CROCKER',
 'BIC',
 'BIGELOW',
 'BLUE DIAMOND',
 "BOAR'S HEAD",
 'BORDEN',
 'BOTA BOX',
 'BRAND',
 "BRASWELL'S",
 'BUNNY',
 "BUSH'S BEST",
 'C&H',
 'CADBURY',
 'CAL-ORGANIC FARMS',
 'CALIFIA FARMS',
 "CAMPBELL'S",
 'CARAMELLO',
 'CHEERIOS',
 'CHEESE',
 'CHEEZ-IT',
 'CHEX',
 'CHICKEN OF THE SEA',
 'CHIQUITA',
 'CINNAMON TOAST CRUNCH',
 'COKE',
 'COLEMAN NATURAL',
 "CONNIE'S PIZZA",
 'CREST 3D WHITE',
 'CRISPIX',
 'DANNON',
 'DARE',
 'DELI',
 'DIET COKE',
 'DIGIORNO',
 'DOLE',
 'DR PEPPER',
 'EDWARDS',
 "EGGLAND'S BEST",
 'EGGO',
 'EL MONTEREY',
 'ENERGIZER MAX',
 'ESSENTIAL EVERYDAY',
 'FAGE',
 "FAMOUS DAVE'S",
 "FLORIDA'S NATURAL",
 'FOLGERS',
 'FORTUNE YAKISOBA',
 'FRANZ',
 "FRENCH'S",
 'FRESH EXPRESS',
 'FRESH STEP',
 'FRONTERA',
 'GALLO FAMILY VINEYARDS',
 'GENERAL MILLS',
 'GERBER',
 'GERM-X',
 'GREEN GIANT',
 'GRIMMWAY FARMS',
 'HANOVER',
 'HARVEST SNA

This difference makes it clear that there are entries in Receipts with brandCode's not present in Brands table. So, the Brands table does not have all the information pertaining to the Brands. This creates an inconsistency in the data which needs to be handled.

---