In [1]:
import json
import pandas as pd

In [2]:
# Load the JSON files
def load_json_lines(file_path):
    """Read a JSON Lines file and return its records as a list.

    Every non-blank line of the file is parsed on its own with
    ``json.loads``.

    Parameters
    ----------
    file_path : str or path-like
        Location of the newline-delimited JSON file.

    Returns
    -------
    list
        One parsed object per non-blank line, in file order.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    # Pin the encoding so the result does not depend on the platform's
    # default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Blank lines (for example a trailing newline at the end of the file)
        # carry no record and would make json.loads raise, so skip them.
        return [json.loads(line) for line in f if line.strip()]

# Read the three raw exports in a fixed order: brands, receipts, users
brands_data, receipts_data, users_data = (
    load_json_lines(json_path)
    for json_path in (
        '../data/brands.json',
        '../data/receipts.json',
        '../data/users.json',
    )
)

In [3]:
# Wrap each list of parsed records in its own DataFrame
brands_df, receipts_df, users_df = map(
    pd.DataFrame,
    (brands_data, receipts_data, users_data),
)

In [4]:
# Preview the first rows of the brands frame as loaded (nested values not yet flattened).
brands_df.head()

Unnamed: 0,_id,barcode,category,categoryCode,cpg,name,topBrand,brandCode
0,{'$oid': '601ac115be37ce2ead437551'},511111019862,Baking,BAKING,"{'$id': {'$oid': '601ac114be37ce2ead437550'}, ...",test brand @1612366101024,False,
1,{'$oid': '601c5460be37ce2ead43755f'},511111519928,Beverages,BEVERAGES,"{'$id': {'$oid': '5332f5fbe4b03c9a25efd0ba'}, ...",Starbucks,False,STARBUCKS
2,{'$oid': '601ac142be37ce2ead43755d'},511111819905,Baking,BAKING,"{'$id': {'$oid': '601ac142be37ce2ead437559'}, ...",test brand @1612366146176,False,TEST BRANDCODE @1612366146176
3,{'$oid': '601ac142be37ce2ead43755a'},511111519874,Baking,BAKING,"{'$id': {'$oid': '601ac142be37ce2ead437559'}, ...",test brand @1612366146051,False,TEST BRANDCODE @1612366146051
4,{'$oid': '601ac142be37ce2ead43755e'},511111319917,Candy & Sweets,CANDY_AND_SWEETS,"{'$id': {'$oid': '5332fa12e4b03c9a25efd1e7'}, ...",test brand @1612366146827,False,TEST BRANDCODE @1612366146827


In [5]:
# Preview the first rows of the receipts frame as loaded (nested values not yet flattened).
receipts_df.head()

Unnamed: 0,_id,bonusPointsEarned,bonusPointsEarnedReason,createDate,dateScanned,finishedDate,modifyDate,pointsAwardedDate,pointsEarned,purchaseDate,purchasedItemCount,rewardsReceiptItemList,rewardsReceiptStatus,totalSpent,userId
0,{'$oid': '5ff1e1eb0a720f0523000575'},500.0,"Receipt number 2 completed, bonus point schedu...",{'$date': 1609687531000},{'$date': 1609687531000},{'$date': 1609687531000},{'$date': 1609687536000},{'$date': 1609687531000},500.0,{'$date': 1609632000000},5.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,26.0,5ff1e1eacfcf6c399c274ae6
1,{'$oid': '5ff1e1bb0a720f052300056b'},150.0,"Receipt number 5 completed, bonus point schedu...",{'$date': 1609687483000},{'$date': 1609687483000},{'$date': 1609687483000},{'$date': 1609687488000},{'$date': 1609687483000},150.0,{'$date': 1609601083000},2.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,11.0,5ff1e194b6a9d73a3a9f1052
2,{'$oid': '5ff1e1f10a720f052300057a'},5.0,All-receipts receipt bonus,{'$date': 1609687537000},{'$date': 1609687537000},,{'$date': 1609687542000},,5.0,{'$date': 1609632000000},1.0,"[{'needsFetchReview': False, 'partnerItemId': ...",REJECTED,10.0,5ff1e1f1cfcf6c399c274b0b
3,{'$oid': '5ff1e1ee0a7214ada100056f'},5.0,All-receipts receipt bonus,{'$date': 1609687534000},{'$date': 1609687534000},{'$date': 1609687534000},{'$date': 1609687539000},{'$date': 1609687534000},5.0,{'$date': 1609632000000},4.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,28.0,5ff1e1eacfcf6c399c274ae6
4,{'$oid': '5ff1e1d20a7214ada1000561'},5.0,All-receipts receipt bonus,{'$date': 1609687506000},{'$date': 1609687506000},{'$date': 1609687511000},{'$date': 1609687511000},{'$date': 1609687506000},5.0,{'$date': 1609601106000},2.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,1.0,5ff1e194b6a9d73a3a9f1052


In [6]:
# Preview the first rows of the users frame as loaded (nested values not yet flattened).
users_df.head()

Unnamed: 0,_id,active,createdDate,lastLogin,role,signUpSource,state
0,{'$oid': '5ff1e194b6a9d73a3a9f1052'},True,{'$date': 1609687444800},{'$date': 1609687537858},consumer,Email,WI
1,{'$oid': '5ff1e194b6a9d73a3a9f1052'},True,{'$date': 1609687444800},{'$date': 1609687537858},consumer,Email,WI
2,{'$oid': '5ff1e194b6a9d73a3a9f1052'},True,{'$date': 1609687444800},{'$date': 1609687537858},consumer,Email,WI
3,{'$oid': '5ff1e1eacfcf6c399c274ae6'},True,{'$date': 1609687530554},{'$date': 1609687530597},consumer,Email,WI
4,{'$oid': '5ff1e194b6a9d73a3a9f1052'},True,{'$date': 1609687444800},{'$date': 1609687537858},consumer,Email,WI


In [7]:
# Remove underscores from the top-level column headers (e.g. `_id` -> `id`).
# The underscore is deleted rather than replaced by a space so that no header
# ends up with leading whitespace; regex=False makes the literal match
# explicit instead of relying on the pandas default.
brands_df.columns = brands_df.columns.str.replace('_', '', regex=False)
receipts_df.columns = receipts_df.columns.str.replace('_', '', regex=False)
users_df.columns = users_df.columns.str.replace('_', '', regex=False)

# Flatten nested dict values into dotted column names (e.g. `id.$oid`,
# `cpg.$ref`) by round-tripping each frame through a list of records.
brands_df = pd.json_normalize(brands_df.to_dict(orient='records'))
receipts_df = pd.json_normalize(receipts_df.to_dict(orient='records'))
users_df = pd.json_normalize(users_df.to_dict(orient='records'))

In [8]:
# Preview brands after flattening: nested keys now appear as dotted columns.
brands_df.head()

Unnamed: 0,barcode,category,categoryCode,name,topBrand,brandCode,id.$oid,cpg.$id.$oid,cpg.$ref
0,511111019862,Baking,BAKING,test brand @1612366101024,False,,601ac115be37ce2ead437551,601ac114be37ce2ead437550,Cogs
1,511111519928,Beverages,BEVERAGES,Starbucks,False,STARBUCKS,601c5460be37ce2ead43755f,5332f5fbe4b03c9a25efd0ba,Cogs
2,511111819905,Baking,BAKING,test brand @1612366146176,False,TEST BRANDCODE @1612366146176,601ac142be37ce2ead43755d,601ac142be37ce2ead437559,Cogs
3,511111519874,Baking,BAKING,test brand @1612366146051,False,TEST BRANDCODE @1612366146051,601ac142be37ce2ead43755a,601ac142be37ce2ead437559,Cogs
4,511111319917,Candy & Sweets,CANDY_AND_SWEETS,test brand @1612366146827,False,TEST BRANDCODE @1612366146827,601ac142be37ce2ead43755e,5332fa12e4b03c9a25efd1e7,Cogs


In [9]:
# Preview receipts after flattening: date fields now appear as `<name>.$date` columns.
receipts_df.head()

Unnamed: 0,bonusPointsEarned,bonusPointsEarnedReason,pointsEarned,purchasedItemCount,rewardsReceiptItemList,rewardsReceiptStatus,totalSpent,userId,id.$oid,createDate.$date,dateScanned.$date,finishedDate.$date,modifyDate.$date,pointsAwardedDate.$date,purchaseDate.$date,finishedDate,pointsAwardedDate,purchaseDate
0,500.0,"Receipt number 2 completed, bonus point schedu...",500.0,5.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,26.0,5ff1e1eacfcf6c399c274ae6,5ff1e1eb0a720f0523000575,1609687531000,1609687531000,1609688000000.0,1609687536000,1609688000000.0,1609632000000.0,,,
1,150.0,"Receipt number 5 completed, bonus point schedu...",150.0,2.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,11.0,5ff1e194b6a9d73a3a9f1052,5ff1e1bb0a720f052300056b,1609687483000,1609687483000,1609687000000.0,1609687488000,1609687000000.0,1609601000000.0,,,
2,5.0,All-receipts receipt bonus,5.0,1.0,"[{'needsFetchReview': False, 'partnerItemId': ...",REJECTED,10.0,5ff1e1f1cfcf6c399c274b0b,5ff1e1f10a720f052300057a,1609687537000,1609687537000,,1609687542000,,1609632000000.0,,,
3,5.0,All-receipts receipt bonus,5.0,4.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,28.0,5ff1e1eacfcf6c399c274ae6,5ff1e1ee0a7214ada100056f,1609687534000,1609687534000,1609688000000.0,1609687539000,1609688000000.0,1609632000000.0,,,
4,5.0,All-receipts receipt bonus,5.0,2.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,1.0,5ff1e194b6a9d73a3a9f1052,5ff1e1d20a7214ada1000561,1609687506000,1609687506000,1609688000000.0,1609687511000,1609688000000.0,1609601000000.0,,,


In [10]:
# Preview users after flattening: date fields now appear as `<name>.$date` columns.
users_df.head()

Unnamed: 0,active,role,signUpSource,state,id.$oid,createdDate.$date,lastLogin.$date,lastLogin
0,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,1609687444800,1609688000000.0,
1,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,1609687444800,1609688000000.0,
2,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,1609687444800,1609688000000.0,
3,True,consumer,Email,WI,5ff1e1eacfcf6c399c274ae6,1609687530554,1609688000000.0,
4,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,1609687444800,1609688000000.0,


In [11]:
# Trim surrounding whitespace from every header so the exact-name lookups
# below match
for frame in (brands_df, receipts_df, users_df):
    frame.columns = frame.columns.str.strip()

# Brands: give the flattened id / cpg columns short names
brands_df = brands_df.rename(
    columns={
        'id.$oid': 'id',
        'cpg.$id.$oid': 'cpgId',
        'cpg.$ref': 'cpgRef',
    }
)

# Receipts: first drop any bare (un-suffixed) date columns that sit next to
# their `.$date` counterparts, then let the `.$date` columns take the plain names
receipts_df = receipts_df.drop(
    columns=['createDate', 'dateScanned', 'finishedDate', 'modifyDate', 'pointsAwardedDate', 'purchaseDate'],
    errors='ignore',
).rename(
    columns={
        'id.$oid': 'id',
        'createDate.$date': 'createdDate',
        'dateScanned.$date': 'dateScanned',
        'finishedDate.$date': 'finishedDate',
        'modifyDate.$date': 'modifyDate',
        'pointsAwardedDate.$date': 'pointsAwardedDate',
        'purchaseDate.$date': 'purchaseDate',
    }
)

# Users: same two-step pattern (names missing from the frame are ignored by drop)
users_df = users_df.drop(
    columns=['createDate', 'lastLogin'],
    errors='ignore',
).rename(
    columns={
        'id.$oid': 'id',
        'createdDate.$date': 'createdDate',
        'lastLogin.$date': 'lastLogin',
    }
)

In [12]:
# Preview brands after the rename step (`id`, `cpgId`, `cpgRef`).
brands_df.head()

Unnamed: 0,barcode,category,categoryCode,name,topBrand,brandCode,id,cpgId,cpgRef
0,511111019862,Baking,BAKING,test brand @1612366101024,False,,601ac115be37ce2ead437551,601ac114be37ce2ead437550,Cogs
1,511111519928,Beverages,BEVERAGES,Starbucks,False,STARBUCKS,601c5460be37ce2ead43755f,5332f5fbe4b03c9a25efd0ba,Cogs
2,511111819905,Baking,BAKING,test brand @1612366146176,False,TEST BRANDCODE @1612366146176,601ac142be37ce2ead43755d,601ac142be37ce2ead437559,Cogs
3,511111519874,Baking,BAKING,test brand @1612366146051,False,TEST BRANDCODE @1612366146051,601ac142be37ce2ead43755a,601ac142be37ce2ead437559,Cogs
4,511111319917,Candy & Sweets,CANDY_AND_SWEETS,test brand @1612366146827,False,TEST BRANDCODE @1612366146827,601ac142be37ce2ead43755e,5332fa12e4b03c9a25efd1e7,Cogs


In [13]:
# Preview receipts after the drop/rename step; date columns still hold epoch milliseconds.
receipts_df.head()

Unnamed: 0,bonusPointsEarned,bonusPointsEarnedReason,pointsEarned,purchasedItemCount,rewardsReceiptItemList,rewardsReceiptStatus,totalSpent,userId,id,createdDate,dateScanned,finishedDate,modifyDate,pointsAwardedDate,purchaseDate
0,500.0,"Receipt number 2 completed, bonus point schedu...",500.0,5.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,26.0,5ff1e1eacfcf6c399c274ae6,5ff1e1eb0a720f0523000575,1609687531000,1609687531000,1609688000000.0,1609687536000,1609688000000.0,1609632000000.0
1,150.0,"Receipt number 5 completed, bonus point schedu...",150.0,2.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,11.0,5ff1e194b6a9d73a3a9f1052,5ff1e1bb0a720f052300056b,1609687483000,1609687483000,1609687000000.0,1609687488000,1609687000000.0,1609601000000.0
2,5.0,All-receipts receipt bonus,5.0,1.0,"[{'needsFetchReview': False, 'partnerItemId': ...",REJECTED,10.0,5ff1e1f1cfcf6c399c274b0b,5ff1e1f10a720f052300057a,1609687537000,1609687537000,,1609687542000,,1609632000000.0
3,5.0,All-receipts receipt bonus,5.0,4.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,28.0,5ff1e1eacfcf6c399c274ae6,5ff1e1ee0a7214ada100056f,1609687534000,1609687534000,1609688000000.0,1609687539000,1609688000000.0,1609632000000.0
4,5.0,All-receipts receipt bonus,5.0,2.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,1.0,5ff1e194b6a9d73a3a9f1052,5ff1e1d20a7214ada1000561,1609687506000,1609687506000,1609688000000.0,1609687511000,1609688000000.0,1609601000000.0


In [14]:
# Preview users after the drop/rename step; date columns still hold epoch milliseconds.
users_df.head()

Unnamed: 0,active,role,signUpSource,state,id,createdDate,lastLogin
0,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,1609687444800,1609688000000.0
1,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,1609687444800,1609688000000.0
2,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,1609687444800,1609688000000.0
3,True,consumer,Email,WI,5ff1e1eacfcf6c399c274ae6,1609687530554,1609688000000.0
4,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,1609687444800,1609688000000.0


In [15]:
#Modify date columns from epoch to datetime
def convert_epoch_to_datetime(df, columns):
    """Turn millisecond-epoch columns of ``df`` into pandas datetimes.

    The frame is modified in place and also returned. Any requested name
    that is not a column of ``df`` is silently skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are converted.
    columns : iterable of str
        Candidate column names holding epoch values in milliseconds.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    present = [name for name in columns if name in df.columns]
    for name in present:
        epoch_values = df[name]
        df[name] = pd.to_datetime(epoch_values, unit='ms')
    return df
# Date-like columns to convert per frame; names a frame does not have are
# skipped by convert_epoch_to_datetime
receipt_date_columns = [
    'createdDate',
    'dateScanned',
    'finishedDate',
    'modifyDate',
    'pointsAwardedDate',
    'purchaseDate',
]
user_date_columns = ['createdDate', 'lastLogin']

brands_df = convert_epoch_to_datetime(brands_df, ['createdDate'])
receipts_df = convert_epoch_to_datetime(receipts_df, receipt_date_columns)
users_df = convert_epoch_to_datetime(users_df, user_date_columns)

In [16]:
# Preview brands after the datetime step (brands has no `createdDate` column, so nothing changes here).
brands_df.head()

Unnamed: 0,barcode,category,categoryCode,name,topBrand,brandCode,id,cpgId,cpgRef
0,511111019862,Baking,BAKING,test brand @1612366101024,False,,601ac115be37ce2ead437551,601ac114be37ce2ead437550,Cogs
1,511111519928,Beverages,BEVERAGES,Starbucks,False,STARBUCKS,601c5460be37ce2ead43755f,5332f5fbe4b03c9a25efd0ba,Cogs
2,511111819905,Baking,BAKING,test brand @1612366146176,False,TEST BRANDCODE @1612366146176,601ac142be37ce2ead43755d,601ac142be37ce2ead437559,Cogs
3,511111519874,Baking,BAKING,test brand @1612366146051,False,TEST BRANDCODE @1612366146051,601ac142be37ce2ead43755a,601ac142be37ce2ead437559,Cogs
4,511111319917,Candy & Sweets,CANDY_AND_SWEETS,test brand @1612366146827,False,TEST BRANDCODE @1612366146827,601ac142be37ce2ead43755e,5332fa12e4b03c9a25efd1e7,Cogs


In [17]:
# Preview receipts after the datetime step: date columns are now timestamps, missing values show as NaT.
receipts_df.head()

Unnamed: 0,bonusPointsEarned,bonusPointsEarnedReason,pointsEarned,purchasedItemCount,rewardsReceiptItemList,rewardsReceiptStatus,totalSpent,userId,id,createdDate,dateScanned,finishedDate,modifyDate,pointsAwardedDate,purchaseDate
0,500.0,"Receipt number 2 completed, bonus point schedu...",500.0,5.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,26.0,5ff1e1eacfcf6c399c274ae6,5ff1e1eb0a720f0523000575,2021-01-03 15:25:31,2021-01-03 15:25:31,2021-01-03 15:25:31,2021-01-03 15:25:36,2021-01-03 15:25:31,2021-01-03 00:00:00
1,150.0,"Receipt number 5 completed, bonus point schedu...",150.0,2.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,11.0,5ff1e194b6a9d73a3a9f1052,5ff1e1bb0a720f052300056b,2021-01-03 15:24:43,2021-01-03 15:24:43,2021-01-03 15:24:43,2021-01-03 15:24:48,2021-01-03 15:24:43,2021-01-02 15:24:43
2,5.0,All-receipts receipt bonus,5.0,1.0,"[{'needsFetchReview': False, 'partnerItemId': ...",REJECTED,10.0,5ff1e1f1cfcf6c399c274b0b,5ff1e1f10a720f052300057a,2021-01-03 15:25:37,2021-01-03 15:25:37,NaT,2021-01-03 15:25:42,NaT,2021-01-03 00:00:00
3,5.0,All-receipts receipt bonus,5.0,4.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,28.0,5ff1e1eacfcf6c399c274ae6,5ff1e1ee0a7214ada100056f,2021-01-03 15:25:34,2021-01-03 15:25:34,2021-01-03 15:25:34,2021-01-03 15:25:39,2021-01-03 15:25:34,2021-01-03 00:00:00
4,5.0,All-receipts receipt bonus,5.0,2.0,"[{'barcode': '4011', 'description': 'ITEM NOT ...",FINISHED,1.0,5ff1e194b6a9d73a3a9f1052,5ff1e1d20a7214ada1000561,2021-01-03 15:25:06,2021-01-03 15:25:06,2021-01-03 15:25:11,2021-01-03 15:25:11,2021-01-03 15:25:06,2021-01-02 15:25:06


In [18]:
# Preview users after the datetime step: `createdDate` and `lastLogin` are now timestamps.
users_df.head()

Unnamed: 0,active,role,signUpSource,state,id,createdDate,lastLogin
0,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,2021-01-03 15:24:04.800,2021-01-03 15:25:37.858
1,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,2021-01-03 15:24:04.800,2021-01-03 15:25:37.858
2,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,2021-01-03 15:24:04.800,2021-01-03 15:25:37.858
3,True,consumer,Email,WI,5ff1e1eacfcf6c399c274ae6,2021-01-03 15:25:30.554,2021-01-03 15:25:30.597
4,True,consumer,Email,WI,5ff1e194b6a9d73a3a9f1052,2021-01-03 15:24:04.800,2021-01-03 15:25:37.858


In [19]:
# Get fill rates for each column listed individually
def get_fill_rates(df):
    """Return the share of non-null values for every column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to profile.

    Returns
    -------
    dict
        Maps each column name to the mean of its not-null mask, i.e. the
        fraction of rows in which that column holds a value.
    """
    return {name: df[name].notnull().mean() for name in df.columns}
# Per-column fill rates for each of the three frames
brands_fill_rates, receipts_fill_rates, users_fill_rates = (
    get_fill_rates(frame) for frame in (brands_df, receipts_df, users_df)
)

# Lay each mapping out as a two-column table, one row per source column
rate_table_columns = ['Column', 'Fill Rate']
brands_rate_table = pd.DataFrame(list(brands_fill_rates.items()), columns=rate_table_columns)
receipts_rate_table = pd.DataFrame(list(receipts_fill_rates.items()), columns=rate_table_columns)
users_rate_table = pd.DataFrame(list(users_fill_rates.items()), columns=rate_table_columns)

# Keep only the columns that are not completely filled
brands_fill_rates_df = brands_rate_table.loc[brands_rate_table['Fill Rate'].lt(1)]
receipts_fill_rates_df = receipts_rate_table.loc[receipts_rate_table['Fill Rate'].lt(1)]
users_fill_rates_df = users_rate_table.loc[users_rate_table['Fill Rate'].lt(1)]

# Heading and table go on separate lines, one report per frame
print("Brands Fill Rates < 1:", brands_fill_rates_df, sep="\n")
print("\nReceipts Fill Rates < 1:", receipts_fill_rates_df, sep="\n")
print("\nUsers Fill Rates < 1:", users_fill_rates_df, sep="\n")

Brands Fill Rates < 1:
         Column  Fill Rate
1      category   0.867181
2  categoryCode   0.443016
4      topBrand   0.475578
5     brandCode   0.799486

Receipts Fill Rates < 1:
                     Column  Fill Rate
0         bonusPointsEarned   0.486148
1   bonusPointsEarnedReason   0.486148
2              pointsEarned   0.544236
3        purchasedItemCount   0.567471
4    rewardsReceiptItemList   0.606792
6                totalSpent   0.611260
11             finishedDate   0.507596
13        pointsAwardedDate   0.479893
14             purchaseDate   0.599643

Users Fill Rates < 1:
         Column  Fill Rate
2  signUpSource   0.903030
3         state   0.886869
6     lastLogin   0.874747


In [20]:
# Work on a copy holding only the receipt keys and the nested item list
temp_df = receipts_df[['id', 'userId', 'rewardsReceiptItemList']].copy()

# One row per list entry. ignore_index=True gives the exploded frame a fresh
# 0..n-1 index, so the key columns and the flattened item columns built below
# share the same index and line up row-for-row in the concat.
temp_df = temp_df.explode('rewardsReceiptItemList', ignore_index=True)

# Entries that are not dicts (a receipt with no item list explodes to a single
# missing value) are turned into empty records, so they flatten to a row of
# all-missing item fields instead of depending on how json_normalize treats
# non-dict input. Passing a plain list also keeps the flattened frame on a
# default 0..n-1 index regardless of how json_normalize handles Series input.
item_records = [
    item if isinstance(item, dict) else {}
    for item in temp_df['rewardsReceiptItemList']
]

# Receipt id / userId side by side with the flattened item fields
receipts_items_df = pd.concat(
    [temp_df[['id', 'userId']], pd.json_normalize(item_records)],
    axis=1
)

receipts_items_df.head()

Unnamed: 0,id,userId,barcode,description,finalPrice,itemPrice,needsFetchReview,partnerItemId,preventTargetGapPoints,quantityPurchased,...,itemNumber,originalMetaBriteQuantityPurchased,pointsEarned,targetPrice,competitiveProduct,originalFinalPrice,originalMetaBriteItemPrice,deleted,priceAfterCoupon,metabriteCampaignId
0,5ff1e1eb0a720f0523000575,5ff1e1eacfcf6c399c274ae6,4011.0,ITEM NOT FOUND,26.0,26.0,False,1,True,5.0,...,,,,,,,,,,
1,5ff1e1bb0a720f052300056b,5ff1e194b6a9d73a3a9f1052,4011.0,ITEM NOT FOUND,1.0,1.0,,1,,1.0,...,,,,,,,,,,
2,5ff1e1bb0a720f052300056b,5ff1e194b6a9d73a3a9f1052,28400642255.0,DORITOS TORTILLA CHIP SPICY SWEET CHILI REDUCE...,10.0,10.0,True,2,True,1.0,...,,,,,,,,,,
3,5ff1e1f10a720f052300057a,5ff1e1f1cfcf6c399c274b0b,,,,,False,1,True,,...,,,,,,,,,,
4,5ff1e1ee0a7214ada100056f,5ff1e1eacfcf6c399c274ae6,4011.0,ITEM NOT FOUND,28.0,28.0,False,1,True,4.0,...,,,,,,,,,,


In [21]:
# Convert all column names from camelCase to snake_case
def camel_to_snake_case(column_name):
    """Convert a camelCase / PascalCase name to snake_case.

    An underscore is inserted before an uppercase letter only at a real word
    boundary:

    * after a lowercase letter or digit (``createdDate`` -> ``created_date``);
    * at the last capital of an uppercase run that is followed by a lowercase
      letter (``HTTPResponse`` -> ``http_response``).

    Runs of capitals are therefore kept together as one word
    (``userID`` -> ``user_id``), and a capital that follows a non-letter such
    as ``_`` or ``.`` does not get an extra underscore.

    Parameters
    ----------
    column_name : str
        Name to convert.

    Returns
    -------
    str
        Lower-cased snake_case name with any leading underscores removed.
    """
    pieces = []
    last = len(column_name) - 1
    for pos, ch in enumerate(column_name):
        if ch.isupper() and pos > 0:
            prev = column_name[pos - 1]
            follows_word = prev.islower() or prev.isdigit()
            # Last capital of an acronym that starts a new word, e.g. the
            # "R" in "HTTPResponse".
            ends_acronym = (
                prev.isupper() and pos < last and column_name[pos + 1].islower()
            )
            if follows_word or ends_acronym:
                pieces.append('_')
        pieces.append(ch.lower())
    # Leading underscores are stripped, matching the previous behaviour.
    return ''.join(pieces).lstrip('_')

# Rename the columns of every cleaned frame in place
for df in (brands_df, receipts_df, receipts_items_df, users_df):
    df.columns = list(map(camel_to_snake_case, df.columns))


In [22]:
# Write each cleaned frame to its own CSV file, leaving out the row index
csv_targets = [
    (brands_df, '../data/brands_cleaned.csv'),
    (receipts_df, '../data/receipts_cleaned.csv'),
    (receipts_items_df, '../data/receipts_items_cleaned.csv'),
    (users_df, '../data/users_cleaned.csv'),
]
for frame, csv_path in csv_targets:
    frame.to_csv(csv_path, index=False)