In [1]:
import pandas as pd
import os
import numpy as np
import json
import tarfile
import time

In [2]:
# use this command to compress the uncompressed tarfile when the scrape is ready:
# xz -zk tarfile.tar

In [3]:
DATE = "2023_01_15"
TYPE = ""

In [4]:
tar = tarfile.open(f"../databases/archives/{DATE}_rawdata{TYPE}.tar", "r")

In [5]:
def create_nutrition_df(data, name):
    transformed_data = [name] + [row[2] for row in data["data"]]
    columns = ["id"] + [row[0] for row in data["data"]]
    return pd.DataFrame([transformed_data], columns=columns)

In [6]:
start_time = time.time()
all_nutrition_df = []
for member in tar.getmembers():
    if member.name.find("nutrition.json") != -1:
        file_content = tar.extractfile(member.name).read().decode("utf-8")
        data = json.loads(file_content)
        all_nutrition_df.append(create_nutrition_df(data, member.name.split("/")[-2]))

nutritions_df = pd.concat(all_nutrition_df, ignore_index=True)
end_time = time.time()
print(end_time - start_time)
print((end_time - start_time) / len(nutritions_df))

217.64545226097107
0.02024044008750777


In [7]:
nutritions_df

Unnamed: 0,id,Energia (kJ / kcal),Zsír (g),Telített zsírsavak (g),Szénhidrát (g),Cukrok (g),Fehérje (g),Só (g),Rost (g),C vitamin (mg),...,Cink (mg),Réz (mg),Jód (µg),Nátrium (g),Ebből koleszterin,Mangán (mg),Fluor (µg),Szelén (µg),Króm (µg),Molibdén (µg)
0,4072139,235/56,2.8000,1.8000,4.7000,4.7000,3.0000,0.1300,,,...,,,,,,,,,,
1,50586,255/60,0.8000,0.6000,13.0000,6.5000,0.0000,0.0800,0.5000,,...,,,,,,,,,,
2,3727387,2351/564,36.0000,22.0000,51.0000,30.0000,8.0000,0.2000,2.0000,2.00000,...,,,,,,,,,,
3,4061272,1,1,1,1,1,1,1,,,...,,,,,,,,,,
4,35341,645/155,4.0000,0.4000,25.5000,0.8000,2.5000,0.1000,2.8000,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10748,4117341,2030/601,23,9,67,33,56,066,,,...,,,,,,,,,,
10749,4117392,129/31,04,01,61,02,07,09,,,...,,,,,,,,,,
10750,4117335,2079/625,28,11,61,30,55,123,,,...,,,,,,,,,,
10751,4117305,1835/437,18,72,636,28,51,03,,,...,,,,,,,,,,


In [34]:
nutritions_df.to_csv(f"../databases/csv/{DATE}/nutritions_raw.csv", escapechar="\\", compression="xz")

In [9]:
def create_df(data: dict):
    return pd.DataFrame([data.values()], columns=list(data.keys()))

In [10]:
start_time = time.time()
all_product_df = []
all_variant_df = []
all_prices_df = []
for i, member in enumerate(tar.getmembers()):
    if member.name.find("product.json") != -1:
        file_content = tar.extractfile(member.name).read().decode("utf-8")
        data = json.loads(file_content)

        price_data = data["selectedVariant"]["price"]
        price_data["id"] = data["selectedVariant"]["id"]
        all_prices_df.append(create_df(price_data))

        del data["selectedVariant"]["price"]
        del data["selectedVariant"]["packageInfo"]["unitPrice"]
        data["selectedVariant"]["packageUnit"] = data["selectedVariant"]["packageInfo"]["packageUnit"]
        data["selectedVariant"]["packageSize"] = data["selectedVariant"]["packageInfo"]["packageSize"]
        del data["selectedVariant"]["packageInfo"]

        all_variant_df.append(create_df(data["selectedVariant"]))

        data["selectedVariant"] = data["selectedVariant"]["id"]
        data["defaultVariant"] = data["defaultVariant"]["id"]
        all_product_df.append(create_df(data))

    if i % 1000 == 0:
        print(f'processed {i} members in {time.time() - start_time} seconds')

df_products = pd.concat(all_product_df, ignore_index=True)
df_variants = pd.concat(all_variant_df, ignore_index=True)
df_prices = pd.concat(all_prices_df, ignore_index=True)
end_time = time.time()
print(end_time - start_time)
print((end_time - start_time) / len(df_products))

processed 0 members in 0.10580062866210938 seconds
processed 1000 members in 19.980668544769287 seconds
processed 2000 members in 38.69478225708008 seconds
processed 3000 members in 56.95269584655762 seconds
processed 4000 members in 75.39728784561157 seconds
processed 5000 members in 93.95096778869629 seconds
processed 6000 members in 112.56591892242432 seconds
processed 7000 members in 131.40670132637024 seconds
processed 8000 members in 149.61479926109314 seconds
processed 9000 members in 168.1195409297943 seconds
processed 10000 members in 186.8265438079834 seconds
processed 11000 members in 205.21482849121094 seconds
processed 12000 members in 223.5796127319336 seconds
processed 13000 members in 241.70942616462708 seconds
processed 14000 members in 259.9118084907532 seconds
processed 15000 members in 278.3602650165558 seconds
processed 16000 members in 295.719605922699 seconds
processed 17000 members in 312.80211114883423 seconds
processed 18000 members in 330.6197421550751 second

In [11]:
df_prices.head(10)

Unnamed: 0,net,gross,currency,decimalPlaces,netDiscounted,grossDiscounted,discountPercentage,isDiscounted,id,discountValidFrom,discountValidTo
0,319,319,HUF,0,319,319,0,False,4071506,,
1,509,509,HUF,0,369,369,28,True,4071509,2023-01-12T00:00:00+01:00,2023-01-18T23:59:59+01:00
2,409,409,HUF,0,409,409,0,False,3664574,,
3,419,419,HUF,0,419,419,0,False,4006748,,
4,439,439,HUF,0,439,439,0,False,3664577,,
5,619,619,HUF,0,439,439,29,True,4071500,2023-01-12T00:00:00+01:00,2023-01-18T23:59:59+01:00
6,499,499,HUF,0,499,499,0,False,3665026,,
7,519,519,HUF,0,519,519,0,False,29452,,
8,529,529,HUF,0,529,529,0,False,3600419,,
9,539,539,HUF,0,539,539,0,False,3938513,,


In [12]:
df_variants.drop_duplicates(subset=["id"], inplace=True)
df_products.drop_duplicates(subset=["id"], inplace=True)
df_prices.drop_duplicates(subset=["id"], inplace=True)
df_variants

Unnamed: 0,id,name,sku,productId,addedName,selectValue,status,unit,eanCode,aided,...,flags,media,details,isInVirtualStock,shoppingListsContain,offerType,packageUnit,packageSize,itemVolumeInfo,roll
0,4071506,"Tarka ESL félzsíros tej 2,8% 0,5 l",520525,534116,,4071506,none,db,5998207771623,False,...,"[{'flag': 'flag_hungarian', 'name': 'Magyar te...",{'images': ['https://ahuazurewebblob0.azureedg...,"[description, ingredients, parameterList, nutr...",False,[],,LITER,0.5,,
1,4071509,"Tarka ESL zsírszegény tej 1,5% 1 l",520526,534119,,4071509,none,db,5998207771609,False,...,"[{'flag': 'flag_discount', 'name': 'Kiemelt aj...",{'images': ['https://ahuazurewebblob0.azureedg...,"[description, ingredients, parameterList, nutr...",False,[],discount,LITER,1.0,,
2,3664574,"Auchan Nívó Pasztőrözött dobozos tej 1,5% 1 l",119589,127379,,3664574,none,db,5999086442253,False,...,"[{'flag': 'flag_auchan_brand', 'name': 'Auchan...",{'images': ['https://ahuazurewebblob0.azureedg...,"[description, ingredients, parameterList, nutr...",False,[],,LITER,1.0,,
3,4006748,"Auchan Nívó ESL Palackozott tej 1,5% 1 l",391628,469376,,4006748,none,db,5999086444561,False,...,"[{'flag': 'flag_auchan_brand', 'name': 'Auchan...",{'images': ['https://ahuazurewebblob0.azureedg...,"[description, ingredients, parameterList, nutr...",False,[],,LITER,1.0,,
4,3664577,"Auchan Nívó Pasztőrözött dobozos tej 2,8% 1 l",119590,127382,,3664577,none,db,5999086442260,False,...,"[{'flag': 'flag_auchan_brand', 'name': 'Auchan...",{'images': ['https://ahuazurewebblob0.azureedg...,"[description, ingredients, parameterList, nutr...",False,[],,LITER,1.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41014,3617785,Stanley STA60480 Bitfej készlet 7 részes,964185,80695,,3617785,none,db,5035048372449,False,...,"[{'flag': 'flag_national_delivery', 'name': 'O...",{'images': ['https://ahuazurewebblob0.azureedg...,[description],False,[],,DB,1.0,,
41015,4063294,"Kantáros Munkanadrág Terepmintás, 60% Pamut, 4...",521639,525904,,4063294,none,db,5907558441987,False,...,"[{'flag': 'flag_national_delivery', 'name': 'O...",{'images': ['https://ahuazurewebblob0.azureedg...,[description],False,[],,DB,1.0,,
41016,4079799,"LED ovál lámpa 12W, 1080lm, IP65, IK08, 4000K,...",582922,542409,,4079799,none,db,5908254802584,False,...,"[{'flag': 'flag_national_delivery', 'name': 'O...",{'images': ['https://ahuazurewebblob0.azureedg...,[description],False,[],,DB,1.0,,
41017,4120135,Actuel sherpa pléd 125x150 cm barna,563683,582745,,4120135,none,db,3665257281915,False,...,[],"{'images': [], 'videos': []}","[description, parameterList]",False,[],,DB,1.0,,


In [13]:
assert len(df_products) == len(df_variants) == len(df_prices)

In [14]:
df_variants.to_csv(f"../databases/csv/{DATE}/variants_raw.csv", escapechar="\\", compression='xz')
df_products.to_csv(f"../databases/csv/{DATE}/products_raw.csv", escapechar="\\", compression='xz')
df_prices.to_csv(f"../databases/csv/{DATE}/prices_raw.csv", escapechar="\\", compression='xz')

In [15]:
df_variants.loc[df_variants["details"].str.contains("allergens").fillna(False)]

Unnamed: 0,id,name,sku,productId,addedName,selectValue,status,unit,eanCode,aided,...,flags,media,details,isInVirtualStock,shoppingListsContain,offerType,packageUnit,packageSize,itemVolumeInfo,roll


In [16]:
df_variants["details"].str[2].str.contains("allergens")

0        False
1        False
2        False
3        False
4        False
         ...  
41014      NaN
41015      NaN
41016      NaN
41017      NaN
41018      NaN
Name: details, Length: 35424, dtype: object

In [17]:
df_variants.loc[df_variants["details"].str[-1] == "allergens", "id"]

2        3664574
3        4006748
4        3664577
6        3665026
8        3600419
          ...   
40670      50490
40671      50487
40672      50463
40673      50751
40674      50748
Name: id, Length: 8326, dtype: int64

In [18]:
df_variants = pd.read_csv(f"../databases/csv/{DATE}/variants_raw.csv", index_col=0, compression='xz')
df_products = pd.read_csv(f"../databases/csv/{DATE}/products_raw.csv", index_col=0, compression='xz')
df_prices = pd.read_csv(f"../databases/csv/{DATE}/prices_raw.csv", index_col=0, compression='xz')

  df_variants = pd.read_csv(f"../databases/csv/{DATE}/variants_raw.csv", index_col=0, compression='xz')


In [19]:
df_products

Unnamed: 0,id,categoryId,categoryName,brandName,defaultVariant,selectedVariant,eancode,usedItem,reviewable,reviewSum,categories,inCategories,isNewProduct,stockInfos,adultsOnly,shipmentDays,ageConfirmed,isNonFood,documents,extraWeightPrice
0,534116,6538,Friss tej,Tarka,4071506,4071506,5998207771623,"{'canReturn': False, 'return': False}",login,"{'sumCount': 0, 'average': 0}","[{'id': 1, 'level': 0, 'name': 'Élelmiszer', '...",[6538],False,[],False,0,False,False,[],
1,534119,6538,Friss tej,Tarka,4071509,4071509,5998207771609,"{'canReturn': False, 'return': False}",login,"{'sumCount': 0, 'average': 0}","[{'id': 1, 'level': 0, 'name': 'Élelmiszer', '...",[6538],False,[],False,0,False,False,[],
2,127379,6538,Friss tej,Auchan,3664574,3664574,5999086442253,"{'canReturn': False, 'return': False}",login,"{'sumCount': 0, 'average': 0}","[{'id': 1, 'level': 0, 'name': 'Élelmiszer', '...",[6538],False,[],False,0,False,False,[],
3,469376,6538,Friss tej,Auchan,4006748,4006748,5999086444561,"{'canReturn': False, 'return': False}",login,"{'sumCount': 0, 'average': 0}","[{'id': 1, 'level': 0, 'name': 'Élelmiszer', '...",[6538],False,[],False,0,False,False,[],
4,127382,6538,Friss tej,Auchan,3664577,3664577,5999086442260,"{'canReturn': False, 'return': False}",login,"{'sumCount': 0, 'average': 0}","[{'id': 1, 'level': 0, 'name': 'Élelmiszer', '...",[6538],False,[],False,0,False,False,[],
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41014,80695,12446,Fúrószárak,Black & Decker,3617785,3617785,5035048372449,"{'canReturn': False, 'return': False}",login,"{'sumCount': 0, 'average': 0}","[{'id': 11620, 'name': 'Kert, Szabadidő, Meden...",[12446],False,[],False,0,False,True,[],
41015,525904,12434,Munkaruha,NEO,4063294,4063294,5907558441987,"{'canReturn': False, 'return': False}",login,"{'sumCount': 0, 'average': 0}","[{'id': 11620, 'name': 'Kert, Szabadidő, Meden...",[12434],False,[],False,0,False,True,[],
41016,542409,12509,"Szerelőlámpa, reflektor",Anco,4079799,4079799,5908254802584,"{'canReturn': False, 'return': False}",login,"{'sumCount': 0, 'average': 0}","[{'id': 11620, 'name': 'Kert, Szabadidő, Meden...",[12509],False,[],False,0,False,True,[],
41017,582745,12821,Ágytakaró és pléd,Actuel,4120135,4120135,3665257281915,"{'canReturn': False, 'return': False}",login,"{'sumCount': 0, 'average': 0}","[{'id': 12617, 'name': 'Állateledel, Otthon, H...",[12821],False,[],False,0,False,True,[],


In [20]:
df_variants

Unnamed: 0,id,name,sku,productId,addedName,selectValue,status,unit,eanCode,aided,...,flags,media,details,isInVirtualStock,shoppingListsContain,offerType,packageUnit,packageSize,itemVolumeInfo,roll
0,4071506,"Tarka ESL félzsíros tej 2,8% 0,5 l",520525,534116,,4071506,none,db,5998207771623,False,...,"[{'flag': 'flag_hungarian', 'name': 'Magyar te...",{'images': ['https://ahuazurewebblob0.azureedg...,"['description', 'ingredients', 'parameterList'...",False,[],,LITER,0.5,,
1,4071509,"Tarka ESL zsírszegény tej 1,5% 1 l",520526,534119,,4071509,none,db,5998207771609,False,...,"[{'flag': 'flag_discount', 'name': 'Kiemelt aj...",{'images': ['https://ahuazurewebblob0.azureedg...,"['description', 'ingredients', 'parameterList'...",False,[],discount,LITER,1.0,,
2,3664574,"Auchan Nívó Pasztőrözött dobozos tej 1,5% 1 l",119589,127379,,3664574,none,db,5999086442253,False,...,"[{'flag': 'flag_auchan_brand', 'name': 'Auchan...",{'images': ['https://ahuazurewebblob0.azureedg...,"['description', 'ingredients', 'parameterList'...",False,[],,LITER,1.0,,
3,4006748,"Auchan Nívó ESL Palackozott tej 1,5% 1 l",391628,469376,,4006748,none,db,5999086444561,False,...,"[{'flag': 'flag_auchan_brand', 'name': 'Auchan...",{'images': ['https://ahuazurewebblob0.azureedg...,"['description', 'ingredients', 'parameterList'...",False,[],,LITER,1.0,,
4,3664577,"Auchan Nívó Pasztőrözött dobozos tej 2,8% 1 l",119590,127382,,3664577,none,db,5999086442260,False,...,"[{'flag': 'flag_auchan_brand', 'name': 'Auchan...",{'images': ['https://ahuazurewebblob0.azureedg...,"['description', 'ingredients', 'parameterList'...",False,[],,LITER,1.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41014,3617785,Stanley STA60480 Bitfej készlet 7 részes,964185,80695,,3617785,none,db,5035048372449,False,...,"[{'flag': 'flag_national_delivery', 'name': 'O...",{'images': ['https://ahuazurewebblob0.azureedg...,['description'],False,[],,DB,1.0,,
41015,4063294,"Kantáros Munkanadrág Terepmintás, 60% Pamut, 4...",521639,525904,,4063294,none,db,5907558441987,False,...,"[{'flag': 'flag_national_delivery', 'name': 'O...",{'images': ['https://ahuazurewebblob0.azureedg...,['description'],False,[],,DB,1.0,,
41016,4079799,"LED ovál lámpa 12W, 1080lm, IP65, IK08, 4000K,...",582922,542409,,4079799,none,db,5908254802584,False,...,"[{'flag': 'flag_national_delivery', 'name': 'O...",{'images': ['https://ahuazurewebblob0.azureedg...,['description'],False,[],,DB,1.0,,
41017,4120135,Actuel sherpa pléd 125x150 cm barna,563683,582745,,4120135,none,db,3665257281915,False,...,[],"{'images': [], 'videos': []}","['description', 'parameterList']",False,[],,DB,1.0,,


In [21]:
df_nutritions = pd.read_csv("../databases/csv/2022_12_19/nutritions_raw.csv", index_col=0)

In [22]:
df_nutritions

Unnamed: 0,id,Energia (kJ / kcal),Zsír (g),Telített zsírsavak (g),Szénhidrát (g),Cukrok (g),Rost (g),Fehérje (g),Só (g),Riboflavin (mg),...,Ebből koleszterin,A vitamin (µg),Tiamin (mg),Niacin (mg),Pantoténsav (mg),Klorid (mg),Cink (mg),Mangán (mg),Fluor (µg),Króm (µg)
0,14914,1199/285,8.8000,4.8000,42.0000,2.3000,0.000000,8.3000,1.4000,,...,,,,,,,,,,
1,3963016,1091.830000/260.770000,7.670000,3.540000,45.350000,2.690000,3.840000,0.670000,3.840000,,...,,,,,,,,,,
2,4001800,1270/303,14.0000,7.6000,39.0000,17.0000,,4.1000,0.7800,,...,,,,,,,,,,
3,4001797,1122/268,13.0000,7.0000,34.0000,14.0000,,3.7000,0.7000,,...,,,,,,,,,,
4,3988465,1755.000000/420.000000,23.400000,15.000000,42.400000,12.600000,,8.100000,0.840000,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10595,4117341,2030/601,23,9,67,33,,56,066,,...,,,,,,,,,,
10596,4117392,129/31,04,01,61,02,,07,09,,...,,,,,,,,,,
10597,4117335,2079/625,28,11,61,30,,55,123,,...,,,,,,,,,,
10598,4117305,1835/437,18,72,636,28,,51,03,,...,,,,,,,,,,


In [23]:
# merge variants with prices and nutritions
df_merged = df_variants.merge(df_prices, on="id")
df_merged = df_merged.merge(df_nutritions, on="id")

In [24]:
df_merged.sort_values(by="discountPercentage", ascending=False).head(100)[["name", "gross", "discountPercentage", "grossDiscounted"]]

Unnamed: 0,name,gross,discountPercentage,grossDiscounted
8336,Morzsás linzer kajszibarack ízű töltelékkel tö...,199,50,100
8335,Ischler kajsziízzel töltve,199,50,100
8345,Porcukros linzer kajszibarack ízű töltelékkel ...,199,50,100
4869,Knoppers töltött ostya kakaós tejbevonóval fél...,225,47,119
3997,"Alföldi gyorsfagyasztott sajttal, sonkával töl...",1859,46,999
...,...,...,...,...
8609,Sajtos pogácsa 85 g,175,26,129
6344,Univer majonézes torma 70 g,339,26,250
6911,Hamé csirkemájas libamájjal 105 g,445,26,330
4934,3Bit tejcsokoládéval bevont keksz szelet mogyo...,229,26,169


In [25]:
df_merged.loc[df_merged["name"].str.contains("zab"), ["name", "Fehérje (g)"]]

Unnamed: 0,name,Fehérje (g)
160,Natrue UHT zabital 1 l,1.4000
201,ALPRO cukormentes zabital 1 l,0.2000
255,ALPRO zabital 1 l,0.3000
268,Alpro zabital hozzáadott kalciummal és D-vitam...,0.7000
269,Alpro zabital hozzáadott kalciummal és D-vitam...,0.7000
...,...,...
8519,Abonett Classic extrudált kenyér zabbal 100 g,13.0000
8611,Nett Food teljeskiőrlésű zabszelet 50 g,15.3000
9464,CSIKÓS CSÍPŐS SZALÁMI Szürkemarha és sertéshús...,21
9476,"SZALÁMI mangalicahússal házias jellegű, füstöl...",21


# Collect categories

In [26]:
start_time = time.time()
all_categories_df = []
for member in tar.getmembers():
    if member.name.find("product.json") != -1:
        file_content = tar.extractfile(member.name).read().decode("utf-8")
        data = json.loads(file_content)
        all_categories_df.append(pd.DataFrame(data["categories"]))


df_categories = pd.concat(all_categories_df, ignore_index=True)
end_time = time.time()
print(end_time - start_time)
print((end_time - start_time) / len(df_products))

684.5262358188629
0.019323798436621017


In [27]:
df_categories["id"].value_counts()

1        13307
4         6860
12617     6216
5549      4067
13307     3401
         ...  
6279         1
7814         1
12770        1
12839        1
11863        1
Name: id, Length: 1751, dtype: int64

In [28]:
df_categories.drop_duplicates(subset=["id"], inplace=True)

In [29]:
df_categories = df_categories.sort_values(by="id").reset_index(drop=True)

In [30]:
df_categories

Unnamed: 0,id,level,name,imageUrl,thumbnailUrl,mobileImageUrl,productCount,comparable,discountedCount,childCount,children,boutique,preferredDisplayStyle,promotionContentType,promotionContentId,slug,promotionRecommendationCarouselId
0,1,0.0,Élelmiszer,,,https://ahuazurewebblob0.blob.core.windows.net...,12454,False,437,9,[],False,icon_style,banner,1528.0,elelmiszer,
1,4,0.0,Elektronika,,,https://ahuazurewebblob0.blob.core.windows.net...,4250,True,81,9,[],False,image_style,banner,1462.0,elektronika,
2,5549,1.0,Tartós élelmiszerek,,,https://ahuazurewebblob0.blob.core.windows.net...,3783,False,61,12,[],False,,banner,1528.0,tartos-elelmiszerek,
3,5551,1.0,"Édesség, csokoládé, nasi",,,https://ahuazurewebblob0.blob.core.windows.net...,1304,False,39,13,[],False,,banner,1528.0,edesseg-csokolade-nasi,
4,5552,1.0,Egészséges életmód,,,https://ahuazurewebblob0.blob.core.windows.net...,1486,False,38,7,[],False,,,,egeszseges-eletmod,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1746,14206,1.0,Cumisüveg és cumi,,,,90,False,7,2,[],False,,,,cumisuveg-es-cumi,
1747,14211,3.0,"32"" -ig",,,,24,True,0,0,[],False,,banner,1462.0,32-ig,6.0
1748,14214,3.0,"33""-50""",,,,42,True,0,0,[],False,,banner,1462.0,33-50,6.0
1749,14217,3.0,"55""-65""",,,,35,True,0,0,[],False,,banner,1462.0,55-65,6.0


In [33]:
df_categories.to_csv(f"../databases/csv/{DATE}/categories_raw.csv", escapechar="\\")

# Compress after extraction

In [32]:
!xz -zk ../databases/archives/2023_01_15_rawdata.tar