In [None]:
# Install translator for catalog
%pip install googletrans==4.0.0-rc1
%pip install faker

In [1]:
import pandas as pd
import numpy as np
from googletrans import Translator
import time
import gc
import random
from faker import Faker
from datetime import datetime
import json

root = 'archive/'

# Dataset Description
### <a href='https://www.kaggle.com/datasets/svizor/retail-sales-forecasting-data'> Retail sales forcastiong data</a></br>
This dataset contains sales information from four stores of one of the retailers over 25 months. </br>
Participants are expected to use these files to develop models that can predict customer demand. </br>
Additionally, the dataset includes a holdout sample with sales data for a 1-month period for which </br>
forecasts should be provided.

### What will be done with data
1.	Create English names for products. (long process, data store in final_catalog.csv)
2.	Create synthetic clients, employees, and orders.
3.	Read online sales data and add all dimensions to it.
4.	Read offline data and reduce it from 7.5 million to 1.5 million records.
5.	Add all necessary dimensions to offline sales.

In [10]:
# ! Reading down in code !

# sales  = pd.read_csv(root + 'sales.csv', index_col=0)
# online  = pd.read_csv(root + 'online.csv', index_col=0)

# catalog = pd.read_csv(root + 'catalog.csv', index_col=0)
# stores  = pd.read_csv(root + 'stores.csv', index_col=0)

# price_history  = pd.read_csv(root + 'price_history.csv', index_col=0)

# actual_matrix  = pd.read_csv(root + 'actual_matrix.csv', index_col=0)
# discounts_history  = pd.read_csv(root + 'discounts_history.csv', index_col=0)
# markdowns  = pd.read_csv(root + 'markdowns.csv', index_col=0)

## catalog.csv
Purpose: Product catalog with characteristics.</br>
Columns:</br>
item_id: A unique identifier for each product</br>
dept_name: Product department (hierarchy level)</br>
class_name: Product class (hierarchy level)</br>
subclass_name: Product subclass (hierarchy level)</br>
item_type: Product type</br>
weight_volume: Volumetric weight</br>
weight_netto: Net weight</br>
fatness: Fat content</br>

In [2]:
catalog = pd.read_csv(root + 'catalog.csv', index_col=0)
catalog.head(3) # TRANSLATE dept_name class_name subclass_name item_type into ENGLISH, and add to catalog

Unnamed: 0,item_id,dept_name,class_name,subclass_name,item_type,weight_volume,weight_netto,fatness
0,da17e2d5feda,БУМАЖНО-ВАТНАЯ ПРОДУКЦИЯ,БУМАЖНАЯ ПРОДУКЦИЯ,ВЛАЖНЫЕ САЛФЕТКИ,,150.0,,
1,614de2b96018,БУМАЖНО-ВАТНАЯ ПРОДУКЦИЯ,ВАТНАЯ ПРОДУКЦИЯ,ВАТНЫЕ ДИСКИ,,30.0,,
2,0c1f1f3e3e11,БУМАЖНО-ВАТНАЯ ПРОДУКЦИЯ,ВАТНАЯ ПРОДУКЦИЯ,ВАТНЫЕ ДИСКИ,,,,


In [3]:
# Function to process batches to translate
def batch_translate(items, batch_size, pause_seconds):
    # create df for translation
    translator = Translator()
    names_translated = pd.DataFrame(columns=['name_ru', 'name_en'])

    for i in range(0, len(items), batch_size):
        batch = items[i:i+batch_size]
        
        # Translate each item in the batch
        for item in batch:
            try:
                translation = translator.translate(item, src='ru', dest='en').text
                names_translated.loc[len(names_translated)] = [item, translation.capitalize()]
            except Exception as e:
                names_translated.loc[len(names_translated)] = [item, 'Error']
        
        # Pause between batches
        time.sleep(pause_seconds)
    
    return names_translated

In [4]:
# get unique values
dept_name = catalog['dept_name'].unique()
class_name = catalog['class_name'].unique()
subclass_name = catalog['subclass_name'].unique()
item_type = catalog['item_type'].unique()

# Translate in batches of 100 with a 2-second pause between batches
batch_size = 100
pause_seconds = 2

print('dept_name', len(dept_name))
print('class_name', len(class_name))
print('subclass_name', len(subclass_name))
print('item_type', len(item_type))
print('item_id', len(catalog['item_id'].unique()))

dept_name 196
class_name 613
subclass_name 1007
item_type 674
item_id 219810


In [5]:
# Department name
dept_name_translated = batch_translate(dept_name, batch_size, pause_seconds)

# ! Manual substitute ! Department
# data = dept_name_translated['name_en']
# data[data.duplicated(keep=False)].unique()

dept_name_translated[dept_name_translated['name_en'] == 'Cakes']
dept_name_translated.iloc[10].name_en = 'Cupcakes'

# 196
len(dept_name_translated['name_en'].unique())

196

In [6]:
# Class name   
class_name_translated = batch_translate(class_name, batch_size, pause_seconds)

# ! Manual substitute ! Class
data = class_name_translated['name_en']

# data[data.duplicated(keep=False)].unique() # list of duplicates
# ['Paste', 'Wheat', 'Green', 'Cottage cheese', 'Domestic', 'Salmon',
#        'Honey', 'Black', 'Weight', 'Dumplings', 'Cakes', 'Puff', 'Bread',
#        'Import', 'Crackers', 'Other', 'Soy sauces']

class_name_translated[class_name_translated['name_en'] == 'Paste']
class_name_translated.iloc[600].name_en = 'Pasta'

class_name_translated[class_name_translated['name_en'] == 'Green']
class_name_translated.iloc[32].name_en = 'Greens'

class_name_translated[class_name_translated['name_en'] == 'Dumplings']
class_name_translated.iloc[285].name_en = 'Varenyki'

class_name_translated[class_name_translated['name_en'] == 'Cakes']
class_name_translated.iloc[205].name_en = 'Cupcakes'

# Change duplication
len(class_name_translated['name_en'].unique())

492

In [7]:
# Fix error translation
error_value = class_name_translated[class_name_translated['name_en'] == 'Error']['name_ru']
try_trans = batch_translate(error_value, batch_size, pause_seconds)
try_trans[try_trans['name_en'] == 'Error']

name_mapping = try_trans.set_index('name_ru')['name_en'].to_dict()

# Replace "Error" with the correct values from df2, and keep the original for unmatched keys
class_name_translated['name_en'] = class_name_translated.apply(
    lambda row: name_mapping.get(row['name_ru'], row['name_en']) if row['name_en'] == 'Error' else row['name_en'], axis=1
)

# 599
len(class_name_translated['name_en'].unique())

599

In [8]:
# Subclass name
subclass_name_translated = batch_translate(subclass_name, batch_size, pause_seconds)

# ! Manual substitute ! Subclass
# data = subclass_name_translated['name_en']
# data[data.duplicated(keep=False)].unique() # list of duplicates
# ['White', 'Piece', 'Wheat', 'Red', 'Green', 'Cakes', 'Potato',
#        'Traditional', 'Salmon', 'Weight', 'Own production', 'Jam',
#        'Sweet', 'Corn', 'Other accessories', 'Waffle cakes', 'Plates',
#        'Crackers', 'Breakfast', 'Cream', 'Lamps', 'Vegetables',
#        'Croissants', 'Bread', 'Pear', 'Yeast', 'Paste', 'Other', 'Pasta',
#        'Business lunch', 'Protein']

subclass_name_translated[subclass_name_translated['name_en'] == 'Green']
subclass_name_translated.iloc[32].name_en = 'Greens'

subclass_name_translated[subclass_name_translated['name_en'] == 'Cakes']
subclass_name_translated.iloc[124].name_en = 'Cupcakes'

subclass_name_translated[subclass_name_translated['name_en'] == 'Paste']
subclass_name_translated.iloc[432].name_en = 'Pastille'
subclass_name_translated.iloc[791].name_en = 'Pasta'

len(subclass_name_translated['name_en'].unique())

815

In [9]:
# Fix error translation
error_value = subclass_name_translated[subclass_name_translated['name_en'] == 'Error']['name_ru']
try_trans = batch_translate(error_value, batch_size, pause_seconds)
try_trans[try_trans['name_en'] == 'Error']

name_mapping = try_trans.set_index('name_ru')['name_en'].to_dict()
subclass_name_translated['name_en'] = subclass_name_translated.apply(
    lambda row: name_mapping.get(row['name_ru'], row['name_en']) if row['name_en'] == 'Error' else row['name_en'], axis=1
)

# 975
len(subclass_name_translated['name_en'].unique())

975

In [10]:
# Item type name
item_type = pd.Series(catalog['item_type'].unique()).dropna()
item_type_translated = batch_translate(item_type, batch_size, pause_seconds)

# # ! Manual substitute ! Department
# data = item_type_translated['name_en']
# data[data.duplicated(keep=False)].unique()
# ['Red', 'Green', 'Sausages', 'Coffee', 'Cottage cheese', 'Cakes',
#        'White', 'Ham', 'Vegetables', 'Seeds', 'Rolls', 'Black',
#        'Chocolate', 'Cream', 'Serum', 'Paste', 'Diapers', 'Jam',
#        'National', 'Olive', 'Fresh', 'Crackers', 'Spick', 'Fruit',
#        'Mashed potatoes', 'Mashed potatoes+side dish', 'Frozen',
#        'Pancakes', 'Bulbs', 'Vegetable', 'Brushes', 'Egg', 'Smoked',
#        'Blueberry', 'Radish', 'Red frozen', 'Corn', 'Cold']

item_type_translated[item_type_translated['name_en'] == 'Cakes']
item_type_translated.iloc[22].name_en = 'Cupcakes'

item_type_translated[item_type_translated['name_en'] == 'Green']
item_type_translated.iloc[3].name_en = 'Greens'

item_type_translated[item_type_translated['name_en'] == 'Paste']
item_type_translated.iloc[151].name_en = 'Pastille'
item_type_translated.iloc[210].name_en = 'Pasta'
# 674
len(item_type_translated['name_en'].unique())

545

In [21]:
# Fix error translation
error_value = item_type_translated[item_type_translated['name_en'] == 'Error']['name_ru']
# error_value
try_trans = batch_translate(error_value, batch_size, pause_seconds)
try_trans[try_trans['name_en'] == 'Error']

name_mapping = try_trans.set_index('name_ru')['name_en'].to_dict()
item_type_translated['name_en'] = item_type_translated.apply(
    lambda row: name_mapping.get(row['name_ru'], row['name_en']) if row['name_en'] == 'Error' else row['name_en'], axis=1
)

# 635
len(item_type_translated['name_en'].unique())


Series([], Name: name_ru, dtype: object)

In [25]:
# Add English department name to catalog
dept_catalog = pd.merge(catalog, dept_name_translated, left_on='dept_name', right_on='name_ru', how='left')
dept_catalog.drop(columns=['name_ru'], inplace=True)
dept_catalog.rename(columns={'name_en': 'dept_name_en'}, inplace=True)

# Add Eng department name to catalog
class_catalog = pd.merge(dept_catalog, class_name_translated, left_on='class_name', right_on='name_ru', how='left')
class_catalog.drop(columns=['name_ru'], inplace=True)
class_catalog.rename(columns={'name_en': 'class_name_en'}, inplace=True)

# Add Eng subclass name to catalog
subclass_catalog = pd.merge(class_catalog, subclass_name_translated, left_on='subclass_name', right_on='name_ru', how='left')
subclass_catalog.drop(columns=['name_ru'], inplace=True)
subclass_catalog.rename(columns={'name_en': 'subclass_name_en'}, inplace=True)

final_catalog = pd.merge(subclass_catalog, item_type_translated, left_on='item_type', right_on='name_ru', how='left')
final_catalog.drop(columns=['name_ru'], inplace=True)
final_catalog.rename(columns={'name_en': 'item_type_en'}, inplace=True)

dept_name_translated.to_csv('translate_products/dept_name_translated.csv')
class_name_translated.to_csv('translate_products/class_name_translated.csv')
subclass_name_translated.to_csv('translate_products/subclass_name_translated.csv')
item_type_translated.to_csv('translate_products/item_type_translated.csv')

final_catalog.head(3)

Unnamed: 0,item_id,dept_name,class_name,subclass_name,item_type,weight_volume,weight_netto,fatness,dept_name_en,class_name_en,subclass_name_en,item_type_en
0,da17e2d5feda,БУМАЖНО-ВАТНАЯ ПРОДУКЦИЯ,БУМАЖНАЯ ПРОДУКЦИЯ,ВЛАЖНЫЕ САЛФЕТКИ,,150.0,,,Paper products,Paper products,Wet napkins,
1,614de2b96018,БУМАЖНО-ВАТНАЯ ПРОДУКЦИЯ,ВАТНАЯ ПРОДУКЦИЯ,ВАТНЫЕ ДИСКИ,,30.0,,,Paper products,Cotton products,Cotton wheels,
2,0c1f1f3e3e11,БУМАЖНО-ВАТНАЯ ПРОДУКЦИЯ,ВАТНАЯ ПРОДУКЦИЯ,ВАТНЫЕ ДИСКИ,,,,,Paper products,Cotton products,Cotton wheels,


In [23]:
#final_catalog
final_catalog.to_csv('final_catalog.csv')

In [26]:
# Delete not used df from memory
del dept_catalog
del class_catalog
del subclass_catalog
del dept_name_translated
del class_name_translated
del subclass_name_translated
del item_type_translated

gc.collect()

0

## stores.csv
Purpose: Contains stores info data.</br>
Columns:</br>
store_id: Store number</br>
division: Store division</br>
format: Store format</br>
city: Location</br>
area: Store sales area</br>

In [None]:
stores  = pd.read_csv(root + 'stores.csv', index_col=0)

# stores['name'] = stores['division'] + " - " + stores['format']
# stores['location'] = stores['city'] + " (" + stores['area'].astype(str) + " sqm)"

stores.head(3)

## Create synthetic clients, orders, managers

In [4]:
# Initialize Faker
fake = Faker()

# Generate random clients
def generate_clients(num_clients):
    clients = []
    for _ in range(num_clients):
        dob = fake.date_of_birth(minimum_age=18, maximum_age=80)  # Generate date of birth between 18 and 80 years
        age = datetime.now().year - dob.year
        loyalty_card = str(random.randint(1000000000, 9999999999))  # 10-digit number
        clients.append({
            'client_id': fake.uuid4(),
            'client_name': fake.first_name(),
            'client_surname': fake.last_name(),
            'client_email': fake.email(),
            'client_phone': fake.phone_number(),
            'client_dob': dob.strftime("%Y-%m-%d"), 
            'client_age': age,
            'client_loyalty_card': loyalty_card if random.random() > 0.17 else 0000000000  # 17% missing cards 
        })

    return clients

# Generate random managers
def generate_managers(num_managers):
    positions = ['Sales Manager', 'Store Manager', 'Regional Manager', 'Area Supervisor']
    departments = ['Online Sales', 'Customer Service', 'Operations', 'Logistics']
    
    managers = []
    for _ in range(num_managers):
        managers.append({
            'manager_id': fake.uuid4(),  # Unique manager ID
            'manager_name': fake.first_name(),
            'manager_surname': fake.last_name(),
            'manager_position': random.choice(positions),  # Random position
            'manager_department': random.choice(departments)  # Random department
        })
    return managers

# Generate random orders
def generate_orders(num_orders, managers_df):
    orders = []
    order_status_choices = ['Pending', 'Confirmed', 'Shipped', 'Delivered', 'Cancelled']
    payment_methods = ['Credit Card', 'PayPal', 'Bank Transfer', 'Cash on Delivery']
    currencies = ['USD', 'EUR']
    packaging_choices = ['Standard', 'Gift Wrap', 'Eco-friendly', 'Custom Packaging']
    for _ in range(num_orders):
        manager = managers_df.sample(1).iloc[0] # random manager
        orders.append({
            'order_id': fake.uuid4(),  
            'order_number': fake.ean(length=8), 
            'order_payment_type': random.choice(payment_methods),  
            'order_status': random.choice(order_status_choices), 
            'order_currency': random.choice(currencies), 
            'order_packaging_instructions': random.choice(packaging_choices),
            'manager_id': manager['manager_id'] 
        })
            
    return orders

In [None]:
clients = generate_clients(100_000) # 100 000 clients for 2 years

# Loading the list to a file and reading it into a DataFrame works significantly faster (18.7 seconds) 
# than transforming the list directly into a DataFrame (over 22 minutes).
json_result = json.dumps(clients, indent=4)
with open(root + "clients.json", "w") as json_file:
    json_file.write(json_result)

print('json write')

In [None]:
# Create cliends df and add Nan values
clients_df = pd.read_json(root + "clients.json")
for col in ['client_phone', 'client_email', 'client_dob', 'client_age']:
    clients_df.loc[clients_df.sample(frac=0.3).index, col] = np.nan  # 30% of rows

clients_df.to_csv(root + "clients.csv")

clients_df.head(3)

In [None]:
managers= generate_managers(100) # 100 manager for 2 years
managers_df = pd.DataFrame(managers)
managers_df.to_csv(root + 'managers.csv')
managers_df.head(3)

In [None]:
orders = generate_orders(250_000, managers_df) # 250 000 orders in 2 years
json_result = json.dumps(orders, indent=4)
with open(root + "orders.json", "w") as json_file:
     json_file.write(json_result)

print('json write')

In [None]:
# Create orders df and add Nan values
orders_df = pd.read_json(root + "orders.json")
for col in ['order_payment_type', 'order_status', 'order_currency', 'order_packaging_instructions']:
    orders_df.loc[orders_df.sample(frac=0.3).index, col] = np.nan  # 30%

orders_df.to_csv(root + "orders.csv")

orders_df.head(3)

# Merge everything

## sales.csv and online.csv
Purpose: 1 This file contains aggregated store sales for specific dates. </br>
Purpose: 2 This file contains aggregated online sales by store for specific dates.</br>
Columns:</br>
date: Sales date</br>
item_id: A unique identifier for each product</br>
quantity: Total quantity of product sold per day</br>
price_base: Average sales price per day</br>
sum_total: Total daily sales amount</br>
store_id: Store number</br>

### Online sales

In [None]:
# Create online sales file
online_sales  = pd.read_csv(root + 'online.csv', index_col=0)
stores  = pd.read_csv(root + 'stores.csv', index_col=0)
discounts_history  = pd.read_csv(root + 'discounts_history.csv', index_col=0)

catalog = pd.read_csv('final_catalog.csv', index_col=0)
catalog = catalog.drop(columns=['dept_name', 'class_name' , 'subclass_name', 'item_type'])
catalog.columns

orders_df = pd.read_csv(root + 'orders.csv', index_col=0)
clients_df = pd.read_csv(root + 'clients.csv', index_col=0)
managers_df = pd.read_csv(root + 'managers.csv', index_col=0)

print(orders_df.shape, clients_df.shape, managers_df.shape)

(250000, 7) (100000, 8) (100, 5)


In [29]:
# Combine everything

orders_df = orders_df.merge(managers_df, on='manager_id', how='left')  # Order + manager
online_sales['order_id'] = random.choices(orders_df['order_id'], k=len(online_sales))  # + new random column
orders_df['client_id'] = random.choices(clients_df['client_id'], k=len(orders_df))  # + new random column
online_sales = online_sales.merge(orders_df, on='order_id', how='left')  # sales + orders
online_sales = online_sales.merge(clients_df, on='client_id', how='left')  # sales + client
online_sales = online_sales.merge(catalog, on='item_id', how='left')  # sales + products
online_sales = online_sales.merge(stores, on='store_id', how='left')  # sales + store

online_sales.columns

Index(['date', 'item_id', 'quantity', 'price_base', 'sum_total', 'store_id',
       'order_id', 'order_number', 'order_payment_type', 'order_status',
       'order_currency', 'order_packaging_instructions', 'manager_id',
       'manager_name', 'manager_surname', 'manager_position',
       'manager_department', 'client_id', 'client_name', 'client_surname',
       'client_email', 'client_phone', 'client_dob', 'client_age',
       'client_loyalty_card', 'weight_volume', 'weight_netto', 'fatness',
       'dept_name_en', 'class_name_en', 'subclass_name_en', 'item_type_en',
       'division', 'format', 'city', 'area'],
      dtype='object')

In [30]:
# recalculste sales
online_sales['sum_total'] = online_sales['price_base'] * online_sales['quantity']

# change price for basic
# online_sales['cost'] = online_sales['price_base']

# result = discounts_history[['item_id', 'sale_price_before_promo']].groupby('item_id')['sale_price_before_promo'].max().reset_index()
# result.rename(columns={'sale_price_before_promo': 'cost'}, inplace=True)

# online_sales = online_sales.merge(result, on='item_id', how='left') 

online_sales.head(3)

Unnamed: 0,date,item_id,quantity,price_base,sum_total,store_id,order_id,order_number,order_payment_type,order_status,...,weight_netto,fatness,dept_name_en,class_name_en,subclass_name_en,item_type_en,division,format,city,area
0,2023-08-04,4aa8dbe05246,3.0,12.4,37.2,1,fd6e0d6e-0ce2-469f-9e56-56d12305f2b8,97990154,Bank Transfer,,...,0.3,,Bread,The bread is white,Own production,White,Div1,Format-1,City1,1500
1,2023-08-04,4e0fbcf99cf9,2.0,56.27,112.54,1,ba9d9fcf-9b1c-495a-b518-a089f777ab6a,70756678,,,...,0.95,,Juices,For children and adults,0.9l.and more,Nectars,Div1,Format-1,City1,1500
2,2023-08-04,2e008b673129,2.0,56.27,112.54,1,e3c27070-5c16-4d7a-89c9-ed658e4a239f,53137210,Bank Transfer,,...,0.95,,Juices,For children and adults,0.9l.and more,Nectars,Div1,Format-1,City1,1500


In [31]:
online_sales.to_csv('online_sales.csv')

In [35]:
online_sales.loc[:100].to_csv('online_sales_sample_100_rows.csv')

### Offline sales 

In [37]:
# Crop dataset from 7.1 million records to 1.5 million records
sales  = pd.read_csv(root + 'sales.csv', index_col=0)

sales['date'] = pd.to_datetime(sales['date'])

# Group by date to ensure proportional sampling across all dates
grouped = sales.groupby(sales['date'])

# Target total number of records
target_records = 1_500_000

# Calculate the sampling fraction
sampling_fraction = target_records / len(sales)

# Sample from each group proportionally
reduced_sales = grouped.apply(lambda x: x.sample(frac=sampling_fraction, random_state=42)).reset_index(drop=True)

del sales

reduced_sales.shape

  reduced_sales = grouped.apply(lambda x: x.sample(frac=sampling_fraction, random_state=42)).reset_index(drop=True)


(1500018, 6)

In [38]:
offline_sales = reduced_sales.merge(catalog, on='item_id', how='left')  # sales + products
offline_sales = offline_sales.merge(stores, on='store_id', how='left')  # sales + store

offline_sales['client_id'] = random.choices(clients_df['client_id'], k=len(offline_sales))  # + new random column
num_nan = int(len(offline_sales) * 0.7)  # Calculate the number of NaN rows, 70%
# Randomly select rows to assign NaN to client_id
nan_indices = random.sample(range(len(offline_sales)), num_nan)
offline_sales.loc[nan_indices, 'client_id'] = np.nan

offline_sales = offline_sales.merge(clients_df, on='client_id', how='left')  # sales + client

offline_sales['sum_total'] = offline_sales['price_base'] * offline_sales['quantity']

In [39]:
offline_sales.to_csv('offline_sales.csv')

In [40]:
offline_sales.loc[:100].to_csv('offline_sales_sample_100_rows.csv')

## Additional tables

In [None]:
price_history  = pd.read_csv(root + 'price_history.csv', index_col=0)
actual_matrix  = pd.read_csv(root + 'actual_matrix.csv', index_col=0)
discounts_history  = pd.read_csv(root + 'discounts_history.csv', index_col=0)
markdowns  = pd.read_csv(root + 'markdowns.csv', index_col=0)

## markdowns.csv
Purpose: This file provides data on products sold at markdown prices in each store. </br>
Columns:</br>
date: Date of markdown</br>
item_id: A unique identifier for each product</br>
normal_price: Regular price</br>
price: Price during markdown</br>
quantity: Quantity sold at markdown</br>
store_id: Store number</br>

In [None]:
markdowns.head(3) # уцінка

## price_history.csv
Purpose: This file contains price changes data in each store.</br>
Columns:</br>
date: Date of price change</br>
item_id: A unique identifier for each product</br>
price: Item new price</br>
code: Price change code</br>
store_id: Store number</br>

In [None]:
price_history.head(3)

## actual_matrix.csv
Purpose: Contains the list of products available in stores.</br>
Columns:</br>
item_id: A unique identifier for each product</br>
date: Date of last product appearance in the current matrix</br>
store_id: Store number</br>

In [None]:
actual_matrix.head(3)

## discounts_history.csv
Purpose: Contains historical promo data for each specific store.</br>
Columns:</br>
date: Date</br>
item_id: A unique identifier for each product</br>
sale_price_before_promo: Price before promo period started</br>
sale_price_time_promo: Price during the promo period</br>
promo_type_code: Promo code type</br>
doc_id: Promo document number</br>
number_disc_day: Sequential day number of the current promo period</br>
store_id: Store number</br>

In [None]:
discounts_history.head(3)