In [1]:
import random
import typing

import numpy as np
import pandas as pd
from IPython.display import display

In [47]:
# Seed Python's module-level generator so any direct use of `random`
# in this notebook is repeatable.
random.seed(123)

# A (label, count) pair: the label is repeated `count` times when the
# pair is expanded into rows.
CharacterPair = tuple[str, int]
CharacterPairs = list[CharacterPair]

# Per-store specification: how many rows each product label contributes.
sales_1: CharacterPairs = [('A', 3), ('B', 5), ('C', 7)]
sales_2: CharacterPairs = [('A', 2), ('B', 8), ('C', 5)]

def generate_shuffled_list(char_pairs: CharacterPairs, seed:int) -> list[str]:
    """Expand (label, count) pairs into a flat list and shuffle it reproducibly.

    Args:
        char_pairs: Sequence of ``(label, count)`` tuples; each label is
            repeated ``count`` times in the result.
        seed: Seed for the shuffle. The same ``char_pairs`` and ``seed``
            always produce the same ordering.

    Returns:
        A new list containing every label repeated by its count, in a
        seeded pseudo-random order. Empty input yields an empty list.
    """
    # A private generator yields the same sequence as seeding the
    # module-level one, but does not reset the process-wide random state.
    rng = random.Random(seed)
    flattened = [char for char, multiplier in char_pairs for _ in range(multiplier)]
    rng.shuffle(flattened)
    return flattened

def generate_sales_data(char_pairs: CharacterPairs, seed:int, name: str) -> pd.DataFrame:
    """Build a reproducible two-column sales table for one store.

    Args:
        char_pairs: ``(label, count)`` tuples passed to
            ``generate_shuffled_list``; the total of the counts is the
            number of rows produced.
        seed: Seed used for both the product ordering and the sales
            figures, so the same arguments always give the same frame.
        name: Label stored as the name of the frame's index.

    Returns:
        A DataFrame with a default integer index named ``name`` and two
        columns: ``'Product'`` (the shuffled labels) and ``'Sales'``
        (distinct integers drawn from 10 to 99 inclusive).

    Raises:
        ValueError: If more than 90 rows are requested, because the sales
            figures are sampled without replacement from ``range(10, 100)``.
    """
    products = pd.Series(generate_shuffled_list(char_pairs, seed), name='Product')

    # A private generator seeded with `seed` draws the same values as
    # re-seeding the module-level generator would, without resetting the
    # process-wide random state.
    rng = random.Random(seed)
    sales = pd.Series(rng.sample(range(10, 100), len(products)), name='Sales')

    # The Series names become the column labels.
    sales_data = pd.concat([products, sales], axis=1)
    sales_data.index.name = name
    return sales_data
    
    
# One table per store; each store uses its own seed so the two datasets
# differ while remaining reproducible.
sales1_df = generate_sales_data(char_pairs=sales_1, seed=123, name='Store 1')
sales2_df = generate_sales_data(char_pairs=sales_2, seed=321, name='Store 2')

In [48]:
# Render both store tables; `display` is imported in the notebook's import cell.
display(sales1_df, sales2_df)

Unnamed: 0_level_0,Product,Sales
Store 1,Unnamed: 1_level_1,Unnamed: 2_level_1
0,C,16
1,B,44
2,C,21
3,B,62
4,A,23
5,B,14
6,C,58
7,C,78
8,C,81
9,C,52


Unnamed: 0_level_0,Product,Sales
Store 2,Unnamed: 1_level_1,Unnamed: 2_level_1
0,C,45
1,A,60
2,A,26
3,C,57
4,C,81
5,B,66
6,C,53
7,B,41
8,C,87
9,B,68


In [40]:
# Store 2 rows whose Sales value exceeds store 1's at the same index label.
store2_higher = sales2_df['Sales'].gt(sales1_df['Sales'])
sales2_df[store2_higher]

Unnamed: 0,Product,Sales
0,C,45
1,A,60
2,A,26
4,C,81
5,B,66
8,C,87
9,B,68
10,B,64
11,B,95
12,B,38


In [41]:
# Store 2 rows whose Sales value is below store 1's at the same index label.
store2_lower = sales2_df['Sales'].lt(sales1_df['Sales'])
sales2_df[store2_lower]

Unnamed: 0,Product,Sales
3,C,57
6,C,53
7,B,41
13,B,11


In [42]:
# Select store 2 rows by integer position (not by index label).
row_positions = [2, 5, 6, 8]
sales2_df.iloc[row_positions]

Unnamed: 0,Product,Sales
2,A,26
5,B,66
6,C,53
8,C,87


In [52]:
# Total sales per product for store 1, plus a look at the resulting index.
tab1 = sales1_df.groupby('Product').sum()
display(tab1)
display(tab1.index)
# DataFrame.info() prints its report itself and returns None, so it is
# called directly; wrapping it in display() would echo a stray "None".
tab1.info()

# Same aggregation for store 2.
tab2 = sales2_df.groupby('Product').sum()
display(tab2)

Unnamed: 0_level_0,Sales
Product,Unnamed: 1_level_1
A,91
B,249
C,359


Index(['A', 'B', 'C'], dtype='object', name='Product')

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, A to C
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Sales   3 non-null      int64
dtypes: int64(1)
memory usage: 48.0+ bytes


None

Unnamed: 0_level_0,Sales
Product,Unnamed: 1_level_1
A,86
B,458
C,323


In [59]:
# Put the per-product totals of both stores side by side, one column per
# store. The default inner join keeps only products present in both tables.
sales_tab = (
    tab1.rename(columns={'Sales': 'Store 1'})
    .merge(tab2.rename(columns={'Sales': 'Store 2'}), on='Product')
)
sales_tab

Unnamed: 0_level_0,Store 1,Store 2
Product,Unnamed: 1_level_1,Unnamed: 2_level_1
A,91,86
B,249,458
C,359,323
