In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from numpy import random as rnd

%matplotlib inline

In [2]:
url = 'https://raw.githubusercontent.com/gamesconsort/cga-internship-projects/main/probabilistic-sales-forecasting/data/supermarket_sales_data'
df = pd.read_csv(url) # Make sure the url is the raw version of the file on GitHub
df['Date'].replace('2019', '2022', regex=True, inplace=True) #replace 2019 with 2022
df['Date'] = pd.to_datetime(df['Date'])  # convert date column to a datetime format

df.head()

Unnamed: 0,Invoice ID,Branch,City,Customer type,Gender,Product line,Unit price,Quantity,Tax 5%,Total,Date,Time,Payment,cogs,gross margin percentage,gross income,Rating
0,750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.1415,548.9715,2022-01-05,13:08,Ewallet,522.83,4.761905,26.1415,9.1
1,226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.82,80.22,2022-03-08,10:29,Cash,76.4,4.761905,3.82,9.6
2,631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.2155,340.5255,2022-03-03,13:23,Credit card,324.31,4.761905,16.2155,7.4
3,123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.288,489.048,2022-01-27,20:33,Ewallet,465.76,4.761905,23.288,8.4
4,373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.2085,634.3785,2022-02-08,10:37,Ewallet,604.17,4.761905,30.2085,5.3


### See _EDA_ Notebook for Exploratory Data Analysis

# Question 1a

In [3]:
#total number of events
n_e = len(df)
n_e

1000

In [4]:
# total number of times sales were made in a day
    
number = np.array([len(df[df['Date']==i]) for i in df['Date'].unique()])
number

array([12, 11, 14, 14, 12,  9, 16,  9,  9, 10, 13, 16,  8, 20,  8, 13, 11,
       12,  8, 17, 12, 13, 18, 10, 12, 17, 14,  9, 11, 11, 14, 12,  9, 14,
       11, 16, 14,  9,  6, 10, 10, 12, 19, 11,  8, 10, 13,  9,  8,  7, 10,
        8, 11, 17, 17,  8,  8, 13, 13, 10,  6, 10, 11, 12, 16, 10, 18,  7,
        9,  8, 11, 12,  6, 11,  9,  8,  6,  8, 13,  7, 15,  6, 14,  8,  9,
        6, 18, 11,  9])

### Branch A

1.1

In [5]:
# total number of times sales were made in branch A daily

A = np.array([len(df[(df['Date'] == j) & (df['Branch'] == 'A')]) for j in df['Date'].unique()])
A

array([5, 4, 5, 6, 3, 4, 6, 2, 3, 3, 2, 6, 3, 5, 2, 5, 5, 5, 5, 2, 3, 7,
       3, 7, 6, 5, 2, 3, 6, 4, 4, 9, 3, 5, 4, 6, 5, 3, 2, 5, 4, 2, 5, 4,
       3, 3, 5, 5, 2, 2, 4, 1, 3, 3, 7, 1, 2, 3, 6, 3, 3, 6, 2, 5, 8, 2,
       4, 3, 4, 1, 3, 5, 2, 4, 1, 3, 4, 2, 2, 2, 6, 1, 3, 6, 2, 2, 4, 5,
       4])

In [6]:
# daily probabilities of success for branch A

Probability_A = [i/j for i, j in zip(A, number)]
Probability_A

[0.4166666666666667,
 0.36363636363636365,
 0.35714285714285715,
 0.42857142857142855,
 0.25,
 0.4444444444444444,
 0.375,
 0.2222222222222222,
 0.3333333333333333,
 0.3,
 0.15384615384615385,
 0.375,
 0.375,
 0.25,
 0.25,
 0.38461538461538464,
 0.45454545454545453,
 0.4166666666666667,
 0.625,
 0.11764705882352941,
 0.25,
 0.5384615384615384,
 0.16666666666666666,
 0.7,
 0.5,
 0.29411764705882354,
 0.14285714285714285,
 0.3333333333333333,
 0.5454545454545454,
 0.36363636363636365,
 0.2857142857142857,
 0.75,
 0.3333333333333333,
 0.35714285714285715,
 0.36363636363636365,
 0.375,
 0.35714285714285715,
 0.3333333333333333,
 0.3333333333333333,
 0.5,
 0.4,
 0.16666666666666666,
 0.2631578947368421,
 0.36363636363636365,
 0.375,
 0.3,
 0.38461538461538464,
 0.5555555555555556,
 0.25,
 0.2857142857142857,
 0.4,
 0.125,
 0.2727272727272727,
 0.17647058823529413,
 0.4117647058823529,
 0.125,
 0.25,
 0.23076923076923078,
 0.46153846153846156,
 0.3,
 0.5,
 0.6,
 0.18181818181818182,
 0.41666

In [7]:
# binomial simulation for Branch A

rnd.binomial(number, Probability_A)

array([ 6,  7,  6,  7,  3,  4,  5,  2,  4,  2,  1,  4,  3,  3,  1,  5,  4,
        4,  6,  1,  5,  8,  5,  5,  7,  4,  4,  1,  4,  5,  3, 10,  3,  5,
        5,  4,  6,  4,  1,  4,  2,  2,  7,  1,  4,  4,  4,  3,  0,  1,  3,
        1,  2,  2,  5,  3,  1,  5,  8,  3,  1,  4,  3,  6,  7,  2,  4,  2,
        2,  0,  0,  5,  1,  4,  3,  1,  6,  1,  1,  2,  3,  2,  4,  7,  2,
        3,  2,  3,  6])

In [8]:
# binomial simulation for Branch A with expectation

bino = [(sum(rnd.binomial(a, b, 1000))/1000).astype(int) for a, b in zip (number, Probability_A)]

bino

[5,
 3,
 4,
 6,
 2,
 4,
 5,
 2,
 2,
 2,
 1,
 6,
 3,
 4,
 1,
 4,
 4,
 5,
 4,
 2,
 2,
 6,
 2,
 6,
 6,
 5,
 1,
 2,
 6,
 3,
 4,
 9,
 3,
 5,
 4,
 6,
 5,
 2,
 2,
 4,
 4,
 1,
 4,
 3,
 2,
 2,
 5,
 4,
 2,
 1,
 4,
 1,
 3,
 3,
 7,
 1,
 2,
 3,
 5,
 2,
 3,
 5,
 2,
 4,
 7,
 2,
 4,
 2,
 4,
 1,
 3,
 4,
 1,
 4,
 1,
 3,
 3,
 2,
 2,
 1,
 6,
 0,
 3,
 6,
 1,
 1,
 4,
 4,
 3]

***1.2**

In [9]:
# total number of times sales were made in Branch A/ total number of events in Branch A

n_a = len(df[df['Branch'] == 'A'])
n_a

340

In [10]:
#probability of success in Branch A wrt total events

p_a = n_a/n_e
p_a

0.34

In [11]:
#possible daily outcomes for branch A in Q2

bino2 = [((sum(rnd.binomial(1000, 0.34, 1000))/1000).astype(int) + rnd.binomial(a, b) - n_a) for a, b in zip (number, Probability_A)]
bino2

[4,
 2,
 5,
 7,
 4,
 3,
 7,
 1,
 3,
 4,
 1,
 6,
 1,
 5,
 2,
 7,
 2,
 3,
 6,
 3,
 3,
 2,
 2,
 3,
 9,
 4,
 0,
 3,
 6,
 4,
 2,
 9,
 1,
 6,
 4,
 3,
 6,
 2,
 5,
 6,
 3,
 4,
 7,
 2,
 3,
 1,
 5,
 4,
 0,
 2,
 5,
 0,
 4,
 3,
 6,
 -1,
 3,
 2,
 5,
 0,
 0,
 2,
 3,
 7,
 7,
 1,
 1,
 4,
 4,
 0,
 0,
 4,
 1,
 4,
 -1,
 1,
 5,
 3,
 4,
 2,
 3,
 -1,
 1,
 6,
 2,
 1,
 2,
 4,
 4]

### Branch B

1.1

In [12]:
# total number of times sales were made in branch B daily

B = np.array([len(df[(df['Date'] == j) & (df['Branch'] == 'B')]) for j in df['Date'].unique()])
B

array([ 3,  1,  3,  2,  6,  2,  8,  3,  3,  6,  5,  6,  3,  6,  4,  5,  3,
        3,  1, 10,  8,  2,  9,  3,  3,  7,  6,  3,  1,  4,  4,  1,  3,  6,
        4,  3,  5,  2,  2,  4,  2,  5,  7,  6,  2,  3,  6,  2,  3,  2,  3,
        2,  6, 10,  0,  4,  4,  4,  3,  5,  0,  1,  4,  3,  3,  3,  6,  1,
        2,  4,  5,  3,  3,  3,  2,  3,  1,  3,  5,  2,  6,  2,  7,  0,  6,
        2,  5,  4,  1])

In [13]:
# daily probabilities of success for branch B

Probability_B = [i/j for i, j in zip(B, number)]
Probability_B

[0.25,
 0.09090909090909091,
 0.21428571428571427,
 0.14285714285714285,
 0.5,
 0.2222222222222222,
 0.5,
 0.3333333333333333,
 0.3333333333333333,
 0.6,
 0.38461538461538464,
 0.375,
 0.375,
 0.3,
 0.5,
 0.38461538461538464,
 0.2727272727272727,
 0.25,
 0.125,
 0.5882352941176471,
 0.6666666666666666,
 0.15384615384615385,
 0.5,
 0.3,
 0.25,
 0.4117647058823529,
 0.42857142857142855,
 0.3333333333333333,
 0.09090909090909091,
 0.36363636363636365,
 0.2857142857142857,
 0.08333333333333333,
 0.3333333333333333,
 0.42857142857142855,
 0.36363636363636365,
 0.1875,
 0.35714285714285715,
 0.2222222222222222,
 0.3333333333333333,
 0.4,
 0.2,
 0.4166666666666667,
 0.3684210526315789,
 0.5454545454545454,
 0.25,
 0.3,
 0.46153846153846156,
 0.2222222222222222,
 0.375,
 0.2857142857142857,
 0.3,
 0.25,
 0.5454545454545454,
 0.5882352941176471,
 0.0,
 0.5,
 0.5,
 0.3076923076923077,
 0.23076923076923078,
 0.5,
 0.0,
 0.1,
 0.36363636363636365,
 0.25,
 0.1875,
 0.3,
 0.3333333333333333,
 0.1428

In [14]:
# binomial simulation for Branch B

rnd.binomial(number, Probability_B)

array([ 5,  1,  3,  1,  5,  0,  3,  2,  0,  7,  6,  7,  3,  6,  5,  5,  3,
        1,  1,  9,  8,  0,  5,  3,  2, 10,  5,  0,  2,  2,  4,  1,  1,  6,
        3,  2,  2,  2,  2,  3,  4,  5,  8,  6,  2,  3,  7,  2,  3,  2,  3,
        2,  2,  9,  0,  6,  3,  7,  4,  5,  0,  0,  3,  1,  8,  4,  6,  1,
        2,  3,  4,  6,  2,  4,  3,  2,  1,  3,  5,  4,  8,  3,  8,  0,  6,
        2,  7,  4,  2])

In [15]:
# binomial simulation for Branch B with expectation

binoB = [(sum(rnd.binomial(a, b, 1000))/1000).astype(int) for a, b in zip (number, Probability_B)]

binoB

[2,
 1,
 3,
 1,
 5,
 1,
 7,
 2,
 3,
 5,
 4,
 5,
 2,
 6,
 4,
 5,
 2,
 3,
 1,
 10,
 7,
 2,
 8,
 3,
 2,
 6,
 5,
 3,
 1,
 4,
 4,
 1,
 3,
 5,
 3,
 3,
 5,
 1,
 2,
 4,
 1,
 4,
 7,
 5,
 1,
 2,
 6,
 1,
 3,
 2,
 3,
 2,
 6,
 9,
 0,
 3,
 4,
 3,
 3,
 4,
 0,
 1,
 3,
 3,
 3,
 2,
 6,
 0,
 1,
 4,
 5,
 3,
 2,
 3,
 2,
 3,
 1,
 2,
 4,
 1,
 5,
 2,
 7,
 0,
 6,
 2,
 5,
 3,
 1]

1.2

In [16]:
# total number of times sales were made in Branch B/ total number of events in Branch B

n_b = len(df[df['Branch'] == 'B'])
n_b

332

In [17]:
#probability of success in Branch B wrt total events

p_b = n_b/n_e
p_b

0.332

In [18]:
#possible daily outcomes for branch B in Q2

binoB2 = [((sum(rnd.binomial(1000, 0.332, 1000))/1000).astype(int) + rnd.binomial(a, b) - n_b) for a, b in zip (number, Probability_B)]
binoB2

[3,
 -1,
 6,
 1,
 5,
 1,
 8,
 2,
 4,
 4,
 7,
 6,
 1,
 3,
 7,
 6,
 0,
 1,
 1,
 11,
 8,
 1,
 11,
 0,
 3,
 8,
 6,
 2,
 1,
 7,
 2,
 0,
 4,
 0,
 4,
 3,
 1,
 0,
 3,
 2,
 0,
 6,
 6,
 6,
 3,
 2,
 7,
 2,
 2,
 3,
 2,
 -1,
 4,
 7,
 -1,
 5,
 4,
 5,
 5,
 4,
 -1,
 2,
 3,
 2,
 4,
 1,
 4,
 -1,
 3,
 5,
 6,
 3,
 1,
 1,
 3,
 1,
 0,
 3,
 7,
 1,
 5,
 0,
 4,
 0,
 6,
 1,
 1,
 4,
 -1]

### Branch C

1.1

In [19]:
# total number of times sales were made in branch C daily

C = np.array([len(df[(df['Date'] == j) & (df['Branch'] == 'C')]) for j in df['Date'].unique()])
C

array([ 4,  6,  6,  6,  3,  3,  2,  4,  3,  1,  6,  4,  2,  9,  2,  3,  3,
        4,  2,  5,  1,  4,  6,  0,  3,  5,  6,  3,  4,  3,  6,  2,  3,  3,
        3,  7,  4,  4,  2,  1,  4,  5,  7,  1,  3,  4,  2,  2,  3,  3,  3,
        5,  2,  4, 10,  3,  2,  6,  4,  2,  3,  3,  5,  4,  5,  5,  8,  3,
        3,  3,  3,  4,  1,  4,  6,  2,  1,  3,  6,  3,  3,  3,  4,  2,  1,
        2,  9,  2,  4])

In [20]:
# daily probabilities of success for branch C

Probability_C = [i/j for i, j in zip(C, number)]
Probability_C

[0.3333333333333333,
 0.5454545454545454,
 0.42857142857142855,
 0.42857142857142855,
 0.25,
 0.3333333333333333,
 0.125,
 0.4444444444444444,
 0.3333333333333333,
 0.1,
 0.46153846153846156,
 0.25,
 0.25,
 0.45,
 0.25,
 0.23076923076923078,
 0.2727272727272727,
 0.3333333333333333,
 0.25,
 0.29411764705882354,
 0.08333333333333333,
 0.3076923076923077,
 0.3333333333333333,
 0.0,
 0.25,
 0.29411764705882354,
 0.42857142857142855,
 0.3333333333333333,
 0.36363636363636365,
 0.2727272727272727,
 0.42857142857142855,
 0.16666666666666666,
 0.3333333333333333,
 0.21428571428571427,
 0.2727272727272727,
 0.4375,
 0.2857142857142857,
 0.4444444444444444,
 0.3333333333333333,
 0.1,
 0.4,
 0.4166666666666667,
 0.3684210526315789,
 0.09090909090909091,
 0.375,
 0.4,
 0.15384615384615385,
 0.2222222222222222,
 0.375,
 0.42857142857142855,
 0.3,
 0.625,
 0.18181818181818182,
 0.23529411764705882,
 0.5882352941176471,
 0.375,
 0.25,
 0.46153846153846156,
 0.3076923076923077,
 0.2,
 0.5,
 0.3,
 0.4

In [21]:
# binomial simulation for Branch C

rnd.binomial(number, Probability_C)

array([ 5,  8,  9,  7,  1,  1,  0,  3,  1,  0,  7,  5,  2,  6,  3,  1,  2,
        2,  3,  9,  0,  3,  6,  0,  4,  3,  6,  2,  6,  2,  4,  4,  1,  3,
        2,  8,  5,  2,  1,  0,  3,  2,  6,  2,  4,  4,  1,  2,  1,  4,  3,
        3,  1,  2, 11,  5,  3,  5,  6,  3,  2,  4,  8,  3,  2,  5,  9,  3,
        2,  3,  5,  3,  1,  3,  7,  2,  1,  4,  7,  4,  0,  4,  3,  2,  2,
        4,  9,  1,  3])

In [22]:
# binomial simulation for Branch C with expectation

binoC = [(sum(rnd.binomial(a, b, 1000))/1000).astype(int) for a, b in zip (number, Probability_C)]

binoC

[4,
 6,
 6,
 6,
 3,
 2,
 2,
 3,
 3,
 1,
 6,
 4,
 1,
 8,
 1,
 3,
 2,
 3,
 1,
 4,
 0,
 3,
 5,
 0,
 2,
 5,
 5,
 2,
 3,
 3,
 6,
 1,
 3,
 2,
 2,
 6,
 3,
 4,
 1,
 1,
 3,
 4,
 6,
 1,
 2,
 3,
 2,
 2,
 2,
 3,
 2,
 4,
 2,
 4,
 10,
 3,
 2,
 6,
 3,
 1,
 2,
 2,
 5,
 4,
 5,
 5,
 8,
 2,
 2,
 3,
 3,
 4,
 0,
 3,
 5,
 2,
 0,
 3,
 6,
 2,
 3,
 3,
 4,
 2,
 0,
 2,
 8,
 2,
 3]

1.2

In [23]:
# total number of times sales were made in Branch C/ total number of events in Branch C

n_c = len(df[df['Branch'] == 'C'])
n_c

328

In [24]:
#probability of success in Branch C wrt total events

p_c = n_c/n_e
p_c

0.328

In [25]:
#possible daily outcomes for branch C in Q2

binoC2 = [((sum(rnd.binomial(1000, 0.328, 1000))/1000).astype(int) + rnd.binomial(a, b) - n_c) for a, b in zip (number, Probability_C)]
binoC2

[5,
 4,
 5,
 8,
 3,
 3,
 -1,
 2,
 -1,
 1,
 7,
 3,
 1,
 8,
 3,
 2,
 3,
 1,
 5,
 6,
 1,
 3,
 6,
 -1,
 2,
 3,
 5,
 4,
 6,
 2,
 9,
 2,
 1,
 4,
 2,
 8,
 4,
 4,
 4,
 -1,
 4,
 5,
 5,
 1,
 4,
 -1,
 0,
 2,
 1,
 1,
 4,
 6,
 0,
 4,
 12,
 4,
 3,
 9,
 3,
 1,
 1,
 4,
 6,
 4,
 4,
 4,
 7,
 1,
 2,
 6,
 1,
 6,
 0,
 2,
 7,
 4,
 -1,
 3,
 7,
 2,
 0,
 3,
 5,
 1,
 0,
 3,
 9,
 -1,
 6]

In [26]:
#generate dates for Q2

new_date = []
nd = pd.date_range(start="2022-04-01",end="2022-06-30").to_pydatetime().tolist()
for date in nd:
    a = str(date)
    new_date.append(a.split(' ')[0])
    
len(new_date)

91

In [27]:
aggregate = pd.DataFrame({'Date': pd.to_datetime(new_date[:89]),
                         'Branch A': bino2,
                         'Branch B': binoB2,
                         'Branch C': binoC2})

aggregate

Unnamed: 0,Date,Branch A,Branch B,Branch C
0,2022-04-01,4,3,5
1,2022-04-02,2,-1,4
2,2022-04-03,5,6,5
3,2022-04-04,7,1,8
4,2022-04-05,4,5,3
...,...,...,...,...
84,2022-06-24,2,6,0
85,2022-06-25,1,1,3
86,2022-06-26,2,1,9
87,2022-06-27,4,4,-1


# Question 1b

In [28]:
qty_e = sum(df.Quantity)
qty_e

5510

In [29]:
# quantity of goods sold in daily
    
qty_sold = [df.Quantity[df['Date'] == i].sum() for i in df['Date'].unique()]
qty_sold

[55,
 60,
 95,
 87,
 70,
 42,
 66,
 59,
 55,
 53,
 73,
 99,
 43,
 128,
 54,
 88,
 60,
 81,
 32,
 103,
 66,
 79,
 95,
 52,
 59,
 80,
 91,
 53,
 67,
 47,
 83,
 75,
 47,
 97,
 62,
 87,
 82,
 37,
 30,
 45,
 56,
 60,
 106,
 63,
 37,
 57,
 84,
 52,
 54,
 34,
 50,
 58,
 80,
 77,
 95,
 40,
 48,
 69,
 52,
 45,
 40,
 48,
 57,
 54,
 91,
 61,
 95,
 24,
 54,
 49,
 59,
 67,
 32,
 67,
 39,
 37,
 35,
 31,
 64,
 27,
 80,
 18,
 82,
 40,
 37,
 40,
 117,
 50,
 61]

## Branch A

In [30]:
#total quantity of goods sold in branch A daily 

qty_soldA = [df.Quantity[(df['Date'] == i) & (df['Branch'] == 'A')].sum() for i in df['Date'].unique()]
qty_soldA

[27,
 22,
 24,
 29,
 18,
 18,
 22,
 12,
 15,
 17,
 11,
 39,
 13,
 28,
 13,
 33,
 26,
 37,
 24,
 5,
 21,
 39,
 9,
 34,
 26,
 21,
 15,
 22,
 34,
 26,
 29,
 57,
 17,
 38,
 23,
 30,
 37,
 14,
 9,
 22,
 24,
 11,
 23,
 26,
 12,
 22,
 39,
 25,
 7,
 12,
 25,
 5,
 29,
 9,
 36,
 4,
 7,
 20,
 25,
 16,
 16,
 35,
 6,
 13,
 56,
 12,
 14,
 14,
 23,
 7,
 21,
 24,
 15,
 23,
 3,
 19,
 25,
 5,
 15,
 9,
 37,
 3,
 13,
 31,
 3,
 14,
 21,
 24,
 25]

In [31]:
# daily probabilities of success for branch A

Prob_A = [i/j for i, j in zip(qty_soldA, qty_sold)]
Prob_A

[0.4909090909090909,
 0.36666666666666664,
 0.25263157894736843,
 0.3333333333333333,
 0.2571428571428571,
 0.42857142857142855,
 0.3333333333333333,
 0.2033898305084746,
 0.2727272727272727,
 0.32075471698113206,
 0.1506849315068493,
 0.3939393939393939,
 0.3023255813953488,
 0.21875,
 0.24074074074074073,
 0.375,
 0.43333333333333335,
 0.4567901234567901,
 0.75,
 0.04854368932038835,
 0.3181818181818182,
 0.4936708860759494,
 0.09473684210526316,
 0.6538461538461539,
 0.4406779661016949,
 0.2625,
 0.16483516483516483,
 0.41509433962264153,
 0.5074626865671642,
 0.5531914893617021,
 0.3493975903614458,
 0.76,
 0.3617021276595745,
 0.3917525773195876,
 0.3709677419354839,
 0.3448275862068966,
 0.45121951219512196,
 0.3783783783783784,
 0.3,
 0.4888888888888889,
 0.42857142857142855,
 0.18333333333333332,
 0.2169811320754717,
 0.4126984126984127,
 0.32432432432432434,
 0.38596491228070173,
 0.4642857142857143,
 0.4807692307692308,
 0.12962962962962962,
 0.35294117647058826,
 0.5,
 0.086

In [32]:
# binomial simulation for Branch A

Out_A = [(sum(rnd.binomial(a, b, 1000))/1000).astype(int) for a, b in zip (qty_sold, Prob_A)]
Out_A

[27,
 21,
 23,
 29,
 17,
 17,
 22,
 11,
 14,
 17,
 10,
 38,
 12,
 27,
 12,
 33,
 25,
 37,
 23,
 4,
 20,
 39,
 8,
 34,
 25,
 21,
 15,
 22,
 33,
 25,
 29,
 56,
 17,
 38,
 22,
 29,
 36,
 14,
 8,
 22,
 24,
 10,
 22,
 25,
 11,
 22,
 38,
 24,
 7,
 11,
 24,
 4,
 28,
 8,
 35,
 3,
 6,
 19,
 24,
 16,
 16,
 35,
 6,
 13,
 55,
 12,
 13,
 14,
 22,
 7,
 21,
 23,
 14,
 23,
 2,
 19,
 24,
 5,
 15,
 9,
 37,
 2,
 13,
 30,
 2,
 13,
 21,
 23,
 24]

In [33]:
# total quantity of goods sold in Branch A for Q1

qty_a = sum(df.Quantity[df['Branch'] == 'A'])
qty_a

1859

In [34]:
#total quantity of goods sold in Q1 in Branch A

p_qtyA = qty_a/qty_e
p_qtyA

0.33738656987295823

In [35]:
# possible outcomes of quantity sold for Q2

Out_A2 = [(rnd.binomial(5510, 0.3374, 500).mean().astype(int) + rnd.binomial(a, b) - qty_a) for a, b in zip (qty_sold, Prob_A)]

Out_A2

[24,
 16,
 23,
 29,
 19,
 14,
 17,
 12,
 16,
 13,
 8,
 35,
 9,
 26,
 2,
 38,
 23,
 40,
 28,
 6,
 17,
 35,
 5,
 37,
 25,
 18,
 13,
 13,
 29,
 30,
 32,
 54,
 23,
 42,
 22,
 29,
 33,
 20,
 12,
 24,
 26,
 14,
 24,
 29,
 10,
 19,
 43,
 30,
 3,
 16,
 17,
 5,
 27,
 8,
 37,
 2,
 1,
 19,
 29,
 10,
 11,
 32,
 7,
 13,
 58,
 8,
 18,
 17,
 23,
 5,
 13,
 16,
 15,
 17,
 1,
 14,
 25,
 3,
 19,
 7,
 33,
 3,
 12,
 34,
 4,
 8,
 19,
 21,
 19]

## Branch B

In [36]:
#total quantity of goods sold in branch B daily 

qty_soldB = [df.Quantity[(df['Date'] == i) & (df['Branch'] == 'B')].sum() for i in df['Date'].unique()]
qty_soldB

[11,
 5,
 26,
 14,
 30,
 6,
 38,
 16,
 19,
 35,
 39,
 39,
 19,
 48,
 27,
 33,
 14,
 24,
 1,
 59,
 37,
 14,
 52,
 18,
 11,
 28,
 45,
 17,
 4,
 12,
 31,
 9,
 15,
 43,
 20,
 19,
 19,
 7,
 11,
 14,
 5,
 24,
 44,
 36,
 11,
 16,
 41,
 13,
 22,
 11,
 10,
 11,
 34,
 49,
 0,
 24,
 31,
 16,
 13,
 16,
 0,
 3,
 22,
 20,
 17,
 12,
 37,
 2,
 14,
 24,
 28,
 20,
 8,
 19,
 12,
 15,
 4,
 15,
 18,
 6,
 22,
 7,
 38,
 0,
 25,
 15,
 38,
 16,
 7]

In [37]:
# daily probabilities of success for branch B

Prob_B = [i/j for i, j in zip(qty_soldB, qty_sold)]
Prob_B

[0.2,
 0.08333333333333333,
 0.2736842105263158,
 0.16091954022988506,
 0.42857142857142855,
 0.14285714285714285,
 0.5757575757575758,
 0.2711864406779661,
 0.34545454545454546,
 0.660377358490566,
 0.5342465753424658,
 0.3939393939393939,
 0.4418604651162791,
 0.375,
 0.5,
 0.375,
 0.23333333333333334,
 0.2962962962962963,
 0.03125,
 0.5728155339805825,
 0.5606060606060606,
 0.17721518987341772,
 0.5473684210526316,
 0.34615384615384615,
 0.1864406779661017,
 0.35,
 0.4945054945054945,
 0.32075471698113206,
 0.05970149253731343,
 0.2553191489361702,
 0.37349397590361444,
 0.12,
 0.3191489361702128,
 0.44329896907216493,
 0.3225806451612903,
 0.21839080459770116,
 0.23170731707317074,
 0.1891891891891892,
 0.36666666666666664,
 0.3111111111111111,
 0.08928571428571429,
 0.4,
 0.41509433962264153,
 0.5714285714285714,
 0.2972972972972973,
 0.2807017543859649,
 0.4880952380952381,
 0.25,
 0.4074074074074074,
 0.3235294117647059,
 0.2,
 0.1896551724137931,
 0.425,
 0.6363636363636364,
 0

In [38]:
# binomial simulation for Branch B

Out_B = [(sum(rnd.binomial(a, b, 1000))/1000).astype(int) for a, b in zip (qty_sold, Prob_B)]
Out_B

[11,
 5,
 26,
 14,
 29,
 5,
 37,
 15,
 18,
 35,
 38,
 39,
 19,
 47,
 26,
 33,
 13,
 23,
 1,
 58,
 36,
 14,
 51,
 18,
 11,
 28,
 45,
 16,
 4,
 12,
 30,
 8,
 14,
 43,
 20,
 18,
 18,
 7,
 11,
 14,
 4,
 24,
 43,
 36,
 11,
 16,
 40,
 12,
 21,
 11,
 9,
 11,
 34,
 49,
 0,
 23,
 30,
 16,
 13,
 16,
 0,
 3,
 21,
 20,
 17,
 11,
 36,
 2,
 14,
 24,
 27,
 20,
 7,
 19,
 11,
 15,
 3,
 14,
 17,
 6,
 21,
 7,
 38,
 0,
 25,
 15,
 37,
 16,
 7]

In [39]:
# total quantity of goods sold in Branch B for Q1

qty_b = sum(df.Quantity[df['Branch'] == 'B'])
qty_b

1820

In [40]:
#total quantity of goods sold in Q1 in Branch B

p_qtyB = qty_b/qty_e
p_qtyB

0.33030852994555354

In [41]:
# possible outcomes of quantity sold for Q2

Out_B2 = [(rnd.binomial(5510, 0.3303, 500).mean().astype(int) + rnd.binomial(a, b) - qty_b) for a, b in zip (qty_sold, Prob_B)]

Out_B2

[8,
 4,
 31,
 16,
 26,
 4,
 41,
 15,
 24,
 27,
 38,
 40,
 14,
 53,
 24,
 38,
 9,
 30,
 0,
 58,
 35,
 13,
 52,
 14,
 5,
 21,
 41,
 21,
 4,
 18,
 39,
 7,
 11,
 47,
 17,
 15,
 16,
 6,
 11,
 8,
 5,
 26,
 38,
 48,
 8,
 18,
 44,
 10,
 18,
 12,
 9,
 14,
 42,
 46,
 0,
 18,
 28,
 17,
 15,
 17,
 0,
 0,
 24,
 16,
 17,
 11,
 41,
 2,
 16,
 28,
 28,
 13,
 5,
 17,
 11,
 10,
 1,
 13,
 14,
 9,
 17,
 7,
 42,
 1,
 23,
 19,
 38,
 19,
 8]

## Branch C

In [42]:
#total quantity of goods sold in branch B daily 

qty_soldC = [df.Quantity[(df['Date'] == i) & (df['Branch'] == 'C')].sum() for i in df['Date'].unique()]
qty_soldC

[17,
 33,
 45,
 44,
 22,
 18,
 6,
 31,
 21,
 1,
 23,
 21,
 11,
 52,
 14,
 22,
 20,
 20,
 7,
 39,
 8,
 26,
 34,
 0,
 22,
 31,
 31,
 14,
 29,
 9,
 23,
 9,
 15,
 16,
 19,
 38,
 26,
 16,
 10,
 9,
 27,
 25,
 39,
 1,
 14,
 19,
 4,
 14,
 25,
 11,
 15,
 42,
 17,
 19,
 59,
 12,
 10,
 33,
 14,
 13,
 24,
 10,
 29,
 21,
 18,
 37,
 44,
 8,
 17,
 18,
 10,
 23,
 9,
 25,
 24,
 3,
 6,
 11,
 31,
 12,
 21,
 8,
 31,
 9,
 9,
 11,
 58,
 10,
 29]

In [43]:
# daily probabilities of success for branch C

Prob_C = [i/j for i, j in zip(qty_soldC, qty_sold)]
Prob_C

[0.3090909090909091,
 0.55,
 0.47368421052631576,
 0.5057471264367817,
 0.3142857142857143,
 0.42857142857142855,
 0.09090909090909091,
 0.5254237288135594,
 0.38181818181818183,
 0.018867924528301886,
 0.3150684931506849,
 0.21212121212121213,
 0.2558139534883721,
 0.40625,
 0.25925925925925924,
 0.25,
 0.3333333333333333,
 0.24691358024691357,
 0.21875,
 0.3786407766990291,
 0.12121212121212122,
 0.3291139240506329,
 0.35789473684210527,
 0.0,
 0.3728813559322034,
 0.3875,
 0.34065934065934067,
 0.2641509433962264,
 0.43283582089552236,
 0.19148936170212766,
 0.27710843373493976,
 0.12,
 0.3191489361702128,
 0.16494845360824742,
 0.3064516129032258,
 0.4367816091954023,
 0.3170731707317073,
 0.43243243243243246,
 0.3333333333333333,
 0.2,
 0.48214285714285715,
 0.4166666666666667,
 0.36792452830188677,
 0.015873015873015872,
 0.3783783783783784,
 0.3333333333333333,
 0.047619047619047616,
 0.2692307692307692,
 0.46296296296296297,
 0.3235294117647059,
 0.3,
 0.7241379310344828,
 0.21

In [44]:
# binomial simulation for Branch C

Out_C = [(sum(rnd.binomial(a, b, 1000))/1000).astype(int) for a, b in zip (qty_sold, Prob_C)]
Out_C

[17,
 32,
 45,
 43,
 21,
 18,
 5,
 30,
 20,
 1,
 23,
 21,
 10,
 51,
 14,
 22,
 20,
 19,
 6,
 39,
 7,
 26,
 33,
 0,
 22,
 31,
 30,
 13,
 28,
 8,
 22,
 9,
 14,
 16,
 18,
 38,
 26,
 15,
 10,
 8,
 27,
 24,
 39,
 1,
 13,
 18,
 4,
 13,
 25,
 11,
 15,
 42,
 16,
 19,
 59,
 12,
 10,
 32,
 13,
 12,
 23,
 10,
 29,
 20,
 17,
 37,
 44,
 7,
 17,
 18,
 10,
 23,
 9,
 25,
 23,
 3,
 5,
 10,
 30,
 12,
 21,
 7,
 31,
 9,
 8,
 10,
 58,
 10,
 28]

In [45]:
# total quantity of goods sold in Branch C for Q1

qty_c = sum(df.Quantity[df['Branch'] == 'C'])
qty_c

1831

In [46]:
#total quantity of goods sold in Q1 in Branch C

p_qtyC = qty_c/qty_e
p_qtyC

0.3323049001814882

In [47]:
# possible outcomes of quantity sold for Q2

Out_C2 = [(rnd.binomial(5510, 0.3323, 500).mean().astype(int) + rnd.binomial(a, b) - qty_c) for a, b in zip (qty_sold, Prob_C)]

Out_C2

[18,
 32,
 44,
 49,
 24,
 15,
 5,
 45,
 23,
 -1,
 13,
 19,
 15,
 47,
 14,
 29,
 19,
 12,
 2,
 31,
 7,
 23,
 32,
 -2,
 16,
 33,
 32,
 15,
 22,
 10,
 19,
 10,
 16,
 15,
 23,
 37,
 23,
 19,
 11,
 12,
 25,
 21,
 40,
 -1,
 14,
 16,
 8,
 9,
 19,
 11,
 17,
 37,
 19,
 14,
 69,
 11,
 7,
 38,
 10,
 14,
 21,
 10,
 38,
 14,
 13,
 34,
 37,
 9,
 23,
 18,
 9,
 29,
 8,
 23,
 19,
 1,
 6,
 7,
 25,
 13,
 17,
 7,
 41,
 5,
 12,
 13,
 59,
 10,
 33]

In [48]:
#dataframe showing predicted quantities of goods sold daily in Q2 in each branch

aggregate2 = pd.DataFrame({'Date': pd.to_datetime(new_date[:89]),
                         'Branch A': Out_A2,
                         'Branch B': Out_B2,
                         'Branch C': Out_C2})

aggregate2

Unnamed: 0,Date,Branch A,Branch B,Branch C
0,2022-04-01,24,8,18
1,2022-04-02,16,4,32
2,2022-04-03,23,31,44
3,2022-04-04,29,16,49
4,2022-04-05,19,26,24
...,...,...,...,...
84,2022-06-24,4,23,12
85,2022-06-25,8,19,13
86,2022-06-26,19,38,59
87,2022-06-27,21,19,10
