In [1]:
import numpy as np
import pandas as pd

### Sales Data Generation

In [2]:
# Build a reproducible synthetic month of daily sales.
# The legacy global RNG is seeded once; the revenue noise is drawn first and
# the unit noise second, so the draw order below must not be changed.
np.random.seed(42)

# Scenario parameters.
n_days = 30
base = 1000.0
price_per_unit = 25.0

# Daily revenue: baseline plus Gaussian noise (sd 150), floored at zero.
noise = np.random.normal(0, 150, n_days)
revenue = np.clip(base + noise, 0, None)

# Units sold: revenue implied by the unit price, perturbed by Gaussian noise
# (sd 5), rounded to whole units and floored at zero.
expected_units = revenue / price_per_unit
units_noise = np.random.normal(0, 5, n_days)
units_sold = np.clip(np.round(expected_units + units_noise), 0, None).astype(int)

# One row per day; day numbers start at 1.
df_sales = pd.DataFrame(
    {
        'day': np.arange(1, n_days + 1),
        'sales_revenue': revenue,
        'units_sold': units_sold,
    }
)
df_sales

Unnamed: 0,day,sales_revenue,units_sold
0,1,1074.507123,40
1,2,979.260355,48
2,3,1097.153281,44
3,4,1228.454478,44
4,5,964.876994,43
5,6,964.879456,32
6,7,1236.881922,51
7,8,1115.115209,35
8,9,929.578842,31
9,10,1081.384007,44


In [3]:
# Pull the two measure columns out of df_sales as NumPy arrays
# (revenue first, units second) and display both.
sales_revenue_arr, units_sold_arr = (
    df_sales[column].to_numpy() for column in ('sales_revenue', 'units_sold')
)
sales_revenue_arr, units_sold_arr

(array([1074.50712295,  979.26035482, 1097.15328072, 1228.45447846,
         964.87699379,  964.87945646, 1236.88192233, 1115.11520937,
         929.57884211, 1081.38400654,  930.48734608,  930.14053696,
        1036.29434073,  713.0079633 ,  741.26232512,  915.65687061,
         848.07533195, 1047.13709989,  863.79638867,  788.1544448 ,
        1219.84731534,  966.13355493, 1010.1292307 ,  786.28777207,
         918.34259132, 1016.63838846,  827.35096339, 1056.35470275,
         909.90419651,  956.24593753]),
 array([40, 48, 44, 44, 43, 32, 51, 35, 31, 44, 41, 38, 41, 27, 22, 33, 32,
        47, 36, 23, 50, 37, 37, 35, 42, 45, 29, 41, 38, 43]))

### Combine Data

In [4]:
# Combine the two 1-D arrays into a single 2-column matrix:
# column 0 is revenue, column 1 is units sold (one row per day).
sales_matrix = np.stack((sales_revenue_arr, units_sold_arr), axis=1)
sales_matrix

array([[1074.50712295,   40.        ],
       [ 979.26035482,   48.        ],
       [1097.15328072,   44.        ],
       [1228.45447846,   44.        ],
       [ 964.87699379,   43.        ],
       [ 964.87945646,   32.        ],
       [1236.88192233,   51.        ],
       [1115.11520937,   35.        ],
       [ 929.57884211,   31.        ],
       [1081.38400654,   44.        ],
       [ 930.48734608,   41.        ],
       [ 930.14053696,   38.        ],
       [1036.29434073,   41.        ],
       [ 713.0079633 ,   27.        ],
       [ 741.26232512,   22.        ],
       [ 915.65687061,   33.        ],
       [ 848.07533195,   32.        ],
       [1047.13709989,   47.        ],
       [ 863.79638867,   36.        ],
       [ 788.1544448 ,   23.        ],
       [1219.84731534,   50.        ],
       [ 966.13355493,   37.        ],
       [1010.1292307 ,   37.        ],
       [ 786.28777207,   35.        ],
       [ 918.34259132,   42.        ],
       [1016.63838846,   

### Key Performance Indicators (KPIs)

In [5]:
# KPI: total revenue summed over every row of df_sales.
monthly_total_revenue = float(df_sales.loc[:, 'sales_revenue'].sum())

# Present the KPI as a one-row table.
pd.DataFrame([{'metric': 'monthly_total_revenue', 'value': monthly_total_revenue}])

Unnamed: 0,metric,value
0,monthly_total_revenue,29153.338969


In [6]:
# KPI: mean number of units sold per day across df_sales.
average_units_per_day = float(df_sales.loc[:, 'units_sold'].mean())

# Present the KPI as a one-row table.
pd.DataFrame([{'metric': 'average_units_per_day', 'value': average_units_per_day}])

Unnamed: 0,metric,value
0,average_units_per_day,38.3


In [7]:
# KPI: the highest single-day revenue, where it sits in the frame, and
# which day number it belongs to.
max_revenue_idx = int(df_sales['sales_revenue'].idxmax())   # index label of the peak row
max_revenue = float(df_sales['sales_revenue'].max())
max_revenue_daynum = int(df_sales.loc[max_revenue_idx, 'day'])  # 'day' value stored on that row

pd.DataFrame(
    [
        {
            'metric': 'max_daily_revenue',
            'max_value': max_revenue,
            'df_index': max_revenue_idx,
            'day_number': max_revenue_daynum,
        }
    ]
)

Unnamed: 0,metric,max_value,df_index,day_number
0,max_daily_revenue,1236.881922,6,7


In [8]:
# KPI: average revenue earned per unit sold (total revenue / total units).
total_rev = float(df_sales['sales_revenue'].sum())
total_units = int(df_sales['units_sold'].sum())

# Guard the division: with no units sold the ratio is undefined, so report NaN.
if total_units > 0:
    avg_revenue_per_unit = float(total_rev / total_units)
else:
    avg_revenue_per_unit = float('nan')

pd.DataFrame(
    [
        {
            'metric': 'avg_revenue_per_unit',
            'total_revenue': total_rev,
            'total_units': total_units,
            'value': avg_revenue_per_unit,
        }
    ]
)

Unnamed: 0,metric,total_revenue,total_units,value
0,avg_revenue_per_unit,29153.338969,1149,25.372793


### Conditional Analysis

In [9]:
# Which days strictly exceeded the revenue target, and how many were there?
target_revenue = 1200.0

# Day numbers of the rows whose revenue is strictly above the target.
above_days = df_sales['day'][df_sales['sales_revenue'] > target_revenue].to_numpy()
days_above_target = len(above_days)

pd.DataFrame(
    [
        {
            'target_revenue': target_revenue,
            'days_above_target': days_above_target,
            'day_numbers': above_days.tolist(),
        }
    ]
)

Unnamed: 0,target_revenue,days_above_target,day_numbers
0,1200.0,3,"[4, 7, 21]"


In [10]:
# For days whose revenue fell strictly below the threshold: how many were
# there, and what was the average number of units sold on them?
threshold_revenue = 900.0
mask = df_sales['sales_revenue'] < threshold_revenue
days_below_threshold = int(mask.sum())

# With no qualifying days there is nothing to average, so report NaN.
if days_below_threshold > 0:
    avg_units_sold_below = float(df_sales['units_sold'][mask].mean())
else:
    avg_units_sold_below = float('nan')

pd.DataFrame(
    [
        {
            'threshold_revenue': threshold_revenue,
            'days_below_threshold': days_below_threshold,
            'avg_units_sold_below': avg_units_sold_below,
        }
    ]
)

Unnamed: 0,threshold_revenue,days_below_threshold,avg_units_sold_below
0,900.0,7,29.142857


### Weekly Aggregations

In [11]:
# Total revenue per week, using fixed inclusive (start_day, end_day) windows.
# The four windows span days 1-28; rows with a day number outside them are
# not counted in any week's total.
weeks = [(1, 7), (8, 14), (15, 21), (22, 28)]

rows = [
    {
        'week': week_no,
        'start_day': first_day,
        'end_day': last_day,
        # between() is inclusive on both ends, matching start <= day <= end.
        'total_revenue': float(
            df_sales.loc[df_sales['day'].between(first_day, last_day), 'sales_revenue'].sum()
        ),
    }
    for week_no, (first_day, last_day) in enumerate(weeks, start=1)
]
pd.DataFrame(rows)

Unnamed: 0,week,start_day,end_day,total_revenue
0,1,1,7,7546.01361
1,2,8,14,6736.008245
2,3,15,21,6423.929776
3,4,22,28,6581.237204
