In [1]:
import numpy as np
import pandas as pd

## Scenario: You are a junior data analyst at an online retail company. Your manager wants to understand daily sales patterns. You need to simulate a month's worth of sales data and then use NumPy to extract insights about sales performance.

#### Sales Data Generation:

Simulate daily sales_revenue for 30 days. Assume a base daily revenue (e.g., $1000) with random fluctuations. Use np.random.rand() or np.random.normal() to add variability. Ensure no negative sales.

In [122]:
base_daily_revenue = 1000
sales_revenue = base_daily_revenue + np.random.normal(0, 200, 30)
sales_revenue

array([1007.55561098,  987.4856007 , 1225.83849049,  927.49955447,
        779.67759573,  960.44643402, 1210.37271755, 1022.0629548 ,
        971.67845392,  956.32921681,  828.40455242,  994.40245297,
        500.74072957,  755.26003945,  853.95561416,  948.88354263,
        590.02074896,  858.98437673, 1123.69766904,  825.43519657,
       1108.3797071 ,  689.56914045,  696.88223983,  985.25196104,
        651.84103883, 1175.612416  , 1057.50572686,  754.87861845,
        730.81598027,  765.82618673])

Simulate units_sold for the same 30 days, correlated with sales revenue but with its own random fluctuations.

In [123]:
base_units = 50
units_sold = (sales_revenue / base_daily_revenue) * base_units + np.random.normal(0, 5, 30)
units_sold

array([41.29630482, 55.6551033 , 62.90620585, 46.72310513, 33.24305721,
       47.24180505, 62.80798022, 43.14465835, 45.1620618 , 45.5514057 ,
       41.2521532 , 49.08463819, 27.16959721, 33.78113666, 32.51051235,
       51.50807086, 27.47297135, 46.17507233, 59.48233111, 37.07961337,
       51.30742846, 35.35838211, 31.41675453, 52.23992135, 39.72770403,
       60.3172382 , 53.76331084, 36.29341144, 43.27080276, 34.22418265])

Create two 1D NumPy arrays, one for sales_revenue and one for units_sold.

In [124]:
arr_sales_revenue=np.array(sales_revenue)
arr_sales_revenue

array([1007.55561098,  987.4856007 , 1225.83849049,  927.49955447,
        779.67759573,  960.44643402, 1210.37271755, 1022.0629548 ,
        971.67845392,  956.32921681,  828.40455242,  994.40245297,
        500.74072957,  755.26003945,  853.95561416,  948.88354263,
        590.02074896,  858.98437673, 1123.69766904,  825.43519657,
       1108.3797071 ,  689.56914045,  696.88223983,  985.25196104,
        651.84103883, 1175.612416  , 1057.50572686,  754.87861845,
        730.81598027,  765.82618673])

In [125]:
arr_units_sold=np.array(units_sold)
arr_units_sold

array([41.29630482, 55.6551033 , 62.90620585, 46.72310513, 33.24305721,
       47.24180505, 62.80798022, 43.14465835, 45.1620618 , 45.5514057 ,
       41.2521532 , 49.08463819, 27.16959721, 33.78113666, 32.51051235,
       51.50807086, 27.47297135, 46.17507233, 59.48233111, 37.07961337,
       51.30742846, 35.35838211, 31.41675453, 52.23992135, 39.72770403,
       60.3172382 , 53.76331084, 36.29341144, 43.27080276, 34.22418265])

#### Combine Data:

Create a 2D NumPy array where the first column is sales_revenue and the second is units_sold.

In [126]:
combined = np.column_stack((sales_revenue,units_sold))
combined

array([[1007.55561098,   41.29630482],
       [ 987.4856007 ,   55.6551033 ],
       [1225.83849049,   62.90620585],
       [ 927.49955447,   46.72310513],
       [ 779.67759573,   33.24305721],
       [ 960.44643402,   47.24180505],
       [1210.37271755,   62.80798022],
       [1022.0629548 ,   43.14465835],
       [ 971.67845392,   45.1620618 ],
       [ 956.32921681,   45.5514057 ],
       [ 828.40455242,   41.2521532 ],
       [ 994.40245297,   49.08463819],
       [ 500.74072957,   27.16959721],
       [ 755.26003945,   33.78113666],
       [ 853.95561416,   32.51051235],
       [ 948.88354263,   51.50807086],
       [ 590.02074896,   27.47297135],
       [ 858.98437673,   46.17507233],
       [1123.69766904,   59.48233111],
       [ 825.43519657,   37.07961337],
       [1108.3797071 ,   51.30742846],
       [ 689.56914045,   35.35838211],
       [ 696.88223983,   31.41675453],
       [ 985.25196104,   52.23992135],
       [ 651.84103883,   39.72770403],
       [1175.612416  ,   

#### Key Performance Indicators (KPIs):

Calculate the total sales_revenue for the month.

In [127]:
total_sales_revenue = np.sum(arr_sales_revenue)
total_sales_revenue

26945.29456752302

Calculate the average units_sold per day.

In [128]:
average_units_sold = np.mean(units_sold)
average_units_sold

44.23889734699449

Determine the maximum daily sales_revenue and the day (index) it occurred.

In [129]:
max_revenue = np.max(arr_sales_revenue)
max_revenue_day = np.argmax(sales_revenue)
print("maximum sales revenue:",max_revenue)
print("the day occurred:",max_revenue_day)

maximum sales revenue: 1225.8384904921168
the day occurred: 2


Calculate the average revenue per unit sold for the entire month (total revenue / total units sold).

In [130]:
average_revenue_per_unit_sold = np.sum(sales_revenue) / np.sum(units_sold)
average_revenue_per_unit_sold

20.302867825549928

#### Conditional Analysis:

Identify and count how many days had sales_revenue above a certain target (e.g., $1200).

In [131]:
days = sales_revenue[sales_revenue>1200]
days

array([1225.83849049, 1210.37271755])

In [132]:
len(days)

2

Calculate the average units_sold only for days when sales_revenue was below a certain threshold (e.g., $900).

In [133]:
condition = sales_revenue[sales_revenue<900]
condition

array([779.67759573, 828.40455242, 500.74072957, 755.26003945,
       853.95561416, 590.02074896, 858.98437673, 825.43519657,
       689.56914045, 696.88223983, 651.84103883, 754.87861845,
       730.81598027, 765.82618673])

In [134]:
np.mean(units_sold,where=sales_revenue<900)

35.6410965129685

#### Weekly Aggregations:

Assume the 30 days start on a Monday. Calculate the total sales_revenue for each of the 4 full weeks (days 1-7, 8-14, 15-21, 22-28). You will need to reshape or carefully slice your data.

In [135]:
combined

array([[1007.55561098,   41.29630482],
       [ 987.4856007 ,   55.6551033 ],
       [1225.83849049,   62.90620585],
       [ 927.49955447,   46.72310513],
       [ 779.67759573,   33.24305721],
       [ 960.44643402,   47.24180505],
       [1210.37271755,   62.80798022],
       [1022.0629548 ,   43.14465835],
       [ 971.67845392,   45.1620618 ],
       [ 956.32921681,   45.5514057 ],
       [ 828.40455242,   41.2521532 ],
       [ 994.40245297,   49.08463819],
       [ 500.74072957,   27.16959721],
       [ 755.26003945,   33.78113666],
       [ 853.95561416,   32.51051235],
       [ 948.88354263,   51.50807086],
       [ 590.02074896,   27.47297135],
       [ 858.98437673,   46.17507233],
       [1123.69766904,   59.48233111],
       [ 825.43519657,   37.07961337],
       [1108.3797071 ,   51.30742846],
       [ 689.56914045,   35.35838211],
       [ 696.88223983,   31.41675453],
       [ 985.25196104,   52.23992135],
       [ 651.84103883,   39.72770403],
       [1175.612416  ,   

In [136]:
sales_revenue[7:13]

array([1022.0629548 ,  971.67845392,  956.32921681,  828.40455242,
        994.40245297,  500.74072957])

In [137]:
first_week = sales_revenue[0:7]
np.sum(first_week)

7098.876003938012

In [138]:
second_week = sales_revenue[7:14]
np.sum(second_week)

6028.8783999265925

In [139]:
third_week = sales_revenue[14:21]
np.sum(third_week)

6309.3568551972485

In [140]:
forth_week = sales_revenue[21:28]
np.sum(forth_week)

6011.541141463916

In [141]:
np.sum(sales_revenue[:28].reshape(4, 7), axis=1)

array([7098.87600394, 6028.87839993, 6309.3568552 , 6011.54114146])