# Read the CSV file into a pandas dataframe


In [13]:
import pandas as pd

# Location of the sales export; a relative path, so it is resolved against
# the directory the notebook kernel is running in.
SALES_CSV_PATH = 'data/product_sales.csv'

# Load the raw sales records into the dataframe used by every later cell.
df = pd.read_csv(SALES_CSV_PATH)

# Display the first 7 rows of the dataframe

In [14]:
# Take the leading seven records and print them as plain text.
first_rows = df.head(n=7)
print(first_rows)

         Date             Product  Units Sold  Unit Price  Total Revenue
0  2022-01-01     Apple iPhone 13           5      1000.0         5000.0
1  2022-01-01  Samsung Galaxy S21           3       900.0         2700.0
2  2022-01-01  Sony PlayStation 5           2       500.0         1000.0
3  2022-01-02   Microsoft Surface           4      1200.0         4800.0
4  2022-01-02   Bose QuietComfort           6       350.0         2100.0
5  2022-01-02        Nike Air Max           8       100.0          800.0
6  2022-01-03   Apple AirPods Pro          10       200.0         2000.0


# Calculate basic statistics for the numerical columns


In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# columns that describe() selects by default.
summary_stats = df.describe()
print(summary_stats)

       Units Sold   Unit Price  Total Revenue
count   15.000000    15.000000      15.000000
mean     4.066667   752.000000    2244.666667
std      2.658320   623.059273    1645.621318
min      1.000000   100.000000     300.000000
25%      2.000000   250.000000    1000.000000
50%      3.000000   500.000000    2000.000000
75%      5.500000  1100.000000    2850.000000
max     10.000000  2000.000000    5400.000000


In [16]:
#df['Date'] = pd.to_datetime(df['Date'])


# Group the data by product and calculate the total units sold and revenue


In [17]:
# Per-product totals: pick the two measures first, then sum each within
# its product group. The result is indexed by 'Product'.
measure_columns = ['Units Sold', 'Total Revenue']
product_sales = df.groupby('Product')[measure_columns].sum()
print(product_sales)

                     Units Sold  Total Revenue
Product                                       
Adidas Ultraboost             4          720.0
Apple AirPods Pro            10         2000.0
Apple MacBook Pro             1         2000.0
Apple iPhone 13               5         5000.0
Bose QuietComfort             6         2100.0
Dyson V11 Absolute            3         1800.0
Fitbit Charge 5               7         1050.0
GoPro Hero 10 Black           2         1000.0
LG OLED TV                    3         5400.0
Microsoft Surface             4         4800.0
Nike Air Max                  8          800.0
Nintendo Switch               1          300.0
Samsung Galaxy S21            3         2700.0
Samsung QLED TV               2         3000.0
Sony PlayStation 5            2         1000.0


## Create a line plot of units sold over time using plotly

In [18]:
import plotly.express as px

# The sales data holds several rows per date (one per product). px.line joins
# points in row order, so plotting the raw rows draws vertical zig-zags between
# products sold on the same day rather than a trend over time.
# Sum the units for each day first so there is exactly one point per date.
# The dates are parsed on a copy (assign) so the axis is a true, chronologically
# sorted date axis and `df` itself is left unchanged for the other cells.
daily_units = (
    df.assign(Date=pd.to_datetime(df['Date']))
    .groupby('Date', as_index=False)['Units Sold']
    .sum()
)

# Create a line plot of total units sold per day
fig1 = px.line(daily_units, x='Date', y='Units Sold', title='Units Sold Over Time')
fig1.show()

## Create a bar plot of total revenue by product


In [19]:
# One bar per row of the aggregated frame, with bar height taken from the
# summed revenue. No x is passed, so the bars are laid out along the
# frame's index.
fig2 = px.bar(
    data_frame=product_sales,
    y='Total Revenue',
    title='Total Revenue by Product',
)
fig2.show()

## Filter the dataframe to rows with revenue above $1000 and display the result

In [20]:
# Filter the dataframe by revenue above $1000
df_filtered = df[df['Total Revenue'] > 1000]

# Display the filtered dataframe
print(df_filtered)

          Date             Product  Units Sold  Unit Price  Total Revenue
0   2022-01-01     Apple iPhone 13           5      1000.0         5000.0
1   2022-01-01  Samsung Galaxy S21           3       900.0         2700.0
3   2022-01-02   Microsoft Surface           4      1200.0         4800.0
4   2022-01-02   Bose QuietComfort           6       350.0         2100.0
6   2022-01-03   Apple AirPods Pro          10       200.0         2000.0
7   2022-01-03     Fitbit Charge 5           7       150.0         1050.0
9   2022-01-04     Samsung QLED TV           2      1500.0         3000.0
10  2022-01-04  Dyson V11 Absolute           3       600.0         1800.0
12  2022-01-05   Apple MacBook Pro           1      2000.0         2000.0
14  2022-01-05          LG OLED TV           3      1800.0         5400.0
