In [47]:
import pandas as pd
import numpy as np

In [48]:
# Build a reproducible synthetic sales table: one row per calendar day.
np.random.seed(1)  # seed the legacy global RNG so reruns give the same draws
n = 1000

# The draw order (products, then prices, then quantities) is kept as-is:
# reordering the calls would change the values produced from the seeded RNG.
dates = pd.date_range('2050-01-01', periods=n, freq='D')
products = np.random.choice(
    ['Tablet', 'Camera', 'Phone', 'Microphone', 'Keyboard'],
    size=n,
)
prices = np.random.uniform(low=50.0, high=200.0, size=n).round(2)
quantities = np.random.randint(low=1, high=15, size=n)  # upper bound is exclusive

data = dict(Date=dates, Product=products, Price=prices, Quantity=quantities)

df = pd.DataFrame(data)
df.head()

Unnamed: 0,Date,Product,Price,Quantity
0,2050-01-01,Microphone,90.3,1
1,2050-01-02,Keyboard,171.24,13
2,2050-01-03,Tablet,94.29,4
3,2050-01-04,Camera,131.62,7
4,2050-01-05,Microphone,123.19,5


In [49]:
# Inspect the column dtypes of the freshly generated frame (before any file round trip).
df.dtypes

Date        datetime64[ns]
Product             object
Price              float64
Quantity             int32
dtype: object

In [50]:
# Persist the frame as CSV; index=False keeps the RangeIndex out of the file.
df.to_csv(path_or_buf='random_sales.csv', index=False)

In [51]:
# Reload the CSV written above, parsing 'Date' back into datetimes with an
# explicit format instead of leaving it as plain strings.
df = pd.read_csv(
    'random_sales.csv',
    parse_dates=['Date'],
    date_format='%Y-%m-%d',
)
df.head()

Unnamed: 0,Date,Product,Price,Quantity
0,2050-01-01,Microphone,90.3,1
1,2050-01-02,Keyboard,171.24,13
2,2050-01-03,Tablet,94.29,4
3,2050-01-04,Camera,131.62,7
4,2050-01-05,Microphone,123.19,5


In [52]:
# Re-check dtypes after the CSV round trip: 'Date' is datetime64[ns] again thanks to
# parse_dates, while 'Quantity' now reads back as int64 (it was int32 before saving).
df.dtypes

Date        datetime64[ns]
Product             object
Price              float64
Quantity             int64
dtype: object

In [53]:
# Writing .xlsx files requires the optional openpyxl package; install it first
# (e.g. `%pip install openpyxl`) or this call fails with an import error.
# index=False matches the CSV export above, so the RangeIndex is not written
# out as an extra unnamed column.
df.to_excel('sales_data.xlsx', index=False)

In [54]:
# Export the same frame as JSON, using pandas' default serialization settings.
df.to_json(path_or_buf='sales_data.json')

In [55]:
# (rows, columns) of the reloaded frame.
df.shape

(1000, 4)

In [56]:
# Summary statistics rounded to two decimals; the datetime 'Date' column is
# summarised alongside the numeric 'Price' and 'Quantity' columns.
df.describe().round(decimals=2)

Unnamed: 0,Date,Price,Quantity
count,1000,1000.0,1000.0
mean,2051-05-15 11:59:59.999999488,126.46,7.49
min,2050-01-01 00:00:00,50.11,1.0
25%,2050-09-07 18:00:00,88.15,4.0
50%,2051-05-15 12:00:00,127.22,8.0
75%,2052-01-20 06:00:00,164.99,11.0
max,2052-09-26 00:00:00,199.78,14.0
std,,43.89,4.01


In [57]:
# Convert the frame to a NumPy array. to_numpy() is the accessor pandas recommends
# over the older .values attribute; with mixed column types the result is an
# object-dtype array.
df.to_numpy()

array([[Timestamp('2050-01-01 00:00:00'), 'Microphone', 90.3, 1],
       [Timestamp('2050-01-02 00:00:00'), 'Keyboard', 171.24, 13],
       [Timestamp('2050-01-03 00:00:00'), 'Tablet', 94.29, 4],
       ...,
       [Timestamp('2052-09-24 00:00:00'), 'Keyboard', 132.81, 8],
       [Timestamp('2052-09-25 00:00:00'), 'Keyboard', 180.14, 3],
       [Timestamp('2052-09-26 00:00:00'), 'Phone', 177.48, 10]],
      shape=(1000, 4), dtype=object)

In [58]:
# First five rows as a NumPy array. Taking head() before converting avoids
# materialising the whole frame as an object array just to slice it.
df.head().to_numpy()

array([[Timestamp('2050-01-01 00:00:00'), 'Microphone', 90.3, 1],
       [Timestamp('2050-01-02 00:00:00'), 'Keyboard', 171.24, 13],
       [Timestamp('2050-01-03 00:00:00'), 'Tablet', 94.29, 4],
       [Timestamp('2050-01-04 00:00:00'), 'Camera', 131.62, 7],
       [Timestamp('2050-01-05 00:00:00'), 'Microphone', 123.19, 5]],
      dtype=object)

In [59]:
# groupby on its own only builds a DataFrameGroupBy object; the displayed value
# is that object's repr, not an aggregated table.
df.groupby(by='Product')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000026639B88A40>

In [60]:
# Selecting one column from the grouped frame yields a SeriesGroupBy, still unaggregated.
df.groupby(by='Product')['Quantity']

<pandas.core.groupby.generic.SeriesGroupBy object at 0x000002663BE111F0>

In [62]:
# Total units per product.
df.groupby(by='Product')['Quantity'].agg('sum')

Product
Camera        1451
Keyboard      1501
Microphone    1444
Phone         1610
Tablet        1487
Name: Quantity, dtype: int64

In [None]:
# Average price per product, rounded to two decimals.
df.groupby(by='Product')['Price'].agg('mean').round(decimals=2)