In [1]:
import pandas as pd


In [2]:
data = {
    'Product_ID': [1, 2, 3, 4, 5, 6, 1, 2, 3, 4],
    'Product_Name': ['Toothpaste', 'Shampoo', 'Laptop', 'TV', 'Coffee Maker', 'Blender', 'Toothpaste', 'Shampoo', 'Laptop', 'TV'],
    'Category': ['Personal Care', 'Personal Care', 'Electronics', 'Electronics', 'Kitchen Appliances', 'Kitchen Appliances', 'Personal Care', 'Personal Care', 'Electronics', 'Electronics'],
    'Price': [5.0, 10.0, 1000.0, 600.0, 50.0, 30.0, 5.0, 10.0, 1000.0, 600.0],
    'Units_Sold': [100, 50, 20, 15, 30, 25, 110, 55, 22, 17]
}


In [3]:
df = pd.DataFrame(data)

In [4]:
df

Unnamed: 0,Product_ID,Product_Name,Category,Price,Units_Sold
0,1,Toothpaste,Personal Care,5.0,100
1,2,Shampoo,Personal Care,10.0,50
2,3,Laptop,Electronics,1000.0,20
3,4,TV,Electronics,600.0,15
4,5,Coffee Maker,Kitchen Appliances,50.0,30
5,6,Blender,Kitchen Appliances,30.0,25
6,1,Toothpaste,Personal Care,5.0,110
7,2,Shampoo,Personal Care,10.0,55
8,3,Laptop,Electronics,1000.0,22
9,4,TV,Electronics,600.0,17


### UNIQUE

#### How many unique Product_IDs are there in the DataFrame?

In [5]:
len(pd.unique(df.loc[:, 'Product_ID']))

6

#### What are the unique product categories available in the DataFrame?

In [6]:
df.loc[:, 'Category']

0         Personal Care
1         Personal Care
2           Electronics
3           Electronics
4    Kitchen Appliances
5    Kitchen Appliances
6         Personal Care
7         Personal Care
8           Electronics
9           Electronics
Name: Category, dtype: object

In [7]:
list(pd.unique(df.loc[:, 'Category']))

['Personal Care', 'Electronics', 'Kitchen Appliances']

#### How many different price points are in the DataFrame?

In [8]:
len(pd.unique(df.loc[:, 'Price']))

6

#### Are there any products that were sold in different units (check for unique 'Units_Sold')?

In [9]:
len(pd.unique(df.loc[:, 'Units_Sold'])) < len(df.loc[:, 'Units_Sold'].index)

False

#### How many unique combinations of 'Product_ID' and 'Price' are there?

In [10]:
#
# pd.unique can be applied to only single dimension,
# we can convert the dataframe of two columns to a series of tuples
# Then we can applu unique to the unique pair


In [11]:
prod_price_pair = df.loc[:, ['Product_ID', 'Price']]

In [12]:
prod_price_pair

Unnamed: 0,Product_ID,Price
0,1,5.0
1,2,10.0
2,3,1000.0
3,4,600.0
4,5,50.0
5,6,30.0
6,1,5.0
7,2,10.0
8,3,1000.0
9,4,600.0


In [13]:
def get_pair(row):
    return (row['Product_ID'], row['Price'])

In [14]:
prod_price_pair_series = prod_price_pair.apply(get_pair, axis=1)

In [15]:
prod_price_pair_series

0       (1.0, 5.0)
1      (2.0, 10.0)
2    (3.0, 1000.0)
3     (4.0, 600.0)
4      (5.0, 50.0)
5      (6.0, 30.0)
6       (1.0, 5.0)
7      (2.0, 10.0)
8    (3.0, 1000.0)
9     (4.0, 600.0)
dtype: object

In [16]:
unique_pairs = pd.unique(prod_price_pair_series)

In [17]:
unique_pairs

array([(1.0, 5.0), (2.0, 10.0), (3.0, 1000.0), (4.0, 600.0), (5.0, 50.0),
       (6.0, 30.0)], dtype=object)

In [18]:
# convert back to data frame

In [19]:
pd.DataFrame(list(unique_pairs), columns=['Product_ID', 'Price'])

Unnamed: 0,Product_ID,Price
0,1.0,5.0
1,2.0,10.0
2,3.0,1000.0
3,4.0,600.0
4,5.0,50.0
5,6.0,30.0


#### Are all 'Units_Sold' for 'Toothpaste' the same or are there unique values?

In [20]:
df

Unnamed: 0,Product_ID,Product_Name,Category,Price,Units_Sold
0,1,Toothpaste,Personal Care,5.0,100
1,2,Shampoo,Personal Care,10.0,50
2,3,Laptop,Electronics,1000.0,20
3,4,TV,Electronics,600.0,15
4,5,Coffee Maker,Kitchen Appliances,50.0,30
5,6,Blender,Kitchen Appliances,30.0,25
6,1,Toothpaste,Personal Care,5.0,110
7,2,Shampoo,Personal Care,10.0,55
8,3,Laptop,Electronics,1000.0,22
9,4,TV,Electronics,600.0,17


In [21]:
toothpaste_sold = df.loc[df['Product_Name'] == 'Toothpaste', 'Units_Sold']

In [22]:
toothpaste_sold

0    100
6    110
Name: Units_Sold, dtype: int64

In [23]:
if len(pd.unique(toothpaste_sold)) == len(toothpaste_sold):
    print('Unique values')

Unique values


### N_UNIQUE

#### How many unique categories of products does the Target store offer? Use nunique() to find out.

In [24]:
df

Unnamed: 0,Product_ID,Product_Name,Category,Price,Units_Sold
0,1,Toothpaste,Personal Care,5.0,100
1,2,Shampoo,Personal Care,10.0,50
2,3,Laptop,Electronics,1000.0,20
3,4,TV,Electronics,600.0,15
4,5,Coffee Maker,Kitchen Appliances,50.0,30
5,6,Blender,Kitchen Appliances,30.0,25
6,1,Toothpaste,Personal Care,5.0,110
7,2,Shampoo,Personal Care,10.0,55
8,3,Laptop,Electronics,1000.0,22
9,4,TV,Electronics,600.0,17


In [25]:
df.loc[:, ['Category']].nunique()

Category    3
dtype: int64

#### How many different unique price points exist for 'Electronics' in the DataFrame? Use nunique() to get the answer.

In [26]:
df.loc[df['Category'] == 'Electronics', ['Price']].nunique()

Price    2
dtype: int64

#### Are the units in which products sold always unique, or do some products share the same number of 'Units_Sold'? Use nunique() to determine this.

In [27]:
df1 = df.copy()

In [28]:
df1.loc[8, 'Units_Sold'] = 190 # to make one product duplicate

In [29]:
df1

Unnamed: 0,Product_ID,Product_Name,Category,Price,Units_Sold
0,1,Toothpaste,Personal Care,5.0,100
1,2,Shampoo,Personal Care,10.0,50
2,3,Laptop,Electronics,1000.0,20
3,4,TV,Electronics,600.0,15
4,5,Coffee Maker,Kitchen Appliances,50.0,30
5,6,Blender,Kitchen Appliances,30.0,25
6,1,Toothpaste,Personal Care,5.0,110
7,2,Shampoo,Personal Care,10.0,55
8,3,Laptop,Electronics,1000.0,190
9,4,TV,Electronics,600.0,17


In [30]:
df1.groupby('Product_Name')['Units_Sold'].sum()

Product_Name
Blender          25
Coffee Maker     30
Laptop          210
Shampoo         105
TV               32
Toothpaste      210
Name: Units_Sold, dtype: int64

In [31]:
# Toothpaste and Laptop has same units sold

In [32]:
actual_items = len(df1.groupby('Product_Name')['Units_Sold'].sum())

In [33]:
unique_items = df1.groupby('Product_Name')['Units_Sold'].sum().nunique()

In [34]:
actual_items, unique_items

(6, 5)

#### How many unique 'Product_ID' are there for products in the 'Personal Care' category? Utilize nunique() for this.

In [35]:
personal_care_prods = df.loc[df['Category'] == 'Personal Care', ['Product_Name']]

In [36]:
personal_care_prods

Unnamed: 0,Product_Name
0,Toothpaste
1,Shampoo
6,Toothpaste
7,Shampoo


In [37]:
personal_care_prods.nunique()

Product_Name    2
dtype: int64