In [1]:
import pandas as pd
import numpy as np

In [2]:
# Small product table with exactly one missing entry (np.nan / None) per column;
# it is the running example for the null-handling exercises.
data = dict(
    Product_ID=[1, 2, 3, 4, 5, np.nan],
    Product_Name=['Laptop', 'Phone', 'TV', 'Headphones', None, 'Microwave'],
    Stock=[20, 15, np.nan, 30, 25, 12],
    Price=[999.99, 799.99, 399.99, np.nan, 59.99, 99.99],
    Discounted=[True, False, True, False, np.nan, True],
)

target_df = pd.DataFrame.from_dict(data)

In [3]:
target_df

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True


### Identify Null Values

#### How would you identify rows where the 'Stock' column has null values in the provided target_df DataFrame?

In [4]:
# isna() already returns a boolean mask, so comparing it with True is redundant.
# .loc with that mask keeps only the rows whose 'Stock' is missing.
target_df.loc[target_df['Stock'].isna()]

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
2,3.0,TV,,399.99,True


#### Can you list the columns in target_df that contain at least one null value?

In [5]:
# Reduce the cell-wise null mask down the rows: one boolean per column,
# True when that column holds at least one missing value.
target_df.isna().any(axis=0)

Product_ID      True
Product_Name    True
Stock           True
Price           True
Discounted      True
dtype: bool

#### How would you use pd.isna() to identify null values in the entire target_df DataFrame?

In [6]:
# The top-level function form asked for in the question; for a DataFrame it
# returns the same cell-wise boolean mask as the target_df.isna() method.
pd.isna(target_df)

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,True,False,False
3,False,False,False,True,False
4,False,True,False,False,True
5,True,False,False,False,False


#### If you wanted to identify rows where both 'Stock' and 'Price' have null values, how would you do it?

In [7]:
# Combine the two boolean masks with &: a row is kept only when BOTH
# 'Stock' and 'Price' are missing. (No `== True` needed on a boolean mask.)
target_df.loc[target_df['Stock'].isna() & target_df['Price'].isna()]

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted


In [8]:
target_df

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True


In [9]:
# Add a row labelled 6 whose 'Stock' and 'Price' are both NaN, so the
# "both columns null" filter has something to match.
# Values are positional: Product_ID, Product_Name, Stock, Price, Discounted.
# NOTE: this mutates target_df in place; every later cell sees 7 rows.
target_df.loc[6] = [6.0, "Monitor", np.nan, np.nan, True]

In [10]:
target_df

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True
6,6.0,Monitor,,,True


In [11]:
# Same "both null" filter as before, now that row 6 exists.
# The masks are already boolean, so no `== True` comparison is needed.
target_df.loc[target_df['Stock'].isna() & target_df['Price'].isna()]

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
6,6.0,Monitor,,,True


### Filtering

#### How would you filter out rows where the 'Stock' column has null values in the target_df DataFrame?

In [12]:
target_df.loc[target_df.loc[:, "Stock"].isna() == True]

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
2,3.0,TV,,399.99,True
6,6.0,Monitor,,,True


#### What method would you use to remove any row that has at least one null value in target_df?

In [13]:
# Work on a copy so that reassigning/dropping rows in `temp` leaves target_df untouched.
temp = target_df.copy()

In [14]:
temp

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True
6,6.0,Monitor,,,True


In [15]:
# Drop every row that contains at least one missing value
# (how="any" is the default, spelled out here for clarity).
temp = temp.dropna(axis="index", how="any")

In [16]:
temp

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False


#### If you only want to drop rows where both 'Stock' and 'Price' have null values, how would you go about doing this in target_df?

In [17]:
# Start again from a fresh copy of target_df: the previous `temp` was reduced
# to the fully non-null rows by the dropna call.
temp = target_df.copy()

In [18]:
temp

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True
6,6.0,Monitor,,,True


In [19]:
temp.dropna(subset=['Stock', 'Price'])

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True


### Filling

#### How would you fill null values in the 'Stock' column with the median value of that column?

In [20]:
temp

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True
6,6.0,Monitor,,,True


In [21]:
# Series.median() skips NaN by default, so no manual notna() mask is needed.
stock_median = temp['Stock'].median()

In [22]:
stock_median

20.0

In [23]:
# Replace every missing 'Stock' entry in `temp` with the median computed above.
temp['Stock'] = temp['Stock'].fillna(stock_median)

In [24]:
temp

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,20.0,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True
6,6.0,Monitor,20.0,,True


#### Can you use the .fillna() method to fill null values in multiple columns ('Stock' and 'Price') at once? If so, how?

In [25]:
# Series.median() skips NaN by default, so no manual notna() mask is needed.
price_median = target_df['Price'].median()

In [26]:
price_median

399.99

In [27]:
# A dict passed to fillna maps column name -> fill value; columns that are
# not listed (Product_ID, Product_Name, Discounted) keep their nulls.
replace = dict(Stock=stock_median, Price=price_median)
temp = target_df.fillna(value=replace)

In [28]:
temp

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,20.0,399.99,True
3,4.0,Headphones,30.0,399.99,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True
6,6.0,Monitor,20.0,399.99,True


#### If you only want to fill the first null value in each column with a zero, how would you do it?

In [29]:
target_df

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True
6,6.0,Monitor,,,True


In [30]:
# limit=1 caps the fill at one NaN per column, counted from the top;
# any further NaN in the same column is left as it is.
temp = target_df.fillna(0, limit=1)

In [31]:
temp

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,0.0,399.99,True
3,4.0,Headphones,30.0,0.0,False
4,5.0,0,25.0,59.99,0
5,0.0,Microwave,12.0,99.99,True
6,6.0,Monitor,,,True


### Counting

#### How would you count the number of null values in the entire DataFrame?

In [32]:
# isna() marks the missing cells; summing per column and then across the
# columns gives the total number of nulls in the frame.
# (notna() would count the 28 non-null cells instead.)
target_df.isna().sum().sum()

7

#### Is there a way to get a count of non-null values for each column in the DataFrame?

In [33]:
# DataFrame.count() returns the number of non-null cells in each column.
target_df.count()

Product_ID      6
Product_Name    6
Stock           5
Price           5
Discounted      6
dtype: int64

#### How would you count null values in a specific row, say the row at index 2?

In [34]:
# Select the row labelled 2 and count its missing cells.
# (notna() would count the 4 non-null cells of that row instead.)
target_df.loc[2].isna().sum()

1

### Replacing

#### If you wanted to replace all null values in the DataFrame with the string "Unknown", how would you do it?

In [35]:
# A scalar fill value is applied to every null cell in every column;
# the result is a new frame, target_df itself is not modified.
target_df.fillna(value="Unknown")

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,Unknown,399.99,True
3,4.0,Headphones,30.0,Unknown,False
4,5.0,Unknown,25.0,59.99,Unknown
5,Unknown,Microwave,12.0,99.99,True
6,6.0,Monitor,Unknown,Unknown,True


#### Can you replace null values in one column ('Stock') with zero and in another column ('Discounted') with False, all in a single line of code?

In [36]:
# Single line, as the question asks: the dict maps column name -> fill value,
# so 'Stock' nulls become 0 and 'Discounted' nulls become False; other columns are untouched.
target_df.fillna({'Stock': 0, 'Discounted': False})

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,0.0,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,False
5,,Microwave,12.0,99.99,True
6,6.0,Monitor,0.0,,True


#### How would you replace all instances of a specific value, let's say 999.99 in the 'Price' column, with np.nan?

In [37]:
# Nested dict form {column: {old: new}} restricts the replacement to 'Price'.
# A bare replace(999.99, np.nan) would also rewrite a 999.99 found in any other column.
target_df.replace({'Price': {999.99: np.nan}})

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True
6,6.0,Monitor,,,True


### Operations

#### How would you calculate the sum of each column in target_df, considering that some columns might have null values?

In [38]:
# 'Product_Name' holds text (and a None) and cannot be summed. Drop it explicitly
# instead of relying on pandas silently discarding it; pandas >= 2.0 no longer
# does that and attempts the reduction on every column.
# skipna=True (the default) ignores the NaN entries in the remaining columns.
target_df.drop(columns='Product_Name').sum(skipna=True)

Product_ID       21.0
Stock           102.0
Price         2359.95
Discounted          4
dtype: object

#### If you have null values in the 'Stock' and 'Price' columns, and you attempt to multiply these columns element-wise, what will happen to the resulting product where null values are involved?

In [39]:
# The result is NaN wherever either operand is NaN: nulls propagate through arithmetic.

In [40]:
target_df

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,,399.99,True
3,4.0,Headphones,30.0,,False
4,5.0,,25.0,59.99,
5,,Microwave,12.0,99.99,True
6,6.0,Monitor,,,True


In [41]:
# Element-wise product aligned on the index; a NaN in either column
# yields NaN for that row.
target_df['Stock'].mul(target_df['Price'])

0    19999.80
1    11999.85
2         NaN
3         NaN
4     1499.75
5     1199.88
6         NaN
dtype: float64

#### How would you use the dropna method to perform an operation, say calculating the mean, only on non-null elements in the 'Stock' column?

In [42]:
# By default, mean() skips nulls (skipna=True), so it is computed only on the non-null values.

In [43]:
# skipna=True is the default; it is spelled out to show that NaN is ignored.
target_df['Stock'].mean(skipna=True)

20.4

In [44]:
# Explicit variant: remove the null entries from the column first, then average.
target_df['Stock'].dropna().mean()

20.4

### Propagating

#### How would you propagate the last valid observation in the 'Stock' column to fill NaN values?

In [45]:
target_df['Stock']

0    20.0
1    15.0
2     NaN
3    30.0
4    25.0
5    12.0
6     NaN
Name: Stock, dtype: float64

In [46]:
# Forward fill: each NaN takes the closest valid value above it
# (index 2 <- 15.0, index 6 <- 12.0). Returns a new Series; target_df is unchanged.
target_df['Stock'].ffill()

0    20.0
1    15.0
2    15.0
3    30.0
4    25.0
5    12.0
6    12.0
Name: Stock, dtype: float64

#### Can you propagate the next valid observation backward to fill NaN values in the 'Price' column? If so, how would you do it?

In [47]:
target_df['Price']

0    999.99
1    799.99
2    399.99
3       NaN
4     59.99
5     99.99
6       NaN
Name: Price, dtype: float64

In [48]:
# Backward fill: each NaN takes the closest valid value below it (index 3 <- 59.99).
# Index 6 stays NaN because there is no later row to copy a value from.
target_df['Price'].bfill()

0    999.99
1    799.99
2    399.99
3     59.99
4     59.99
5     99.99
6       NaN
Name: Price, dtype: float64

#### How would you limit the number of NaN values filled by forward fill (or backward fill) to 1 in the 'Stock' column?

In [49]:
target_df['Stock']

0    20.0
1    15.0
2     NaN
3    30.0
4    25.0
5    12.0
6     NaN
Name: Stock, dtype: float64

In [51]:
# Blank out 'Stock' at index 3 so that indices 2 and 3 form two consecutive NaNs.
# This modifies target_df in place.
target_df.at[3, 'Stock'] = np.nan

In [52]:
target_df['Stock']

0    20.0
1    15.0
2     NaN
3     NaN
4    25.0
5    12.0
6     NaN
Name: Stock, dtype: float64

In [53]:
target_df.ffill(limit=1)

Unnamed: 0,Product_ID,Product_Name,Stock,Price,Discounted
0,1.0,Laptop,20.0,999.99,True
1,2.0,Phone,15.0,799.99,False
2,3.0,TV,15.0,399.99,True
3,4.0,Headphones,,399.99,False
4,5.0,Headphones,25.0,59.99,False
5,5.0,Microwave,12.0,99.99,True
6,6.0,Monitor,12.0,99.99,True
