# Retail Store Inventory
### Problem Statement:
- Clean inventory data, impute missing unit prices and stock quantities, and generate KPIs such as out-of-stock products and category-wise average prices.
- Key Pandas/NumPy usage: `.interpolate()`, `.corr()`, `.merge()`, `.fillna(method='bfill')` (deprecated since pandas 2.1; use `.bfill()`), `.round()`, `np.round()`

In [15]:
import pandas as pd
import numpy as np

In [16]:
# Load the raw inventory export. read_csv already returns a fresh DataFrame,
# so no additional copy is needed before the cleaning steps below.
df = pd.read_csv('retail_inventory.csv')
# Preview only the first rows instead of rendering the whole table.
df.head(10)

Unnamed: 0,Item_ID,Item_Name,Category,Stock_Quantity,Unit_Price,Last_Stocked_Date
0,101,Table,Apparel,10.0,500.0,2023-11-01
1,102,Sofa,Electronics,10.0,,2023-11-04
2,103,Table,Electronics,,1500.0,2023-11-07
3,104,Shirt,Furniture,30.0,2000.0,2023-11-10
4,105,Sofa,Apparel,,1500.0,2023-11-13
5,106,Shirt,Furniture,,500.0,2023-11-16
6,107,Jeans,,40.0,,2023-11-19
7,108,TV,Cosmetics,40.0,1500.0,2023-11-22
8,109,Shirt,Furniture,20.0,2000.0,2023-11-25
9,110,TV,,,1000.0,2023-11-28


In [17]:
# Summarise the frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Item_ID            50 non-null     int64  
 1   Item_Name          50 non-null     object 
 2   Category           40 non-null     object 
 3   Stock_Quantity     43 non-null     float64
 4   Unit_Price         42 non-null     float64
 5   Last_Stocked_Date  50 non-null     object 
dtypes: float64(2), int64(1), object(3)
memory usage: 2.5+ KB


In [18]:
# Count the missing entries in each column of the raw data.
missing_per_column = df.isna().sum()
missing_per_column

Item_ID               0
Item_Name             0
Category             10
Stock_Quantity        7
Unit_Price            8
Last_Stocked_Date     0
dtype: int64

In [19]:
# Fill missing categories from the nearest labelled row: forward-fill first,
# then back-fill so that gaps at the very top of the table (which have no
# preceding row to copy from) do not survive as NaN.
# NOTE(review): this assumes neighbouring rows share a category, which the
# previewed rows do not obviously support — confirm this is the intended
# imputation before relying on per-category KPIs.
df['Category'] = df['Category'].ffill().bfill()

In [20]:
# Preview the first rows to check the filled Category values
# without rendering the entire frame.
df.head(10)

Unnamed: 0,Item_ID,Item_Name,Category,Stock_Quantity,Unit_Price,Last_Stocked_Date
0,101,Table,Apparel,10.0,500.0,2023-11-01
1,102,Sofa,Electronics,10.0,,2023-11-04
2,103,Table,Electronics,,1500.0,2023-11-07
3,104,Shirt,Furniture,30.0,2000.0,2023-11-10
4,105,Sofa,Apparel,,1500.0,2023-11-13
5,106,Shirt,Furniture,,500.0,2023-11-16
6,107,Jeans,Furniture,40.0,,2023-11-19
7,108,TV,Cosmetics,40.0,1500.0,2023-11-22
8,109,Shirt,Furniture,20.0,2000.0,2023-11-25
9,110,TV,Furniture,,1000.0,2023-11-28


In [21]:
# Re-count missing entries per column after the Category fill.
missing_per_column = df.isna().sum()
missing_per_column

Item_ID              0
Item_Name            0
Category             0
Stock_Quantity       7
Unit_Price           8
Last_Stocked_Date    0
dtype: int64

In [22]:
# Replace missing stock quantities with zero units.
# NOTE(review): after this step an unknown quantity is indistinguishable from
# a genuinely out-of-stock item — confirm that is acceptable for the
# out-of-stock KPI.
stock_filled = df['Stock_Quantity'].fillna(0)
df['Stock_Quantity'] = stock_filled

In [23]:
# Preview the first rows to check the filled Stock_Quantity values
# without rendering the entire frame.
df.head(10)

Unnamed: 0,Item_ID,Item_Name,Category,Stock_Quantity,Unit_Price,Last_Stocked_Date
0,101,Table,Apparel,10.0,500.0,2023-11-01
1,102,Sofa,Electronics,10.0,,2023-11-04
2,103,Table,Electronics,0.0,1500.0,2023-11-07
3,104,Shirt,Furniture,30.0,2000.0,2023-11-10
4,105,Sofa,Apparel,0.0,1500.0,2023-11-13
5,106,Shirt,Furniture,0.0,500.0,2023-11-16
6,107,Jeans,Furniture,40.0,,2023-11-19
7,108,TV,Cosmetics,40.0,1500.0,2023-11-22
8,109,Shirt,Furniture,20.0,2000.0,2023-11-25
9,110,TV,Furniture,0.0,1000.0,2023-11-28


In [24]:
# Store stock quantities as whole numbers; the integer cast requires that the
# column holds no missing values.
stock_as_int = df["Stock_Quantity"].astype(int)
df["Stock_Quantity"] = stock_as_int

In [25]:
# Preview the first rows to confirm Stock_Quantity is now integer-typed
# without rendering the entire frame.
df.head(10)

Unnamed: 0,Item_ID,Item_Name,Category,Stock_Quantity,Unit_Price,Last_Stocked_Date
0,101,Table,Apparel,10,500.0,2023-11-01
1,102,Sofa,Electronics,10,,2023-11-04
2,103,Table,Electronics,0,1500.0,2023-11-07
3,104,Shirt,Furniture,30,2000.0,2023-11-10
4,105,Sofa,Apparel,0,1500.0,2023-11-13
5,106,Shirt,Furniture,0,500.0,2023-11-16
6,107,Jeans,Furniture,40,,2023-11-19
7,108,TV,Cosmetics,40,1500.0,2023-11-22
8,109,Shirt,Furniture,20,2000.0,2023-11-25
9,110,TV,Furniture,0,1000.0,2023-11-28


In [26]:
# Impute missing unit prices with the overall mean price, rounded to a whole
# number. The mean is taken over the observed (non-null) prices only.
mean_price = df['Unit_Price'].mean()
price_fill_value = mean_price.round()
df['Unit_Price'] = df['Unit_Price'].fillna(price_fill_value)

In [27]:
# Preview the first rows to check the imputed Unit_Price values
# without rendering the entire frame.
df.head(10)

Unnamed: 0,Item_ID,Item_Name,Category,Stock_Quantity,Unit_Price,Last_Stocked_Date
0,101,Table,Apparel,10,500.0,2023-11-01
1,102,Sofa,Electronics,10,1274.0,2023-11-04
2,103,Table,Electronics,0,1500.0,2023-11-07
3,104,Shirt,Furniture,30,2000.0,2023-11-10
4,105,Sofa,Apparel,0,1500.0,2023-11-13
5,106,Shirt,Furniture,0,500.0,2023-11-16
6,107,Jeans,Furniture,40,1274.0,2023-11-19
7,108,TV,Cosmetics,40,1500.0,2023-11-22
8,109,Shirt,Furniture,20,2000.0,2023-11-25
9,110,TV,Furniture,0,1000.0,2023-11-28


In [28]:
# Final validation before export: every column must be fully populated.
# Failing here stops an incomplete table from being written out.
remaining_nulls = df.isnull().sum()
assert remaining_nulls.sum() == 0, (
    f"Unfilled values remain: {remaining_nulls[remaining_nulls > 0].to_dict()}"
)
remaining_nulls

Item_ID              0
Item_Name            0
Category             0
Stock_Quantity       0
Unit_Price           0
Last_Stocked_Date    0
dtype: int64

In [30]:
# Write the cleaned table to disk, leaving the positional row index out of the file.
cleaned_path = 'cleaned_retail_data.csv'
df.to_csv(cleaned_path, index=False)