In [9]:
import pandas as pd
from sklearn.impute import KNNImputer

In [10]:
# Read the raw retail sales CSV into a DataFrame (path is relative to the
# notebook's working directory).
retail_sales = pd.read_csv(filepath_or_buffer='retail_sales/retail_sales_dataset.csv')

In [11]:
# Show the column labels of the freshly loaded frame under a heading.
print("Retail Sales Dataset Columns:", retail_sales.columns, sep="\n")

Retail Sales Dataset Columns:
Index(['Transaction ID', 'Date', 'Customer ID', 'Gender', 'Age',
       'Product Category', 'Quantity', 'Price per Unit', 'Total Amount'],
      dtype='object')


In [12]:
# Split the column labels into numerical and non-numerical groups.
# Select on the numeric dtype family rather than on `object`: booleans,
# datetimes, categoricals and pandas' dedicated string dtype are not `object`,
# so an `exclude=['object']` filter would label them "numerical" and pass them
# on to the numeric-only KNN imputation step.
numerical_columns = retail_sales.select_dtypes(include='number').columns
non_numerical_columns = retail_sales.select_dtypes(exclude='number').columns


In [13]:
# Report both column groups, each under its own heading (the leading "\n" in
# each heading produces the blank separator line).
print("\nNon-Numerical Columns:", non_numerical_columns, sep="\n")
print("\nNumerical Columns:", numerical_columns, sep="\n")


Non-Numerical Columns:
Index(['Date', 'Customer ID', 'Gender', 'Product Category'], dtype='object')

Numerical Columns:
Index(['Transaction ID', 'Age', 'Quantity', 'Price per Unit', 'Total Amount'], dtype='object')


In [14]:
# Keep only the numerical columns as input for the imputer. The non-numerical
# columns are not discarded from `retail_sales`; they are simply left out of
# this view (encode them instead if they should take part in imputation).
retail_sales_numerical = retail_sales.loc[:, numerical_columns]


In [15]:
# Apply KNN Imputer: fill missing numeric values using the 5 nearest rows.
# NOTE(review): every numerical column is used as a feature, including the
# 'Transaction ID' identifier, and no scaling is applied - confirm this is the
# intended neighbourhood definition.
knn_imputer = KNNImputer(n_neighbors=5)

# With its default settings the imputer drops any column that is entirely
# missing, so the returned array can be narrower than `numerical_columns`.
# Label the array with the columns that survive, then reindex to the full
# numeric schema so such a column stays present (all-NaN) instead of raising a
# shape-mismatch error.
# NOTE: the imputer returns floats, so integer columns come back as float
# (e.g. Transaction ID 1 -> 1.0).
retail_sales_imputed = pd.DataFrame(
    knn_imputer.fit_transform(retail_sales_numerical),
    columns=numerical_columns[retail_sales_numerical.notna().any().to_numpy()],
).reindex(columns=numerical_columns)


In [16]:
# Stitch the untouched non-numerical columns back next to the imputed numeric
# ones. The left-hand frame gets a fresh 0..n-1 index so both pieces line up
# row by row; non-numerical columns come first in the result.
retail_sales_combined = pd.concat(
    [
        retail_sales.loc[:, non_numerical_columns].reset_index(drop=True),
        retail_sales_imputed,
    ],
    axis="columns",
)

In [17]:
# Preview the first five rows of the recombined, imputed dataset.
print("\nRetail Sales Dataset after KNN Imputation:", retail_sales_combined.head(), sep="\n")


Retail Sales Dataset after KNN Imputation:
         Date Customer ID  Gender Product Category  Transaction ID   Age  \
0  2023-11-24     CUST001    Male           Beauty             1.0  34.0   
1  2023-02-27     CUST002  Female         Clothing             2.0  26.0   
2  2023-01-13     CUST003    Male      Electronics             3.0  50.0   
3  2023-05-21     CUST004    Male         Clothing             4.0  37.0   
4  2023-05-06     CUST005    Male           Beauty             5.0  30.0   

   Quantity  Price per Unit  Total Amount  
0       3.0            50.0         150.0  
1       2.0           500.0        1000.0  
2       1.0            30.0          30.0  
3       1.0           500.0         500.0  
4       2.0            50.0         100.0  
