### Exploratory Data Analysis

In [2]:
import pandas as pd

# Read the e-commerce orders export (path is relative to the notebook's working directory).
df = pd.read_csv("ecom-data.csv")

# Per-column count of null cells, to see which columns need imputation.
missing_data = df.isna().sum()
print(missing_data)

Order_Date             0
Time                   0
Aging                  1
Customer_Id            0
Gender                 0
Device_Type            0
Customer_Login_type    0
Product_Category       0
Product                0
Sales                  1
Quantity               2
Discount               1
Profit                 0
Shipping_Cost          1
Order_Priority         2
Payment_method         0
dtype: int64


#### Checking and filling missing values

In [3]:
# Impute gaps in every numeric column with that column's mean.
# NOTE(review): this treats all numeric columns alike, so a count-like column
# such as Quantity receives a fractional fill value — confirm that is acceptable.
numerical_columns = df.select_dtypes(include='number')  # numeric-dtype columns only
df[numerical_columns.columns] = numerical_columns.fillna(numerical_columns.mean())

# Default a missing "Order_Priority" to "Medium".
# Assign the result back instead of calling fillna(..., inplace=True) on
# df['Order_Priority']: the in-place form acts on an intermediate object, which
# pandas 2.x warns about and which no longer updates df under Copy-on-Write.
df['Order_Priority'] = df['Order_Priority'].fillna('Medium')

# Re-count nulls to confirm nothing is left to fill.
missing_data = df.isnull().sum()
print(missing_data)

Order_Date             0
Time                   0
Aging                  0
Customer_Id            0
Gender                 0
Device_Type            0
Customer_Login_type    0
Product_Category       0
Product                0
Sales                  0
Quantity               0
Discount               0
Profit                 0
Shipping_Cost          0
Order_Priority         0
Payment_method         0
dtype: int64


#### Summary Statistics

In [4]:
# Descriptive statistics (count, mean, std, min, quartiles, max) as reported by describe().
summary_stats = df.describe()
print(summary_stats)

              Aging   Customer_Id         Sales      Quantity      Discount  \
count  51290.000000  51290.000000  51290.000000  51290.000000  51290.000000   
mean       5.255035  58155.758764    152.340872      2.502983      0.303821   
std        2.959920  26032.215826     66.494771      1.511829      0.131025   
min        1.000000  10000.000000     33.000000      1.000000      0.100000   
25%        3.000000  35831.250000     85.000000      1.000000      0.200000   
50%        5.000000  61018.000000    136.500000      2.000000      0.300000   
75%        8.000000  80736.250000    218.000000      4.000000      0.400000   
max       10.500000  99999.000000    250.000000      5.000000      0.500000   

             Profit  Shipping_Cost  
count  51290.000000   51290.000000  
mean      70.407226       7.041557  
std       48.729488       4.871697  
min        0.500000       0.100000  
25%       24.900000       2.500000  
50%       59.900000       6.000000  
75%      118.400000      11.8

#### Checking for outliers in Profit (z-score)

In [5]:
from scipy import stats

# Standardise Profit and keep the rows lying more than 3 standard deviations
# from the mean, in either direction.
z_scores = stats.zscore(df['Profit'])
is_outlier = abs(z_scores) > 3
outliers = df[is_outlier]
print(outliers)

Empty DataFrame
Columns: [Order_Date, Time, Aging, Customer_Id, Gender, Device_Type, Customer_Login_type, Product_Category, Product, Sales, Quantity, Discount, Profit, Shipping_Cost, Order_Priority, Payment_method]
Index: []


#### Exploring Categorical Variables

In [6]:
# Number of orders per Gender value, most frequent first.
gender_column = df["Gender"]
gender_distribution = gender_column.value_counts()
print(gender_distribution)

Gender
Male      28138
Female    23152
Name: count, dtype: int64


In [7]:
# Number of orders per product category, most frequent first.
category_column = df["Product_Category"]
product_categories = category_column.value_counts()
print(product_categories)

Product_Category
Fashion               25646
Home & Furniture      15438
Auto & Accessories     7505
Electronic             2701
Name: count, dtype: int64


In [8]:
# Number of orders per payment method, most frequent first.
# NOTE(review): the recorded output includes a single 'not_defined' row —
# decide whether it should be treated as a missing value.
payment_column = df["Payment_method"]
payment_methods = payment_column.value_counts()
print(payment_methods)

Payment_method
credit_card    38137
money_order     9629
e_wallet        2789
debit_card       734
not_defined        1
Name: count, dtype: int64


In [9]:
# Number of orders per customer login type, most frequent first.
login_column = df["Customer_Login_type"]
login_type = login_column.value_counts()
print(login_type)

Customer_Login_type
Member          49097
Guest            1993
First SignUp      173
New                27
Name: count, dtype: int64


In [10]:
# Ten products with the highest total Sales, as a two-column frame (Product, Sales).
best_selling_products = (
    df.groupby('Product')['Sales']
    .sum()            # total Sales per product
    .nlargest(10)     # keep the ten largest totals, descending
    .reset_index()    # turn the Product index back into a column
)
best_selling_products

Unnamed: 0,Product,Sales
0,T - Shirts,578336.0
1,Titak watch,531468.0
2,Running Shoes,522144.0
3,Jeans,508376.0
4,Formal Shoes,496503.0
5,Shirts,457072.0
6,Fossil Watch,370788.0
7,Towels,351348.0
8,Sofa Covers,332424.0
9,Bed Sheets,325151.0
