## Import Modules

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
%matplotlib inline

import warnings
# NOTE(review): 'ignore' with no category/module filter silences every warning
# raised after this point, including deprecation notices from the libraries
# imported above. Consider narrowing the filter if a specific warning is the target.
warnings.filterwarnings('ignore')

## Loading Dataset

In [3]:
# The file is read with ';' as the field separator instead of the default comma.
df = pd.read_csv(
    '/kaggle/input/zara-sales-analysis/Zara_Sales_Analysis.csv',
    delimiter=';',
)

## Exploratory Data Analysis

In [4]:
# Preview the first five rows
df.head(n=5)

Unnamed: 0,Product ID,Product Position,Promotion,Product Category,Seasonal,Sales Volume,brand,url,sku,name,description,price,currency,scraped_at,terms,section
0,185102,Aisle,No,Clothing,No,2823,Zara,https://www.zara.com/us/en/basic-puffer-jacket...,272145190-250-2,BASIC PUFFER JACKET,Puffer jacket made of tear-resistant ripstop f...,19.99,USD,2024-02-19T08:50:05.654618,jackets,MAN
1,188771,Aisle,No,Clothing,No,654,Zara,https://www.zara.com/us/en/tuxedo-jacket-p0889...,324052738-800-46,TUXEDO JACKET,Straight fit blazer. Pointed lapel collar and ...,169.0,USD,2024-02-19T08:50:06.590930,jackets,MAN
2,180176,End-cap,Yes,Clothing,Yes,2220,Zara,https://www.zara.com/us/en/slim-fit-suit-jacke...,335342680-800-44,SLIM FIT SUIT JACKET,Slim fit jacket. Notched lapel collar. Long sl...,129.0,USD,2024-02-19T08:50:07.301419,jackets,MAN
3,112917,Aisle,Yes,Clothing,Yes,1568,Zara,https://www.zara.com/us/en/stretch-suit-jacket...,328303236-420-44,STRETCH SUIT JACKET,Slim fit jacket made of viscose blend fabric. ...,129.0,USD,2024-02-19T08:50:07.882922,jackets,MAN
4,192936,End-cap,No,Clothing,Yes,2942,Zara,https://www.zara.com/us/en/double-faced-jacket...,312368260-800-2,DOUBLE FACED JACKET,Jacket made of faux leather faux shearling wit...,139.0,USD,2024-02-19T08:50:08.453847,jackets,MAN


In [5]:
# Preview the last five rows
df.tail(n=5)

Unnamed: 0,Product ID,Product Position,Promotion,Product Category,Seasonal,Sales Volume,brand,url,sku,name,description,price,currency,scraped_at,terms,section
247,159182,Front of Store,Yes,Clothing,No,1014,Zara,https://www.zara.com/us/en/basic-100-wool-swea...,321993245-500-2,FAUX LEATHER OVERSIZED JACKET LIMITED EDITION,Jacket made of technical fabric with padded in...,169.0,USD,2024-02-19T09:10:43.883037,jackets,MAN
248,199233,Aisle,Yes,Clothing,No,2222,Zara,https://www.zara.com/us/en/colorblock-knit-cro...,330590505-500-2,CONTRASTING PATCHES BOMBER JACKET,Oversized jacket. Notched lapel collar and lon...,159.0,USD,2024-02-19T09:10:44.463883,jackets,MAN
249,137044,Aisle,No,Clothing,Yes,2534,Zara,https://www.zara.com/us/en/hooded-technical-ja...,320680326-107-39,PATCH BOMBER JACKET,Varsity jacket with elastic collar and long sl...,12.99,USD,2024-02-19T09:10:45.009106,jackets,MAN
250,154736,Front of Store,Yes,Clothing,Yes,1466,Zara,https://www.zara.com/us/en/houndstooth-suit-ja...,311292244-800-39,CROPPED BOMBER JACKET LIMITED EDITION,Varsity jacket with padded interior. Rib elast...,19.9,USD,2024-02-19T09:10:45.631804,jackets,MAN
251,141434,Front of Store,No,Clothing,Yes,2870,Zara,https://www.zara.com/us/en/bomber-jacket-p0534...,311307610-400-39,FAUX LEATHER PUFFER JACKET,Jacket with lightly padded interior. Rib colla...,39.9,USD,2024-02-19T09:10:31.877972,jackets,MAN


In [6]:
# check the number of rows and columns
df.shape

(252, 16)

In [7]:
# summary of datatypes: per-column non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Product ID        252 non-null    int64  
 1   Product Position  252 non-null    object 
 2   Promotion         252 non-null    object 
 3   Product Category  252 non-null    object 
 4   Seasonal          252 non-null    object 
 5   Sales Volume      252 non-null    int64  
 6   brand             252 non-null    object 
 7   url               252 non-null    object 
 8   sku               252 non-null    object 
 9   name              251 non-null    object 
 10  description       250 non-null    object 
 11  price             252 non-null    float64
 12  currency          252 non-null    object 
 13  scraped_at        252 non-null    object 
 14  terms             252 non-null    object 
 15  section           252 non-null    object 
dtypes: float64(1), int64(2), object(13)
memory u

In [8]:
# statistical summary (count, mean, std, min, quartiles, max) of the numerical columns
df.describe()

Unnamed: 0,Product ID,Sales Volume,price
count,252.0,252.0,252.0
mean,153370.503968,1823.702381,86.25254
std,26160.444549,697.703748,52.083205
min,110075.0,529.0,7.99
25%,131053.75,1243.0,49.9
50%,151681.5,1839.5,79.9
75%,175669.75,2398.75,109.0
max,199631.0,2989.0,439.0


In [9]:
# Count the missing entries in every column
df.isna().sum()

Product ID          0
Product Position    0
Promotion           0
Product Category    0
Seasonal            0
Sales Volume        0
brand               0
url                 0
sku                 0
name                1
description         2
price               0
currency            0
scraped_at          0
terms               0
section             0
dtype: int64

In [10]:
# Handle missing values: keep only the rows where both 'name' and 'description' are present
df = df[df[['name', 'description']].notna().all(axis=1)]

In [11]:
# Confirm that no missing values remain in the two text columns
df.loc[:, ['name', 'description']].isna().sum()

name           0
description    0
dtype: int64

In [12]:
# Number of distinct (non-null) values per column
df.nunique(dropna=True)

Product ID          250
Product Position      3
Promotion             2
Product Category      1
Seasonal              2
Sales Volume        233
brand                 1
url                 226
sku                 226
name                193
description         221
price                28
currency              1
scraped_at          227
terms                 5
section               2
dtype: int64

In [13]:
# Distinct values of the 'section' column
df['section'].unique()

array(['MAN', 'WOMAN'], dtype=object)

## Data Visualization

In [14]:
!pip install plotly



In [15]:
# Total Sales Volume per store section (MAN / WOMAN).
# With `y` given, px.histogram sums y inside each x bucket by default.
fig = px.histogram(
    df,
    x='section',
    y='Sales Volume',
    color='section',
    title='Sales Volume Comparison by Gender',
    labels={'section': 'Gender of Customer'},
    color_discrete_sequence=px.colors.qualitative.Prism,
    template='plotly_dark',
)
fig.show()

In [16]:
# Total Sales Volume per in-store product position.
# With `y` given, px.histogram sums y inside each x bucket by default.
fig = px.histogram(
    df,
    x='Product Position',
    y='Sales Volume',
    color='Product Position',
    title='Sales Volume Comparison by Product Position',
    color_discrete_sequence=px.colors.qualitative.Prism,
    template='plotly_dark',
)
fig.show()

In [17]:
# Total Sales Volume for seasonal vs. non-seasonal products.
# With `y` given, px.histogram sums y inside each x bucket by default.
fig = px.histogram(
    df,
    x='Seasonal',
    y='Sales Volume',
    color='Seasonal',
    title='Sales Volume Comparison by Seasonal',
    color_discrete_sequence=px.colors.qualitative.Prism,
    template='plotly_dark',
)
fig.show()

In [18]:
# Distribution of total Sales Volume across price bins.
# `color` is deliberately not set: price is numeric, and colouring by it would
# split the figure into one trace per distinct price, filling the legend
# without adding information. The bar heights are unaffected.
fig = px.histogram(
    df,
    x='price',
    y='Sales Volume',
    title='Sales Volume Comparison by price',
    color_discrete_sequence=px.colors.qualitative.Prism,
    template='plotly_dark',
)
fig.show()

In [19]:
# Total Sales Volume per product category (the 'terms' column).
# With `y` given, px.histogram sums y inside each x bucket by default.
fig = px.histogram(
    df,
    x='terms',
    y='Sales Volume',
    color='terms',
    title='Sales Volume Comparison by Category',
    color_discrete_sequence=px.colors.qualitative.Prism,
    template='plotly_dark',
)
fig.show()

In [20]:
# Price comparison by category (the 'terms' column).
# histfunc='avg' is set explicitly: the default for a given `y` is 'sum', which
# would plot the summed price of every item in a category and therefore mostly
# reflect how many items the category has rather than how expensive they are.
fig = px.histogram(
    df,
    x='terms',
    y='price',
    histfunc='avg',
    color='terms',
    title='Price Comparison by Category',
    color_discrete_sequence=px.colors.qualitative.Prism,
    template='plotly_dark',
)
fig.show()

In [21]:
# Total Sales Volume for promoted vs. non-promoted products.
# With `y` given, px.histogram sums y inside each x bucket by default.
fig = px.histogram(
    df,
    x='Promotion',
    y='Sales Volume',
    color='Promotion',
    title='Sales Volume Comparison by Promotion',
    color_discrete_sequence=px.colors.qualitative.Prism,
    template='plotly_dark',
)
fig.show()

In [22]:
# Total Sales Volume per product name.
# `color` is not set: there are far more distinct names than palette colours,
# so colouring by name would only recycle colours and produce a legend with one
# entry per product. The bars are ordered by total so the best sellers come first.
fig = px.histogram(
    df,
    x='name',
    y='Sales Volume',
    title='Sales Volume Comparison by name',
    color_discrete_sequence=px.colors.qualitative.Prism,
    template='plotly_dark',
)
fig.update_xaxes(categoryorder='total descending')
fig.show()

In [23]:
# Price per product name.
# Several rows share the same name, so histfunc='avg' is used: the default
# 'sum' would add the prices of same-named rows together and overstate them.
# `color` is not set because one trace per name only recycles the palette and
# produces a legend with one entry per product.
fig = px.histogram(
    df,
    x='name',
    y='price',
    histfunc='avg',
    title='Name by Price',
    color_discrete_sequence=px.colors.qualitative.Prism,
    template='plotly_dark',
)
fig.show()