In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file under the Kaggle input directory,
# then print the paths one per line.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as ps
import numpy as ny
import plotly.express as plex
import matplotlib.pyplot as plt
import seaborn as sn
from pandas_profiling import ProfileReport

# Load the sales CSV from the Kaggle input directory into a DataFrame.
sales_dataset = ps.read_csv(
    '/kaggle/input/sales-store-product-details/Salesstore.csv'
)

# Preview the first ten rows.
sales_dataset.head(n=10)

In [None]:
# Primary checks: summary statistics from DataFrame.describe() with its default arguments.
sales_dataset.describe()

In [None]:
# Build a profiling report over the whole dataset for a quicker overview,
# then render it through the report's to_notebook_iframe() method.
pandas_profile_key = ProfileReport(
    sales_dataset,
    title="SSPD Report",
)
pandas_profile_key.to_notebook_iframe()

In [None]:
# Count the missing values in each column.
# Author's note from the original run: the result looked clean ("Great!").
sales_dataset.isna().sum()

In [None]:
# The ten rows with the highest Sales values.
sales_dataset.nlargest(n=10, columns='Sales')

# Author's observations on the output:
# - A high sales amount does not necessarily convert to profit.
# - Corporate, Regular Air, Copiers & Fax, Large Box and Technology lead the top sales.

**Dropping the columns that are likely to have the least correlation with, and relevance to, our visualization exercise.**

In [None]:
# Derive the working frame by dropping the three columns not used in the plots;
# sales_dataset itself is left untouched.
clean_sspd_dataset = sales_dataset.drop(
    ['Order_ID', 'Customer_Name', 'Product_Name'],
    axis='columns',
)
clean_sspd_dataset.head(n=10)

In [None]:
# Inspect the distinct values of the Region column.
clean_sspd_dataset.loc[:, 'Region'].unique()

In [None]:
# Inspect the distinct values of the Product_Container column.
clean_sspd_dataset.loc[:, 'Product_Container'].unique()

In [None]:
# Inspect the distinct values of the Order_Priority column.
clean_sspd_dataset.loc[:, 'Order_Priority'].unique()

# Author's note: none of the unique values inspected so far needed cleanup.

**Using Plotly for data visualization**

In [None]:
# Box plot of Sales to show how skewed the values are.
# The y-axis is limited to the 0-30000 range.
fig00 = plex.box(
    data_frame=clean_sspd_dataset,
    y="Sales",
    range_y=[0, 30000],
    title="Sales skew box plot",
)
fig00.show()

In [None]:
# Bar chart of Sales against Region, with one colour per region.
fig = plex.bar(
    data_frame=clean_sspd_dataset,
    x='Sales',
    y='Region',
    color="Region",
    title="Sales vs Region",
)
fig.show()

In [None]:
# Bar chart of Sales against Customer_Segment, with one colour per segment.
fig2 = plex.bar(
    data_frame=clean_sspd_dataset,
    x='Sales',
    y='Customer_Segment',
    color='Customer_Segment',
    title="Sales vs Segment",
)
fig2.show()

In [None]:
# Bar chart of Sales against Product_Category, with one colour per category.
fig3 = plex.bar(
    data_frame=clean_sspd_dataset,
    x='Sales',
    y='Product_Category',
    color='Product_Category',
    title="Sales Vs Product_Category",
)
fig3.show()

In [None]:
# Bar chart of Sales against Order_Priority, with one colour per priority level.
fig4 = plex.bar(
    data_frame=clean_sspd_dataset,
    x='Sales',
    y='Order_Priority',
    color='Order_Priority',
    title="Sales vs Order_Priority",
)
fig4.show()

In [None]:
# Further sales breakdowns could be plotted the same way as the charts above.
# From here on the focus shifts to profit and loss figures.

# Sales per product sub-category, with the bar colour driven by the Profit column.
fig5 = plex.bar(
    data_frame=clean_sspd_dataset,
    x='Product_Sub-Category',
    y='Sales',
    color='Profit',
    title='<b>Sales & Profit by Sub Category</b>',
)
fig5.show()

In [None]:
# Profit per region; the bar colour is also driven by the Profit column.

fig6 = plex.bar(
    clean_sspd_dataset,
    x='Profit',
    y='Region',
    color='Profit',
    # The title used to read "Sales & Profit by Sub Category", copied from the
    # sub-category chart; it now describes what this chart actually plots.
    title='<b>Profit by Region</b>',
)
fig6.show()

In [None]:
# Bar chart of Sales against Ship_Mode, with one colour per shipping mode.
fig61 = plex.bar(
    data_frame=clean_sspd_dataset,
    x='Sales',
    y='Ship_Mode',
    color="Ship_Mode",
    title="Sales vs Ship_Mode",
)
fig61.show()

In [None]:
# Pairplot of the cleaned dataset, with points coloured by Region.
#
# seaborn's pairplot is a figure-level function: it creates and manages its own
# figure. The plt.figure(figsize=(18,15)) call that used to precede it therefore
# did not size the pairplot and only left a stray empty figure in the output.
# To resize the grid, pass pairplot's own `height` / `aspect` arguments instead.
sn.pairplot(clean_sspd_dataset, hue="Region")

In [None]:
# Which regions account for the losses?

# Keep only the rows with non-positive Profit and flip the sign so that losses
# become positive magnitudes a pie chart can use as slice sizes.
# The frame is built with .assign() rather than by assigning a column on the
# filtered frame afterwards, which raised pandas' SettingWithCopyWarning.
# Zero-profit rows are kept (as in the original filter); they add nothing to any slice.
losses_df = (
    clean_sspd_dataset
    .loc[clean_sspd_dataset['Profit'] <= 0]
    .assign(Profit=lambda frame: frame['Profit'].abs())
)

# Pie chart of the loss magnitude per region.
fig7 = plex.pie(
    losses_df,
    values='Profit',
    names='Region',
    title='<b>Losses by region</b>',
    color_discrete_sequence=plex.colors.sequential.RdBu,
)
fig7.show()

# Loss-making regions (read each slice as that region's % share of the total loss).

In [None]:
# Pie chart of the loss magnitudes in losses_df, this time split by product category.
fig7 = plex.pie(
    data_frame=losses_df,
    names='Product_Category',
    values='Profit',
    title='<b>Losses by product category</b>',
    color_discrete_sequence=plex.colors.sequential.RdBu,
)
fig7.show()