# Task - 4 Exploratory Data Analysis - Retail

Importing Dependencies

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

Importing the dataset

In [None]:
# Load the CSV (path is relative to the current working directory) into a
# DataFrame that every later cell reads through the name `dataset`.
dataset = pd.read_csv("SampleSuperstore.csv")
# Trailing expression: previews the first rows of the frame.
dataset.head()

In [None]:
# Visual null check: dataset.isnull() is a boolean frame (True where a value
# is missing), drawn as a heatmap with the colour bar suppressed.
sns.heatmap(dataset.isnull(), cbar = False, cmap = 'viridis')

From the above heatmap, it is clear that the dataset does not contain any null values.

# Exploratory Data Analysis (EDA)

# Correlation Heatmap 


In [None]:
# Correlate only the numeric columns. The frame also holds text columns
# (e.g. 'Category', 'State'), and DataFrame.corr() raises on non-numeric data
# in pandas >= 2.0 instead of silently dropping it as older versions did.
numeric_columns = dataset.select_dtypes(include = 'number')
# annot = True writes each correlation coefficient inside its cell.
sns.heatmap(numeric_columns.corr(), cmap = 'viridis', annot = True)

# Sales vs Profit 

In [None]:
# Overlay the kernel density estimates of Sales and Profit on a single axis.
plt.figure(figsize = (15, 6))
# `fill` and `bw_method` are the current names for the deprecated `shade` and
# `bw` keywords; seaborn maps the old names onto these with a FutureWarning
# and has announced that the old spellings will become errors.
for column, colour in (('Sales', 'Teal'), ('Profit', 'Cornflowerblue')):
    sns.kdeplot(dataset[column], color = colour, label = column, fill = True, bw_method = 25)
# Fixed viewing window for both curves.
# NOTE(review): the x-range starts at 0, so any negative Profit values
# (losses) fall outside the view -- confirm this is intended.
plt.xlim([0, 13000])
plt.ylim([0, 0.00007])
plt.ylabel('Density')
plt.xlabel('Monetary Value in USD$')
plt.title('Sales vs Profit', fontsize = 20)
plt.legend(loc = 'upper right', frameon = False)
plt.show()

# Record Count for Each Categorical Feature

In [None]:
# One count plot per categorical feature, laid out on a 2x2 grid.
fig, axs = plt.subplots(nrows = 2, ncols = 2, figsize = (15, 6))
features = ['Category', 'Segment', 'Ship Mode', 'Region']
# axs.flat walks the grid row by row, so the panels keep the original
# placement: Category, Segment on the top row; Ship Mode, Region below.
for ax, feature in zip(axs.flat, features):
    # Pass the column as `x=` explicitly. Since seaborn 0.12 the first
    # positional argument is `data`, so a bare Series is no longer guaranteed
    # to be treated as the x variable.
    sns.countplot(x = dataset[feature], ax = ax, palette = 'PuBu')
    ax.set_title(feature, fontsize = 20)
plt.tight_layout()

# Total State-Wise Dealings

In [None]:
# Count the non-null 'Quantity' entries per state, largest count first.
dealings_per_state = dataset.groupby('State')['Quantity'].count()
dealings_per_state = dealings_per_state.sort_values(ascending = False)
# Despite the `df_` prefix, this name is bound to the Axes that plot.bar()
# returns, not to a DataFrame.
df_state_dealings = dealings_per_state.plot.bar(figsize = (15, 6), color = 'Cornflowerblue')
plt.title('Total State-Wise Dealings', fontsize = 20)
plt.xlabel('American States')
plt.ylabel('Total Number of Dealings')
plt.show()

# Total State-Wise Sales

In [None]:
# Sum 'Sales' per state and order the states from highest to lowest total.
sales_per_state = dataset.groupby('State')['Sales'].sum()
sales_per_state = sales_per_state.sort_values(ascending = False)
# Despite the `df_` prefix, this name is bound to the Axes that plot.bar()
# returns, not to a DataFrame.
df_state_sales = sales_per_state.plot.bar(figsize = (15, 6), color = 'Cornflowerblue')
plt.title('Total State-Wise Sales', fontsize = 20)
plt.xlabel('American States')
plt.ylabel('Total Sales in USD$')
plt.show()

# Total State-Wise Profit and Loss

In [None]:
# Sum 'Profit' per state and order the states from highest to lowest total.
profit_per_state = dataset.groupby('State')['Profit'].sum()
profit_per_state = profit_per_state.sort_values(ascending = False)
# Despite the `df_` prefix, this name is bound to the Axes that plot.bar()
# returns, not to a DataFrame.
df_state_profit = profit_per_state.plot.bar(figsize = (15, 6), color = 'Cornflowerblue')
plt.title('Total State-Wise Profit/Loss', fontsize = 20)
plt.xlabel('American States')
plt.ylabel('Total Profit/Loss in USD$')
plt.show()

# Profit of product categories and sub-categories for the chosen states

In [None]:
def state_data_viewer(states):
    """Plot the mean profit per sub-category for each requested state.

    For every state in ``states`` a new figure with three side-by-side panels
    is created, one per product category (Furniture, Office Supplies,
    Technology). Each panel is a bar plot of the mean ``Profit`` of the
    sub-categories within that category, computed from the module-level
    ``dataset`` frame. A category with no rows for a state leaves its panel
    empty instead of aborting the remaining plots.

    Args:
        states: Iterable of values from the ``State`` column.

    Raises:
        KeyError: If a requested state has no rows in ``dataset``.
    """
    categories = ['Furniture', 'Office Supplies', 'Technology']
    # Group by the bare column name: a length-1 list key combined with a
    # scalar get_group() argument is deprecated in recent pandas releases.
    rows_by_state = dataset.groupby('State')
    for state in states:
        state_rows = rows_by_state.get_group(state)
        fig, axes = plt.subplots(1, len(categories), figsize = (30, 5))
        fig.suptitle(state, fontsize = 16)
        for axis, category in zip(axes, categories):
            category_rows = state_rows[state_rows['Category'] == category]
            # Select 'Profit' before aggregating: averaging every column
            # raises on the text columns in pandas >= 2.0, and only the
            # profit figure is plotted anyway.
            mean_profit = category_rows.groupby('Sub-Category')['Profit'].mean()
            if not mean_profit.empty:
                sns.barplot(x = mean_profit, y = mean_profit.index,
                            ax = axis, palette = 'Blues_d')
            # Label the panel with its category (replaces the index name
            # seaborn would otherwise use as the y label).
            axis.set_ylabel(category)
# States whose per-category profit breakdown is drawn below.
states = [
    'California',
    'Washington',
    'Mississippi',
    'Arizona',
    'Illinois',
    'Texas',
]
state_data_viewer(states)