In [None]:
# In this notebook, we visualize hidden patterns in the data using Plotly.

# Contents
### 1. Setup
### 2. Exploring Dataset
### 3. Auto Viz Report
### 4. Visualizing Patterns using Plotly
* #### Histogram 
* #### Box Plot  
* #### Bar Plot
* #### Grouped Bar Plot
* #### Scatter Plot
* #### Line Plot

In [None]:
#Installing Packages
!pip3 install xlrd
!pip install autoviz

In [None]:
# Importing Libraries
import pandas as pd
from pandas_profiling import ProfileReport
from autoviz.AutoViz_Class import AutoViz_Class

from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
import plotly.express as px
init_notebook_mode(connected=True)
import plotly.graph_objs as go
from wordcloud import WordCloud

# Plotly template settings: this template name is passed as `template=` to
# every Plotly Express figure created below, so all charts share one look.
template_style = "plotly_white"

In [None]:
# Read the e-commerce records CSV into `df` and preview the first five rows
df = pd.read_csv(
    filepath_or_buffer='../input/us-ecommerce-record-2020/US  E-commerce records 2020.csv'
)
df.head(n=5)

# 2. Exploring Dataset

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes)
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

In [None]:
# Distinct values of a single column (here 'Ship Mode') to see which categories it holds
ship_mode_column = df['Ship Mode']
ship_mode_column.unique()

In [None]:
# Number of missing (NaN) entries in every column
df.isna().sum()

## Q.1 What was the highest sale in 2020?

In [None]:
# The three rows with the largest 'Sales' values
df.nlargest(n=3, columns='Sales')

In [None]:
# Row holding the single highest 'Sales' value, looked up by its position
top_sale_position = df['Sales'].argmax()
df.iloc[top_sale_position]

## Q.2 What is the average Discount on Chairs?

In [None]:
# Boolean mask: True for the rows whose Sub-Category is 'Chairs'
mask = df['Sub-Category'].eq('Chairs')

# Average Discount over the masked rows only
df.loc[mask, 'Discount'].mean()

## Q.3 What are the total Profit & Sales by Sub-Category?

In [None]:
# Row-level profit margin: Profit divided by Sales for each record
df['Profit Margin'] = df['Profit'] / df['Sales']

# Total the numeric columns per Sub-Category.
# numeric_only=True keeps text columns out of the aggregation (summing them
# would concatenate strings or raise, depending on the pandas version);
# as_index=False keeps 'Sub-Category' as a regular column.
df_by_sub_category = df.groupby('Sub-Category', as_index=False).sum(numeric_only=True)

# A sum of per-row ratios is not a margin: recompute it from the group totals
df_by_sub_category['Profit Margin'] = (
    df_by_sub_category['Profit'] / df_by_sub_category['Sales']
)

# Preview the aggregated frame
df_by_sub_category.head()

# 3. Auto Viz Report

In [None]:
# Build an AutoViz instance and run its automated report on the same CSV file
# that was loaded into `df` earlier (AutoViz is given the path, not the DataFrame).
AV = AutoViz_Class()
# The value returned by AutoViz is kept in df_autoviz.
# NOTE(review): presumably the DataFrame AutoViz analysed — confirm against the AutoViz docs.
df_autoviz = AV.AutoViz('../input/us-ecommerce-record-2020/US  E-commerce records 2020.csv')

# 4. Visualizing Patterns using Plotly

### **Histogram**

In [None]:
# Quick stats overview for Sales.
# Only the last expression of a cell is rendered automatically, so the summary
# is passed to display() explicitly; otherwise its output is silently dropped.
display(df['Sales'].describe())

# Histogram of Sales over all rows
fig = px.histogram(df,
                   x="Sales",
                   template=template_style)
# Plot Chart
fig.show()

### **Box Plot**

In [None]:
# Distribution and skewness of Sales; the y-axis is limited to the 0-1000 range
box_options = dict(
    y="Sales",
    range_y=[0, 1000],
    template=template_style,
)
fig = px.box(df, **box_options)

# Render the figure
fig.show()

### **Bar Plot (Sales by Sub-Category)**

In [None]:
# Groupby as a function
def grouped_data(column_name, frame=None):
    '''
    Group a DataFrame by one column and total its numeric columns.

    Input:
        column_name: name of the column to group by.
        frame: DataFrame to aggregate. When omitted, the notebook-level
            `df` is used, so existing one-argument calls keep working.
    Output:
        A new DataFrame with one row per distinct value of `column_name`
        (kept as a regular column, not as the index) holding the per-group
        sum of every numeric column.
    '''
    source = df if frame is None else frame
    # numeric_only=True: text columns cannot be totalled meaningfully
    # (pandas would concatenate the strings or raise, depending on version).
    return source.groupby(column_name, as_index=False).sum(numeric_only=True)
    
# Totals per Segment. Only the last expression of a cell is rendered
# automatically, so intermediate tables are passed to display() explicitly.
display(grouped_data('Segment'))

# Totals per Sub-Category; `data` is the input for the bar charts
data = grouped_data('Sub-Category')
display(data.head())

# Create Chart
fig = px.bar(data,
             x='Sub-Category',
             y='Sales',
             title='<b>Sales by Sub Category</b>',
             template=template_style)

# Display Plot
fig.show()

### **Bar Plot (Profit by Sub-Category)**

In [None]:
# Bar chart of Profit per Sub-Category, drawn from the grouped `data` frame
profit_bar_options = dict(
    x='Sub-Category',
    y='Profit',
    title='<b>Profit by Sub Category</b>',
    template=template_style,
)
fig = px.bar(data, **profit_bar_options)

# Render the figure
fig.show()

### **Bar Plot (Sales & Profit by Sub-Category)**

In [None]:
# Bar height shows Sales per Sub-Category; bar colour encodes Profit on a
# red -> yellow -> green continuous scale
sales_profit_bar_options = dict(
    x='Sub-Category',
    y='Sales',
    color='Profit',
    color_continuous_scale=["red", "yellow", "green"],
    template=template_style,
    title='<b>Sales & Profit by Sub Category</b>',
)
fig = px.bar(data, **sales_profit_bar_options)

# Render the figure
fig.show()

### **Scatter Plot**

In [None]:
# Sales against Profit for every row of `df`, coloured by Discount,
# to inspect where Profit turns negative
scatter_options = dict(
    x='Sales',
    y='Profit',
    color='Discount',
    template=template_style,
    title='<b>Scatterplot Sales/Profit</b>',
)
fig = px.scatter(df, **scatter_options)

# Render the figure
fig.show()

 ### **Bar Plot (Mean Discount by Sub Category)**

In [None]:
# New dataframe: per Sub-Category, the mean of 'Discount' and the sum of 'Profit'
# ('Sub-Category' becomes the index of the result)
df_discount = df.groupby('Sub-Category').agg({'Discount': 'mean',
                                              'Profit': 'sum'})

# Show the first 5 rows. Only the last expression of a cell is rendered
# automatically, so this mid-cell preview needs an explicit display().
display(df_discount.head())

# Create Chart: bar height = mean Discount, bar colour = total Profit
fig = px.bar(df_discount,
             x=df_discount.index,
             y='Discount',
             color='Profit',
             color_continuous_scale=['red', 'yellow', 'green'],
             template=template_style,
             title='<b>Mean Discount by Sub Category</b>')

# Display Plot
fig.show()

### **Line Plot (Sales/Profit Development)**

In [None]:
# Sort rows by Order Date.
# NOTE(review): the column is sorted exactly as loaded. If it holds date
# strings rather than datetimes, this order is lexicographic, not
# chronological — confirm the dtype/format and convert with pd.to_datetime
# (using the matching format) if needed.
df_sorted = df.sort_values(by=['Order Date'])

# Add cumulative (running-total) Sales & Profit in the sorted order
df_sorted['cummulative_sales'] = df_sorted['Sales'].cumsum()
df_sorted['cummulative_profit'] = df_sorted['Profit'].cumsum()

# Show the last 3 rows of the sorted dataframe. Only the last expression of a
# cell is rendered automatically, so this preview needs an explicit display().
display(df_sorted.tail(3))

# Create Chart: both running totals plotted against Order Date
fig = px.line(df_sorted,
              x='Order Date',
              y=['cummulative_sales', 'cummulative_profit'],
              template=template_style,
              title='<b>Sales/Profit Development</b>')

# Display Plot
fig.show()

In [None]:
# If you like this notebook, don't forget to upvote.