# Import software libraries and load the dataset

In [1]:
import sys
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sb

# Report the interpreter and library versions this notebook runs against.
version_report = [
    ('- Python {}', sys.version),
    ('- NumPy {}', np.__version__),
    ('- pandas {}', pd.__version__),
    ('- Matplotlib {}', mpl.__version__),
    ('- Seaborn {}', sb.__version__),
]
print('Libraries used in this project:')
for template, version in version_report:
    print(template.format(version))

# Read the cleaned store data; the first CSV column becomes the index.
stores_df = pd.read_csv(
    '../data/stores_data_full_clean.csv',
    index_col = 0
)
# Convert the 'Date' column to pandas datetime values.
stores_df['Date'] = pd.to_datetime(stores_df['Date'])
print('\nLoaded dataset.')

ModuleNotFoundError: No module named 'seaborn'

# Generate scatter plots using different Seaborn styles

In [None]:
# Keyword arguments shared by all three scatter plots.
kwargs = dict(x = 'UnitPrice', y = 'GrossIncome',
              data = stores_df, palette = 'Blues')

fig = plt.figure(figsize = (16, 4))

# One panel per (axes style, hue column) pair, laid out left to right.
# Each subplot is created inside the style context so that the style
# applies to that subplot only.
panels = [('white', 'COGS'),
          ('dark', 'Quantity'),
          ('ticks', 'CustomerRating')]

for position, (style_name, hue_column) in enumerate(panels, start = 1):
    with sb.axes_style(style_name):
        ax = fig.add_subplot(1, 3, position)
        sb.scatterplot(hue = hue_column, ax = ax, **kwargs)

# Generate a compound distribution plot with a customized style

In [None]:
# Use the 'white' style, but keep tick marks on the bottom axis.
sb.set_style('white', {'xtick.bottom': True})

fig, ax = plt.subplots(figsize = (12, 10))

# distplot() is deprecated since Seaborn 0.11 and scheduled for removal, so
# build the same compound plot (density histogram + KDE curve + rug) from its
# axes-level replacements. Both are bound explicitly to `ax` rather than
# relying on the current axes. Requires Seaborn 0.11 or later.
revenue = stores_df['Revenue']
sb.histplot(x = revenue, bins = 20, stat = 'density', kde = True, ax = ax)
sb.rugplot(x = revenue, ax = ax)

ax.set_title('Revenue Distribution', size = 16)
ax.set_xlabel('Revenue (USD)', size = 14)

# Remove the top and right spines for a cleaner look.
sb.despine()

# Generate a bar chart using a Seaborn color palette

In [None]:
# Restore Seaborn's default style before drawing.
sb.set()

# The 'colorblind' palette is active only inside this context.
with sb.color_palette('colorblind'):
    fig, ax = plt.subplots(figsize = (12, 10))

    # Bar height is the summed quantity per product line, split by branch.
    sb.barplot(
        data = stores_df,
        x = 'ProductLine',
        y = 'Quantity',
        hue = 'Branch',
        estimator = np.sum
    )

    ax.set_title('Quantity Sold for Each Product Line', size = 16)

# Generate a swarm plot using a style and color palette

In [None]:
# Apply the 'whitegrid' axes style and the 'deep' palette to this plot only.
with sb.axes_style('whitegrid'):
    with sb.color_palette('deep'):
        fig, ax = plt.subplots(figsize = (12, 10))

        # One point per row: gross income grouped by product line.
        sb.swarmplot(
            data = stores_df,
            x = 'ProductLine',
            y = 'GrossIncome'
        )

        ax.set_title('Distribution of Product Line Income', size = 16)

# Generate a heat map showing correlations between numeric variables

In [None]:
fig, ax = plt.subplots(figsize = (12, 10))

# Correlate only the numeric (and Boolean) columns. pandas 2.0+ no longer
# drops non-numeric columns silently in DataFrame.corr() and can raise on
# them instead (e.g. the parsed 'Date' column). Selecting the columns
# explicitly behaves the same on older and newer pandas versions.
numeric_df = stores_df.select_dtypes(include = ['number', 'bool'])

# Annotate each cell with its correlation coefficient.
sb.heatmap(numeric_df.corr(), annot = True, ax = ax)

# Generate a linear regression plot for revenue and gross income

In [None]:
fig, ax = plt.subplots(figsize = (12, 10))

# Scatter of revenue against gross income with a fitted regression line.
# The trailing semicolon suppresses the notebook's echo of the return value.
sb.regplot(
    data = stores_df,
    x = 'Revenue',
    y = 'GrossIncome'
);

# Generate a `FacetGrid` of quantity sold per product line, categorized by branch and customer type

In [None]:
# One bar chart per (branch, customer type) combination: branches form the
# grid rows and customer types the columns. Bar height is the summed quantity.
plot = sb.catplot(
    data = stores_df,
    kind = 'bar',
    x = 'ProductLine',
    y = 'Quantity',
    row = 'Branch',
    col = 'CustomerType',
    estimator = np.sum,
    height = 8
)

# Label the x ticks with the product lines in order of first appearance,
# and tilt the labels by -30 degrees.
plot.set_xticklabels(
    stores_df['ProductLine'].unique(),
    rotation = -30
);

# Generate a `JointGrid` showing the distributions of revenue and income

In [None]:
# Joint scatter plot of revenue against gross income, with each variable's
# distribution drawn on the margins.
sb.jointplot(
    data = stores_df,
    x = 'Revenue',
    y = 'GrossIncome',
    kind = 'scatter',
    height = 10
);