# Sales Visualization

## Setup

In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)
import pandas as pd

# Add the parent of the current working directory to the module search path
# so that modules located one level up can be imported.
sys.path.append("..")

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare (major, minor) as integers. Comparing the version strings directly
# is lexicographic, so an old release such as "0.9" would wrongly satisfy
# `"0.9" >= "0.20"`.
_sk_major, _sk_minor = map(int, re.match(r"(\d+)\.(\d+)", sklearn.__version__).groups())
assert (_sk_major, _sk_minor) >= (0, 20)

# Common imports
import numpy as np
import os

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures: <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "end_to_end_project"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
# Create the output directory up front; exist_ok avoids an error on re-runs.
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure into IMAGES_PATH.

    The file is named ``<fig_id>.<fig_extension>``. ``fig_extension`` is also
    passed to ``plt.savefig`` as the output format and ``resolution`` as the
    dpi. When ``tight_layout`` is true, ``plt.tight_layout()`` is applied
    before the figure is written.
    """
    filename = ".".join((fig_id, fig_extension))
    target = os.path.join(IMAGES_PATH, filename)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)


# Silence the harmless "internal gelsd" warning (see SciPy issue #5998).
# The message argument is a regex matched against the start of the warning text.
import warnings
warnings.filterwarnings("ignore", message="^internal gelsd")


Select the Input File

In [None]:
# Relative path to the sales CSV; it is loaded with pd.read_csv in a later cell.
input_file = "../data/sales.csv"

Create a DataFrame using an inferred schema

In [None]:
# Load the CSV; pandas infers the column types from the file contents.
df = pd.read_csv(input_file)

# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the summary.
df.info()

## Plot the data

In [None]:
# Display the DataFrame as this cell's output.
df

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.figure(figsize=(15, 25))
plt.suptitle("Sales attributes boxplots", fontsize=18,y=1.00)
plt.subplots_adjust(hspace=0.5)

for n, col in enumerate(df.select_dtypes(include=np.number).columns):
    ax = plt.subplot(10, 2, n+1)
    df[[col]].boxplot(ax=ax,sym='r+')
    

save_fig("attribute_box_plots")
plt.show()


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
df.hist(bins=10, figsize=(20,15))
save_fig("attribute_histogram_plots")
plt.show()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
df[["sales", "salary"]].plot.kde(figsize=(20,15))
save_fig("attribute_density_plots")
plt.show()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
scatter_matrix(df, alpha=0.2, figsize=(30, 30), diagonal='kde')
save_fig("scatter_matrix")
plt.show()