In [None]:
# Standard imports
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

In [None]:
# Location of the sample sales workbook (fetched over the network)
SALES_XLSX_URL = "https://github.com/chris1610/pbpython/blob/master/data/sample-salesv3.xlsx?raw=true"
df = pd.read_excel(SALES_XLSX_URL)

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple
df.shape

In [None]:
# Preview the first 10 rows of the raw data
df.head(10)

In [None]:
# Column labels available in the raw data
df.columns

In [None]:
# Row index of the raw data
df.index

In [None]:
# Print a per-column summary (dtype and non-null count) plus memory usage
df.info()

## Data Cleaning and Wrangling

In [None]:
# Bucket the rows by the 'name' column; nothing is aggregated yet
temp1 = df.groupby(by='name')

In [None]:
# Printing a GroupBy object shows only its repr, not the grouped rows
print(temp1)

In [None]:
# Mapping of each 'name' value to the row labels that belong to that group
temp1.groups

In [None]:
# Per 'name' group: sum of 'ext price' and count of non-null 'quantity' entries.
# The columns are selected with a list; the tuple form temp1['a', 'b'] is
# deprecated and is rejected with an error by pandas 2.x.
temp2 = temp1[['ext price', 'quantity']].agg({'ext price': 'sum', 'quantity': 'count'})

In [None]:
# Order the aggregated rows from largest to smallest 'ext price' total
temp3 = temp2.sort_values('ext price', ascending=False)

In [None]:
# Preview up to the first 100 rows of the sorted totals
temp3.head(100)

In [None]:
# First ten rows of the sorted totals
temp3.head(10)

In [None]:
# Display the frame with the 'name' group key moved from the index into a
# regular column; the result is only shown here, not stored
temp3.reset_index()

In [None]:
# Keep the ten leading rows and turn the 'name' index back into a column
temp4 = temp3.head(10).reset_index()

In [None]:
# Work on an independent copy: a plain assignment would only alias temp4, so
# any later in-place change to top_10 would silently alter temp4 as well.
top_10 = temp4.copy()

In [None]:
# Preview up to the first 100 rows of top_10
top_10.head(100)

In [None]:
# Give the columns presentation-friendly names. Rebinding the result (rather
# than inplace=True) leaves any other reference to the original frame
# untouched and keeps this cell safe to re-run.
top_10 = top_10.rename(columns={'name': 'Name', 'ext price': 'Sales', 'quantity': 'Purchases'})

In [None]:
# Preview up to the first 20 rows of top_10 after the column rename
top_10.head(20)

In [None]:
# Quick look: Sales per Name drawn as a line chart
top_10.plot.line(x='Name', y='Sales');

In [None]:
# Quick look: Sales per Name drawn as vertical bars
top_10.plot.bar(x='Name', y='Sales');

In [None]:
# Quick look: Sales per Name drawn as horizontal bars
top_10.plot.barh(x='Name', y='Sales');

## Building the Plot

In [None]:
# To clean up the currency in Total Revenue, we define a custom formatting function

def currency(x, pos=None):
    """Format a numeric tick value as a compact dollar string.

    Args:
        x: Tick value to format.
        pos: Tick position passed by matplotlib's FuncFormatter; unused.

    Returns:
        A string such as '$1.5M' when the magnitude of ``x`` is at least one
        million, otherwise a string in thousands such as '$25K'.
    """
    # Compare on magnitude so large negative values also get the 'M' suffix
    if abs(x) >= 1000000:
        return '${:1.1f}M'.format(x*1e-6)
    return '${:1.0f}K'.format(x*1e-3)

In [None]:
# Switch matplotlib to the ggplot style
plt.style.use('ggplot')

# One row with two side-by-side panels that share the y axis
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(10, 6))

# --- Left panel: 'Sales' per 'Name' as horizontal bars ---
top_10.plot.barh(x='Name', y='Sales', ax=ax0)
ax0.set(title='Revenue', xlabel='Total revenue', ylabel='Customers')
ax0.set_xlim(-10000, 140000)

# Render the x tick labels through the currency() formatter
formatter = FuncFormatter(currency)
ax0.xaxis.set_major_formatter(formatter)

# Dashed vertical marker at the mean of the 'Sales' column
revenue_average = top_10['Sales'].mean()
ax0.axvline(x=revenue_average, color='b', label='Average', linestyle='--', linewidth=1)

# --- Right panel: 'Purchases' per 'Name' as horizontal bars ---
top_10.plot.barh(x='Name', y='Purchases', ax=ax1)
ax1.set(title='Units', xlabel='Total Units', ylabel='')
ax1.set_xlim(-5, 100)

# Dashed vertical marker at the mean of the 'Purchases' column
purchases_average = top_10['Purchases'].mean()
ax1.axvline(x=purchases_average, color='b', label='Average', linestyle='--', linewidth=1)

# Tag the bars at y positions 3, 5 and 8 on the left panel
for cust in (3, 5, 8):
    ax0.text(120000, cust, 'NEW CUSTOMER')

# Overall heading for the whole figure
fig.suptitle('2014 Sales Analysis', fontsize=14, fontweight='bold');

# Legends are not wanted on either panel
for panel in (ax0, ax1):
    panel.legend().set_visible(False)