## Chapter 2: Data Exploration and Visualization

> In this chapter, you will learn to explore, analyze, and reshape your data so that you can shed light on the attributes that matter most to the business — a key skill in any marketing analyst's repertoire.

In [9]:
# Example: load the raw sales data with pandas
import pandas as pd

# Read in the dataset (relative path, resolved against the current working directory)
sales_df = pd.read_csv('datasets/sales.csv')

In [48]:
def tweak_sales(sales_df):
    """
    Build a tidy sales table from the raw sales DataFrame.

    The raw frame must provide the columns 'Product', 'line', 'Product.1',
    'type', 'Product.2', 'Order', 'method', 'type.1', 'Retailer', 'Revenue'
    and 'Year'. The transformation:

    * joins 'Product' + 'line' into 'product_line',
    * joins 'Product.1' + 'type' into 'product_type',
    * joins 'Product.2' + 'Order' + 'method' into 'product',
    * copies 'Retailer' into 'retailer_country', turning values that are
      exactly 'United' into 'United States',
    * renames 'type.1', 'Revenue' and 'Year' to 'order_method', 'revenue'
      and 'year'.

    Returns a new DataFrame holding only the seven tidy columns, ordered as
    year, product_line, product_type, product, retailer_country,
    order_method, revenue. The input frame is left unmodified.
    """
    # Column order of the returned frame
    ordered_columns = [
        'year', 'product_line', 'product_type', 'product',
        'retailer_country', 'order_method', 'revenue',
    ]
    renamed = {'type.1': 'order_method', 'Revenue': 'revenue', 'Year': 'year'}

    # Stitch the word fragments back together, separated by single spaces
    product_line = sales_df['Product'] + ' ' + sales_df['line']
    product_type = sales_df['Product.1'] + ' ' + sales_df['type']
    product = sales_df['Product.2'] + ' ' + sales_df['Order'] + ' ' + sales_df['method']

    # Whole-value replacement: only cells equal to 'United' are changed
    retailer_country = sales_df['Retailer'].replace('United', 'United States')

    combined = sales_df.assign(
        product_line=product_line,
        product_type=product_type,
        product=product,
        retailer_country=retailer_country,
    )
    tidy = combined.rename(columns=renamed)

    # Keep only the tidy columns, in the desired order
    return tidy[ordered_columns]

In [49]:
# Clean the raw data with tweak_sales; sales_df itself is left untouched
df = tweak_sales(sales_df)

In [50]:
# Inspect the first five rows (the default for head())
df.head()

Unnamed: 0,year,product_line,product_type,product,retailer_country,order_method,revenue
0,2004,Camping Equipment,Cooking Gear,TrailChef Water Bag,United States,Telephone,315044.33
1,2004,Camping Equipment,Cooking Gear,TrailChef Water Bag,Canada,Telephone,14313.48
2,2004,Camping Equipment,Cooking Gear,TrailChef Water Bag,Mexico,Telephone,156644.47
3,2004,Camping Equipment,Cooking Gear,TrailChef Water Bag,Brazil,Telephone,59191.72
4,2004,Camping Equipment,Cooking Gear,TrailChef Water Bag,Japan,Telephone,7029.33


In [43]:
# Total revenue by retailer country:
# sum per country, round to whole units, then list the largest first
revenue_df = (
    df.groupby(['retailer_country'])['revenue']
    .sum()
    .round()
    .sort_values(ascending=False)
)

# Display the per-country totals
revenue_df

retailer_country
United States    1314826.0
China             622502.0
Finland           571848.0
Australia         467090.0
Korea             429402.0
Canada            420274.0
France            389180.0
Brazil            387599.0
Germany           320038.0
Belgium           318531.0
Austria           315656.0
Sweden            311533.0
Mexico            284686.0
Netherlands       244450.0
Italy             242723.0
Singapore         213251.0
Japan             196899.0
Spain             196040.0
Switzerland       178395.0
Denmark           129413.0
Name: revenue, dtype: float64

In [44]:
# Inspect the last five rows (the default for tail())
df.tail()

Unnamed: 0,year,product_line,product_type,product,product.1,retailer_country,order_method,revenue
95,2004,Camping Equipment,Cooking Gear,TrailChef,TrailChef Water Bag,Finland,Mail,6615.84
96,2004,Camping Equipment,Cooking Gear,TrailChef,TrailChef Water Bag,Denmark,Mail,52613.47
97,2004,Camping Equipment,Cooking Gear,TrailChef,TrailChef Water Bag,France,Mail,41912.85
98,2004,Camping Equipment,Cooking Gear,TrailChef,TrailChef Water Bag,Germany,Mail,59479.91
99,2004,Camping Equipment,Cooking Gear,TrailChef,TrailChef Water Bag,United States,Mail,156324.28
