In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Product catalogue, specified column by column: the i-th entry of every
# list describes the same product.
df = DataFrame({
    'product_id': [23, 96, 97, 15, 87],
    'name': ['computer', 'Python Workout', 'Pandas Workout', 'banana', 'sandwich'],
    'wholesale_price': [500, 35, 35, 0.5, 3],
    'retail_price': [1000, 75, 75, 1, 5],
    'sales': [100, 1000, 500, 200, 300],
})

# Net income per product: units sold times the per-unit margin
# (retail price minus wholesale price).
df['current_net'] = df['sales'] * (df['retail_price'] - df['wholesale_price'])
df

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales,current_net
0,23,computer,500.0,1000,100,50000.0
1,96,Python Workout,35.0,75,1000,40000.0
2,97,Pandas Workout,35.0,75,500,20000.0
3,15,banana,0.5,1,200,100.0
4,87,sandwich,3.0,5,300,600.0


# Beyond 1

An alternative tax plan would charge 25% tax, but only on those products on which we would net more than 20,000. In such a case, how much would we make?

In [2]:
# The short way: a lambda with an inline if-else leaves nets of 20,000 or
# less untouched, keeps 75% of anything larger, and totals the result
df['current_net'].apply(lambda net: net if net <= 20000 else net * 0.75).sum()

88200.0

In [3]:
# The longer way, defining a "real" function with a normal if-else
def calculate_tax(c, threshold=20000, tax_rate=0.25):
    """Return the net amount `c` that remains after the threshold-based tax.

    Note that, despite the name, the return value is the amount kept
    after tax, not the tax itself.

    Parameters
    ----------
    c : number
        Net income for a single product.
    threshold : number, default 20000
        Only amounts strictly greater than this value are taxed.
    tax_rate : float, default 0.25
        Fraction of a taxed amount that is paid as tax.

    Returns
    -------
    number
        `c * (1 - tax_rate)` if `c` exceeds `threshold`, otherwise `c`
        unchanged.
    """
    if c > threshold:
        return c * (1 - tax_rate)

    return c

# Run every product's net through calculate_tax, then total what remains
(
    df['current_net']
    .apply(calculate_tax)
    .sum()
)

88200.0

# Beyond 2

Yet another alternative tax plan would charge 25% tax on products whose retail price is greater than 80, 10% tax on products whose retail price is greater than 30 but no more than 80, and no tax on all other products. Implement and calculate the result of such a tax scheme.

In [4]:
# Use pd.cut to set the cutoffs, then translate from category to floats.
# Each label is the fraction of the net that is kept after tax:
#   [0, 30]   -> 1     (no tax)
#   (30, 80]  -> 0.9   (10% tax)
#   (80, inf) -> 0.75  (25% tax)
# The last edge is np.inf rather than the column maximum so that the bin
# edges stay strictly increasing even when no product costs more than 80
# (otherwise pd.cut raises a ValueError).  include_lowest=True keeps a
# retail price of exactly 0 in the first bin instead of producing NaN.
df['after_tax'] = pd.cut(df['retail_price'],
                   bins=[0, 30, 80, np.inf],
                   labels=[1, 0.9, 0.75],
                   include_lowest=True).astype(np.float64)

# Net income that remains once each product's tax tier has been applied
df['final_net'] = df['current_net'] * df['after_tax']
df

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales,current_net,after_tax,final_net
0,23,computer,500.0,1000,100,50000.0,0.75,37500.0
1,96,Python Workout,35.0,75,1000,40000.0,0.9,36000.0
2,97,Pandas Workout,35.0,75,500,20000.0,0.9,18000.0
3,15,banana,0.5,1,200,100.0,1.0,100.0
4,87,sandwich,3.0,5,300,600.0,1.0,600.0


# Beyond 3

These long floating-point numbers are getting a bit hard to read. Set the `float_format` option in `pandas` such that the floating-point numbers will be displayed with commas every three digits before the decimal point, and only two digits after the decimal point. Note that this is a bit tricky, in that it requires understanding Python callables and the `str.format` method. 

In [5]:
# pandas calls this option with each float it displays, so hand it the bound
# str.format method of a template: thousands separators, two decimal places
pd.set_option('display.float_format', '{:,.2f}'.format)

In [6]:
# Display the frame again; float columns are rendered using the
# display.float_format option currently in effect
df

Unnamed: 0,product_id,name,wholesale_price,retail_price,sales,current_net,after_tax,final_net
0,23,computer,500.0,1000,100,50000.0,0.75,37500.0
1,96,Python Workout,35.0,75,1000,40000.0,0.9,36000.0
2,97,Pandas Workout,35.0,75,500,20000.0,0.9,18000.0
3,15,banana,0.5,1,200,100.0,1.0,100.0
4,87,sandwich,3.0,5,300,600.0,1.0,600.0
