# Import PANDAS Library

Notice that we import the pandas library under the alias pd, so each time we want to use pandas we can simply type pd instead of pandas.

In [1]:
import pandas as pd
# Show floats with thousands separators and two decimals in every table below.
pd.set_option('display.float_format', '{:,.2f}'.format)

# Import Dataset (either full dataset or simplified)

You can use either set of data files (full or simplified). In the past, some students' computers could not handle the full dataset. If you worry that your computer will not handle the full dataset (or if you find your code is running very slowly), use the simplified version.

The simplified version was created by taking a sample of the full data; hence the simplified files have "Sample" added to the end of the file names. Some files were not large to begin with and didn't need to be reduced. Those files do not have Sample added to the end of the file name.

## Adjust the file address below to match your computer.

You will want to replace the beginning of the address below (i.e., everything before OneDrive - Oregon State University/)

In [2]:
# Load the beginning-inventory sample file (path is relative to the working directory).
BEG_INV_CSV = "BegInvFINAL12-31-16Sample.csv"
begInv = pd.read_csv(BEG_INV_CSV)

In [3]:
# Preview the first two rows to confirm the file loaded as expected.
begInv.head(n=2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01


# Deliverable 1A

In [4]:
# Rename 'onHand' so the column name carries its snapshot date.
begInv = begInv.rename({'onHand': '01_01_2016_Qty'}, axis='columns')
begInv.head(n=2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01


In [5]:
# Inventory cost per row = quantity on hand x unit price.
begInv['InvCost01_01_2016'] = begInv['01_01_2016_Qty'].mul(begInv['Price'])
begInv.head(n=2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate,InvCost01_01_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01,103.92
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01,76.93


In [6]:
# Total beginning quantity and inventory cost for each store.
begInv_Store = begInv.groupby('Store')[['01_01_2016_Qty', 'InvCost01_01_2016']].sum()
begInv_Store.head(n=2)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,49917,838079.59
2,52925,840452.2


In [7]:
# Ten stores with the highest beginning inventory cost, largest first.
(
    begInv_Store
    .sort_values(by='InvCost01_01_2016', ascending=False)
    .head(n=10)
)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


# Alternative Solution Code

In [8]:
# Same top-10 result reached from the other end: sort ascending, keep the
# last ten rows, then flip them so the largest cost is listed first.
largestTenAscending = begInv_Store.sort_values(by='InvCost01_01_2016').tail(n=10)
largestTenAscending.sort_values(by='InvCost01_01_2016', ascending=False)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


# Deliverable 1B

In [9]:
# Same steps as Deliverable 1A, but group by 'Brand' instead of 'Store' and name the result 'begInv_Brand'.

In [10]:
# Total beginning quantity and inventory cost for each brand.
begInv_Brand = begInv.groupby('Brand')[['01_01_2016_Qty', 'InvCost01_01_2016']].sum()
begInv_Brand.head(n=2)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,281,3650.19
60,288,3165.12


In [11]:
# Ten brands with the highest beginning inventory cost, largest first.
(
    begInv_Brand
    .sort_values(by='InvCost01_01_2016', ascending=False)
    .head(n=10)
)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
3545,14499,463823.01
1233,12016,432455.84
8068,15341,383371.59
4261,15499,340823.01
3858,13649,327439.51
2753,4625,286703.75
8082,9287,278517.13
8680,7066,275503.34
2589,6766,270572.34
3876,14829,266773.71


# Deliverable 2A

In [12]:
# Load the ending-inventory sample file (path is relative to the working directory).
END_INV_CSV = "EndInvFINAL12-31-16Sample.csv"
endInv = pd.read_csv(END_INV_CSV)
endInv.head(n=5)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31
2,1_HARDERSFIELD_63,1,HARDERSFIELD,63,Herradura Reposado Tequila,750mL,7,38.99,2016-12-31
3,1_HARDERSFIELD_72,1,HARDERSFIELD,72,No. 3 London Dry Gin,750mL,4,34.99,2016-12-31
4,1_HARDERSFIELD_75,1,HARDERSFIELD,75,Three Olives Tomato Vodka,750mL,7,14.99,2016-12-31


In [13]:
# Rename 'onHand' so the column name carries its snapshot date.
endInv = endInv.rename({'onHand': '12_31_2016_Qty'}, axis='columns')
endInv.head(n=2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31


In [14]:
# Inventory cost per row = quantity on hand x unit price.
endInv['InvCost12_31_2016'] = endInv['12_31_2016_Qty'].mul(endInv['Price'])
endInv.head(n=2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate,InvCost12_31_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31,142.89
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31,258.93


In [15]:
# Total ending quantity and inventory cost for each store.
endInv_Store = endInv.groupby('Store')[['12_31_2016_Qty', 'InvCost12_31_2016']].sum()
endInv_Store.head(n=2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,79827,1206845.93
2,56671,850884.06


In [16]:
# Ten stores with the highest ending inventory cost, largest first.
(
    endInv_Store
    .sort_values(by='InvCost12_31_2016', ascending=False)
    .head(n=10)
)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
50,260717,4887260.68
73,164589,3254662.81
67,163765,3076114.82
34,145829,3074616.75
76,143866,2975945.18
69,150848,2968678.82
66,144579,2860504.99
74,166015,2803645.13
38,129397,2463906.85
55,125584,2234836.35


# 2A Solution Above

# Deliverable 2B

In [17]:
# Total ending quantity and inventory cost for each brand.
# NOTE(review): this reuses the name endInv_Store for Brand-level totals,
# replacing the Store-level totals computed for Deliverable 2A.
endInv_Store = endInv.groupby('Brand')[['12_31_2016_Qty', 'InvCost12_31_2016']].sum()
endInv_Store.head(n=2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,385,5001.15
60,146,1604.54


In [18]:
# Brand-level ending totals, built from the endInv frame already in memory.
# The previous version re-read the CSV and stored an un-aggregated groupby
# object, which had no 'InvCost12_31_2016' column and could not be sorted.
endInv_Brand = endInv.groupby('Brand')[['12_31_2016_Qty', 'InvCost12_31_2016']].sum()

In [19]:
# Recompute ending inventory cost per row (quantity on hand x unit price).
endInv['InvCost12_31_2016'] = endInv['12_31_2016_Qty'].mul(endInv['Price'])

In [20]:
# Ten brands with the highest ending inventory cost, largest first.
# Aggregated directly from endInv so the answer does not depend on a
# Store-named variable happening to hold Brand totals at this point.
(
    endInv.groupby('Brand')[['12_31_2016_Qty', 'InvCost12_31_2016']]
    .sum()
    .sort_values(by='InvCost12_31_2016', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
1233,15047,526494.53
3545,16770,502932.3
2753,7849,470861.51
8068,15608,366631.92
3405,12268,355649.32
4261,16769,351981.31
2757,11603,336370.97
2589,7922,300956.78
1376,13180,276648.2
2585,10487,272557.13


# Deliverable 3

In [21]:
# Re-check the ending-inventory columns before merging.
endInv.head(n=5)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate,InvCost12_31_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31,142.89
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31,258.93
2,1_HARDERSFIELD_63,1,HARDERSFIELD,63,Herradura Reposado Tequila,750mL,7,38.99,2016-12-31,272.93
3,1_HARDERSFIELD_72,1,HARDERSFIELD,72,No. 3 London Dry Gin,750mL,4,34.99,2016-12-31,139.96
4,1_HARDERSFIELD_75,1,HARDERSFIELD,75,Three Olives Tomato Vodka,750mL,7,14.99,2016-12-31,104.93


In [22]:
# Re-check the beginning-inventory columns before merging.
begInv.head(n=5)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate,InvCost01_01_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01,103.92
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01,76.93
2,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,6,36.99,2016-01-01,221.94
3,1_HARDERSFIELD_63,1,HARDERSFIELD,63,Herradura Reposado Tequila,750mL,3,38.99,2016-01-01,116.97
4,1_HARDERSFIELD_72,1,HARDERSFIELD,72,No. 3 London Dry Gin,750mL,6,34.99,2016-01-01,209.94


In [23]:
# Give both quantity columns report-friendly names for the merged table.
begInv = begInv.rename({'01_01_2016_Qty': 'Beginning Quantity'}, axis='columns')
endInv = endInv.rename({'12_31_2016_Qty': 'Ending Quantity'}, axis='columns')

In [24]:
# Beginning dollar value per row = beginning quantity x unit price.
begInv['Beginning Total'] = begInv['Beginning Quantity'].mul(begInv['Price'])

In [25]:
# Ending dollar value per row = ending quantity x unit price.
endInv['Ending Total'] = endInv['Ending Quantity'].mul(endInv['Price'])

In [26]:
# Outer-join the two snapshots on InventoryId so items present in only one
# snapshot are kept; their missing quantities/totals are filled with 0.
beginningSide = begInv[['InventoryId', 'Beginning Quantity', 'Beginning Total']]
endingSide = endInv[['Ending Quantity', 'Ending Total', 'InventoryId']]
Inventories = beginningSide.merge(endingSide, how='outer', on=['InventoryId']).fillna(0)

In [27]:
# Show the merged table with the columns in reporting order.
Inventories.loc[:, ['InventoryId', 'Beginning Quantity', 'Beginning Total', 'Ending Quantity', 'Ending Total']]

Unnamed: 0,InventoryId,Beginning Quantity,Beginning Total,Ending Quantity,Ending Total
0,1_HARDERSFIELD_58,8.00,103.92,11.00,142.89
1,1_HARDERSFIELD_60,7.00,76.93,0.00,0.00
2,1_HARDERSFIELD_62,6.00,221.94,7.00,258.93
3,1_HARDERSFIELD_63,3.00,116.97,7.00,272.93
4,1_HARDERSFIELD_72,6.00,209.94,4.00,139.96
...,...,...,...,...,...
256037,81_PEMBROKE_90087,0.00,0.00,3.00,1409.97
256038,81_PEMBROKE_90088,0.00,0.00,3.00,404.97
256039,81_PEMBROKE_90089,0.00,0.00,3.00,359.97
256040,81_PEMBROKE_90090,0.00,0.00,3.00,1949.97


In [28]:
# Display-only: the result is not assigned back to Inventories.
Inventories.fillna(value=0)

Unnamed: 0,InventoryId,Beginning Quantity,Beginning Total,Ending Quantity,Ending Total
0,1_HARDERSFIELD_58,8.00,103.92,11.00,142.89
1,1_HARDERSFIELD_60,7.00,76.93,0.00,0.00
2,1_HARDERSFIELD_62,6.00,221.94,7.00,258.93
3,1_HARDERSFIELD_63,3.00,116.97,7.00,272.93
4,1_HARDERSFIELD_72,6.00,209.94,4.00,139.96
...,...,...,...,...,...
256037,81_PEMBROKE_90087,0.00,0.00,3.00,1409.97
256038,81_PEMBROKE_90088,0.00,0.00,3.00,404.97
256039,81_PEMBROKE_90089,0.00,0.00,3.00,359.97
256040,81_PEMBROKE_90090,0.00,0.00,3.00,1949.97


In [29]:
# Preview the first five merged rows.
Inventories.head(n=5)

Unnamed: 0,InventoryId,Beginning Quantity,Beginning Total,Ending Quantity,Ending Total
0,1_HARDERSFIELD_58,8.0,103.92,11.0,142.89
1,1_HARDERSFIELD_60,7.0,76.93,0.0,0.0
2,1_HARDERSFIELD_62,6.0,221.94,7.0,258.93
3,1_HARDERSFIELD_63,3.0,116.97,7.0,272.93
4,1_HARDERSFIELD_72,6.0,209.94,4.0,139.96


In [30]:
# Number of rows in the merged inventory table.
Inventories.shape[0]

256042

# Deliverable 3: the merged Inventories table has 256,042 rows

# Deliverable 4A

In [31]:
# Load the purchases sample file (path is relative to the working directory).
PURCHASES_CSV = "PurchasesFINAL12-31-16Sample.csv"
Purchases = pd.read_csv(PURCHASES_CSV)

In [32]:
# Preview the first two purchase rows.
Purchases.head(n=2)

Unnamed: 0,InventoryId,Store,Brand,Description,Size,VendorNumber,VendorName,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,Dollars,Classification
0,69_MOUNTMEND_8412,69,8412,Tequila Ocho Plata Fresno,750mL,105,ALTAMAR BRANDS LLC,8124,2015-12-21,2016-01-02,2016-01-04,2016-02-16,35.71,6,214.26,1
1,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,9.41,5,47.05,1


In [33]:
# Total purchase dollars per vendor, then the ten largest.
purchaseDollarsByVendor = Purchases.groupby(['VendorNumber', 'VendorName'])[['Dollars']].sum()
purchaseDollarsByVendor.sort_values(by='Dollars', ascending=False).head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Dollars
VendorNumber,VendorName,Unnamed: 2_level_1
3960,DIAGEO NORTH AMERICA INC,21315299.75
4425,MARTIGNETTI COMPANIES,11709222.19
12546,JIM BEAM BRANDS COMPANY,10232521.65
17035,PERNOD RICARD USA,10224899.93
480,BACARDI USA INC,7420630.63
1392,CONSTELLATION BRANDS INC,6584032.72
1128,BROWN-FORMAN CORP,5719375.62
9165,ULTRA BEVERAGE COMPANY LLP,5545766.29
3252,E & J GALLO WINERY,5177067.61
9552,M S WALKER INC,4647908.17


# Deliverable 4B

In [34]:
# Load the invoice-level purchases sample file (includes the Freight column).
INVOICE_PURCHASES_CSV = "InvoicePurchases12-31-16Sample.csv"
ShippingDate = pd.read_csv(INVOICE_PURCHASES_CSV)

In [35]:
# Total freight per vendor, then the ten largest.
freightByVendor = ShippingDate.groupby(['VendorNumber', 'VendorName'])[['Freight']].sum()
freightByVendor.sort_values(by='Freight', ascending=False).head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight
VendorNumber,VendorName,Unnamed: 2_level_1
3960,DIAGEO NORTH AMERICA INC,257032.07
4425,MARTIGNETTI COMPANIES,144719.92
12546,JIM BEAM BRANDS COMPANY,123880.97
17035,PERNOD RICARD USA,123780.22
480,BACARDI USA INC,89286.27
1392,CONSTELLATION BRANDS INC,79528.99
1128,BROWN-FORMAN CORP,68601.68
9165,ULTRA BEVERAGE COMPANY LLP,68054.7
3252,E & J GALLO WINERY,61966.91
9552,M S WALKER INC,55551.82


# Deliverable 4C

In [36]:
# Start from the invoice data already loaded as ShippingDate instead of
# reading the same CSV from disk a second time.
dfLargeVendors = ShippingDate.copy()

In [37]:
# Per-vendor totals of freight, purchase dollars and quantity.
vendorKeys = ['VendorNumber', 'VendorName']
dfLargeVendors = ShippingDate.groupby(vendorKeys)[['Freight', 'Dollars', 'Quantity']].sum()

In [38]:
# A "large" vendor is one with at least this many total purchase dollars.
MIN_LARGE_VENDOR_DOLLARS = 250000

# .copy() makes the filtered result an independent frame, so adding columns
# to it later does not trigger pandas' SettingWithCopyWarning.
dfLargeVendors = dfLargeVendors.loc[dfLargeVendors['Dollars'] >= MIN_LARGE_VENDOR_DOLLARS].copy()

In [39]:
# Preview the large-vendor totals.
dfLargeVendors.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
480,BACARDI USA INC,89286.27,17624378.72,1427075
516,BANFI PRODUCTS CORP,8510.41,1628866.68,228103
653,STATE WINE & SPIRITS,8014.98,1529682.04,154092
660,SAZERAC NORTH AMERICA INC.,17932.33,3537977.55,503931
1128,BROWN-FORMAN CORP,68601.68,13529433.08,1006122


In [40]:
# Freight as a percentage of purchase dollars, and freight per unit purchased.
largeVendorFreight = dfLargeVendors['Freight']
dfLargeVendors['FreightRate(%)'] = largeVendorFreight.div(dfLargeVendors['Dollars']).mul(100)
dfLargeVendors['FreightPerUnit'] = largeVendorFreight.div(dfLargeVendors['Quantity'])

In [41]:
# Ten large vendors with the highest freight rate.
(
    dfLargeVendors
    .sort_values(by='FreightRate(%)', ascending=False)
    .head(n=10)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity,FreightRate(%),FreightPerUnit
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9625,WESTERN SPIRITS BEVERAGE CO,1933.19,361249.21,56860,0.54,0.03
1590,DIAGEO CHATEAU ESTATE WINES,7259.75,1365472.83,187841,0.53,0.04
9744,FREDERICK WILDMAN & SONS,3999.93,759449.24,70932,0.53,0.06
653,STATE WINE & SPIRITS,8014.98,1529682.04,154092,0.52,0.05
17031,FLAG HILL WINERY & VINEYARD,1573.31,300403.2,20608,0.52,0.08
516,BANFI PRODUCTS CORP,8510.41,1628866.68,228103,0.52,0.04
4425,MARTIGNETTI COMPANIES,144719.92,27821473.91,2637275,0.52,0.05
8673,STE MICHELLE WINE ESTATES,15919.7,3086650.7,419822,0.52,0.04
9815,WINE GROUP INC,27100.41,5258636.79,888385,0.52,0.03
9165,ULTRA BEVERAGE COMPANY LLP,68054.7,13210613.93,1077527,0.52,0.06


# 4C Top 10 Vendors Solution Above

In [42]:
# Five large vendors with the lowest freight rate (end of the descending sort).
(
    dfLargeVendors
    .sort_values(by='FreightRate(%)', ascending=False)
    .tail(n=5)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity,FreightRate(%),FreightPerUnit
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
8352,LUXCO INC,10261.6,2051436.01,283260,0.5,0.04
3924,HEAVEN HILL DISTILLERIES,14069.87,2816661.94,352348,0.5,0.04
3089,SIDNEY FRANK IMPORTING CO,8549.55,1715908.88,186464,0.5,0.05
9819,TREASURY WINE ESTATES,14836.57,2978686.4,497770,0.5,0.03
6359,OLE SMOKY DISTILLERY LLC,1922.0,387622.69,110162,0.5,0.02


# 4C Bottom 5 Vendors Solution Above

# Deliverable 4D

In [43]:
# Work on a copy of the invoice data already loaded as ShippingDate rather
# than reading the same CSV from disk again.
dfSmallExp = ShippingDate.copy()

In [44]:
# Preview the invoice rows before filtering.
dfSmallExp.head(n=5)

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
0,105,ALTAMAR BRANDS LLC,2016-01-04,8124,2015-12-21,2016-02-16,6,214.26,3.47,
1,4466,AMERICAN VINTAGE BEVERAGE,2016-01-07,8137,2015-12-22,2016-02-21,15,140.55,8.57,
2,388,ATLANTIC IMPORTING COMPANY,2016-01-09,8169,2015-12-24,2016-02-16,5,106.6,4.61,
3,480,BACARDI USA INC,2016-01-12,8106,2015-12-20,2016-02-05,10100,137483.78,2935.2,
4,516,BANFI PRODUCTS CORP,2016-01-07,8170,2015-12-24,2016-02-12,1935,15527.25,429.2,


In [45]:
# Keep invoices whose freight charge is at least 100.
dfSmallExp = dfSmallExp.loc[dfSmallExp['Freight'].ge(100)]

In [46]:
# Keep invoices covering at most 1000 units.
dfSmallExp = dfSmallExp.loc[dfSmallExp['Quantity'].le(1000)]

In [47]:
# Preview the invoices that passed both filters.
dfSmallExp.head(n=5)

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
11,1485,CASTLE BRANDS CORP.,2016-01-08,8152,2015-12-23,2016-02-19,320,5420.41,179.26,
16,2242,DELICATO VINEYARDS INC,2016-01-06,8139,2015-12-22,2016-02-10,808,6646.46,127.05,
19,2555,DISARONNO INTERNATIONAL LLC,2016-01-11,8192,2015-12-25,2016-02-17,385,3506.41,146.86,
23,2561,EDRINGTON AMERICAS,2016-01-08,8175,2015-12-24,2016-02-10,136,5645.24,218.18,
28,3924,HEAVEN HILL DISTILLERIES,2016-01-08,8155,2015-12-23,2016-02-15,818,7079.02,200.02,


In [48]:
# Per-vendor totals of freight, purchase dollars and quantity for the filtered invoices.
smallExpKeys = ['VendorNumber', 'VendorName']
dfSmallExp = dfSmallExp.groupby(smallExpKeys)[['Freight', 'Dollars', 'Quantity']].sum()

In [49]:
# Five vendors with the most freight on small, expensive-to-ship invoices.
(
    dfSmallExp
    .sort_values(by='Freight', ascending=False)
    .head(n=5)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2561,EDRINGTON AMERICAS,2944.67,570627.52,18120
8664,"STOLI GROUP,(USA) LLC",349.02,14299.98,975
7239,REMY COINTREAU USA INC,348.07,7515.38,382
653,STATE WINE & SPIRITS,303.7,5544.89,541
6785,PALM BAY INTERNATIONAL INC,242.73,5533.18,715


# 4D Solution Above

# Deliverable 4E

In [50]:
# Re-check the large-vendor table before comparing it with the small/expensive one.
dfLargeVendors.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity,FreightRate(%),FreightPerUnit
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
480,BACARDI USA INC,89286.27,17624378.72,1427075,0.51,0.06
516,BANFI PRODUCTS CORP,8510.41,1628866.68,228103,0.52,0.04
653,STATE WINE & SPIRITS,8014.98,1529682.04,154092,0.52,0.05
660,SAZERAC NORTH AMERICA INC.,17932.33,3537977.55,503931,0.51,0.04
1128,BROWN-FORMAN CORP,68601.68,13529433.08,1006122,0.51,0.07


In [51]:
# Re-check the small/expensive vendor table before adding the rate columns.
dfSmallExp.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
653,STATE WINE & SPIRITS,303.7,5544.89,541
1273,CALEDONIA SPIRITS INC,109.8,20334.21,576
1485,CASTLE BRANDS CORP.,179.26,5420.41,320
2242,DELICATO VINEYARDS INC,127.05,6646.46,808
2555,DISARONNO INTERNATIONAL LLC,146.86,3506.41,385


In [52]:
# Freight as a percentage of purchase dollars, and freight per unit purchased.
smallExpFreight = dfSmallExp["Freight"]
dfSmallExp["Freight rate as %"] = smallExpFreight.div(dfSmallExp["Dollars"]).mul(100)
dfSmallExp["Freight per Unit"] = smallExpFreight.div(dfSmallExp["Quantity"])

In [53]:
# Side-by-side comparison of the average freight metrics across vendors in
# each group (each figure is a mean of per-vendor ratios).
pd.DataFrame(
    {
        "Large": [
            dfLargeVendors["FreightRate(%)"].mean(),
            dfLargeVendors["FreightPerUnit"].mean(),
        ],
        "Small/Exp": [
            dfSmallExp["Freight rate as %"].mean(),
            dfSmallExp["Freight per Unit"].mean(),
        ],
    },
    index=pd.Index(["Freight Per Dollar", "Freight per unit"], name=""),
)

Unnamed: 0,Large,Small/Exp
,,
Freight Per Dollar,0.51,2.75
Freight per unit,0.05,0.34


# 4E Solution Above

# Deliverable 5

In [54]:
# Work on a copy of the purchases data already loaded as Purchases rather
# than reading the same CSV from disk again.
dfPurchasePrice = Purchases.copy()

In [55]:
# Preview the purchase rows (PurchasePrice is the unit cost column used below).
dfPurchasePrice.head(n=5)

Unnamed: 0,InventoryId,Store,Brand,Description,Size,VendorNumber,VendorName,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,Dollars,Classification
0,69_MOUNTMEND_8412,69,8412,Tequila Ocho Plata Fresno,750mL,105,ALTAMAR BRANDS LLC,8124,2015-12-21,2016-01-02,2016-01-04,2016-02-16,35.71,6,214.26,1
1,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,9.41,5,47.05,1
2,76_DONCASTER_2034,76,2034,Glendalough Double Barrel,750mL,388,ATLANTIC IMPORTING COMPANY,8169,2015-12-24,2016-01-02,2016-01-09,2016-02-16,21.32,5,106.6,1
3,5_SUTTON_3348,5,3348,Bombay Sapphire Gin,1.75L,480,BACARDI USA INC,8106,2015-12-20,2016-01-02,2016-01-12,2016-02-05,22.38,6,134.28,1
4,30_CULCHETH_4903,30,4903,Bacardi Superior Rum,200mL,480,BACARDI USA INC,8106,2015-12-20,2016-01-01,2016-01-12,2016-02-05,2.87,48,137.76,1


In [56]:
# Load the December 2016 sales sample file (path is relative to the working directory).
SALES_CSV = 'salesDecember2016Sample.csv'
dfSalesPrice = pd.read_csv(SALES_CSV)

In [57]:
# Preview the sales rows (SalesPrice is the unit selling price column used below).
dfSalesPrice.head(n=5)

Unnamed: 0,InventoryId,Store,Brand,Description,Size,SalesQuantity,SalesDollars,SalesPrice,SalesDate,Volume,Classification,ExciseTax,VendorNo,VendorName
0,10_HORNSEY_1003,10,1003,Crown Royal +2 Reusable Cups,750mL,5,114.95,22.99,2016-12-01,750.0,1,3.94,3960,DIAGEO NORTH AMERICA INC
1,10_HORNSEY_1003,10,1003,Crown Royal +2 Reusable Cups,750mL,4,91.96,22.99,2016-12-03,750.0,1,3.15,3960,DIAGEO NORTH AMERICA INC
2,10_HORNSEY_1003,10,1003,Crown Royal +2 Reusable Cups,750mL,2,45.98,22.99,2016-12-04,750.0,1,1.57,3960,DIAGEO NORTH AMERICA INC
3,10_HORNSEY_1003,10,1003,Crown Royal +2 Reusable Cups,750mL,1,22.99,22.99,2016-12-06,750.0,1,0.79,3960,DIAGEO NORTH AMERICA INC
4,10_HORNSEY_1003,10,1003,Crown Royal +2 Reusable Cups,750mL,11,252.89,22.99,2016-12-09,750.0,1,8.66,3960,DIAGEO NORTH AMERICA INC


In [None]:
# Collapse each table to one average price per Description before merging.
# Joining the raw row-level tables on Description would pair every sales row
# with every purchase row that shares the description.
avgSalesPrice = dfSalesPrice.groupby('Description', as_index=False)[['SalesPrice']].mean()
avgPurchasePrice = dfPurchasePrice.groupby('Description', as_index=False)[['PurchasePrice']].mean()

dfCM = pd.merge(left=avgSalesPrice, right=avgPurchasePrice, how='inner', on='Description')

# The price columns are named 'SalesPrice' and 'PurchasePrice' (no spaces).
dfCM['Contribution Margin'] = dfCM['SalesPrice'] - dfCM['PurchasePrice']
# NOTE(review): Description alone may combine different bottle sizes of the
# same product; consider also keying on Brand or Size - confirm against the
# assignment.

In [None]:
# Reduce dfCM to one row per Description holding the average selling price
# and average purchase price, then (re)compute the contribution margin.
# The previous version ended its first line with a comma (turning dfCM into a
# tuple) and then called .mean() on a plain list, which raises AttributeError.
dfCM = dfCM.groupby('Description', as_index=False)[['SalesPrice', 'PurchasePrice']].mean()
dfCM['Contribution Margin'] = dfCM['SalesPrice'] - dfCM['PurchasePrice']

In [None]:
# Preview the contribution-margin table.
dfCM.head(n=5)

In [None]:
# Keep only products whose contribution margin is zero or negative, ordered
# from the worst margin upward. Rows are selected with .loc[...] (a DataFrame
# is not callable) and the sorted result is stored so later cells see it.
dfCM = (
    dfCM.loc[dfCM['Contribution Margin'] <= 0]
    .sort_values(by='Contribution Margin', ascending=True)
)
dfCM

In [None]:
# Preview the filtered contribution-margin table.
dfCM.head(n=5)

# I was in the help session on Friday. I didn't finish Deliverable 5, and you told me that was OK. I got part of it in.