# Import PANDAS Library

Notice that we are using pd as an abbreviation so each time we want to use the PANDAS library, we can simply type pd instead of pandas.

In [1]:
import pandas as pd
pd.options.display.float_format = '{:,.2f}'.format

# Import Dataset (either full dataset or simplified)

You can use either set of datafiles (full or simplified). In the past some students' computers could not handle the full dataset. If you worry that your computer will not handle the full dataset (or if you find your code is running very slow), use the simplified version. 

The simplified version was created by taking a sample of the full data; hence the simplified files have "Sample" added to the end of the file names. Some files were not large to begin with and didn't need to be reduced. Those files do not have Sample added to the end of the file name.

## Adjust the file address below to match your computer.

You will want to replace the begining of the address below (i.e., everything before OneDrive - Oregon State University/)

In [2]:
begInv = pd.read_csv("BegInvFINAL12-31-16Sample.csv")

In [3]:
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01


In [4]:
begInv = begInv.rename(columns = {'onHand':'01_01_2016_Qty'})
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01


In [5]:
begInv['InvCost01_01_2016'] = begInv['01_01_2016_Qty'] * begInv['Price']
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate,InvCost01_01_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01,103.92
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01,76.93


In [6]:
begInv_Store = begInv[['Store','01_01_2016_Qty','InvCost01_01_2016']].groupby('Store').sum()
begInv_Store.head(2)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,49917,838079.59
2,52925,840452.2


In [7]:
begInv_Store.sort_values(by='InvCost01_01_2016',ascending=False).head(10)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


In [8]:
begInv_Store.sort_values(by='InvCost01_01_2016').tail(10).sort_values(by='InvCost01_01_2016', ascending = False)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


In [9]:
endInv = pd.read_csv("EndInvFINAL12-31-16Sample.csv")
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31


In [10]:
endInv = endInv.rename(columns = {'onHand':'12_31_2016_Qty'})
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31


In [11]:
endInv['InvCost12_31_2016'] = endInv['12_31_2016_Qty'] * endInv['Price']
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate,InvCost12_31_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31,142.89
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31,258.93


In [12]:
endInv_Store = endInv[['Store','12_31_2016_Qty','InvCost12_31_2016']].groupby('Store').sum()
endInv_Store.head(2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,79827,1206845.93
2,56671,850884.06


In [13]:
endInv_Store.sort_values(by='InvCost12_31_2016',ascending=False).head(10)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
50,260717,4887260.68
73,164589,3254662.81
67,163765,3076114.82
34,145829,3074616.75
76,143866,2975945.18
69,150848,2968678.82
66,144579,2860504.99
74,166015,2803645.13
38,129397,2463906.85
55,125584,2234836.35


In [14]:
endInv_Store.sort_values(by='InvCost12_31_2016').tail(10).sort_values(by='InvCost12_31_2016', ascending = False)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
50,260717,4887260.68
73,164589,3254662.81
67,163765,3076114.82
34,145829,3074616.75
76,143866,2975945.18
69,150848,2968678.82
66,144579,2860504.99
74,166015,2803645.13
38,129397,2463906.85
55,125584,2234836.35


# Deliverable 1A

In [15]:
begInv = pd.read_csv("BegInvFINAL12-31-16Sample.csv")

In [16]:
begInv = begInv.rename(columns = {'onHand':'01_01_2016_Qty'})

In [17]:
begInv['InvCost_01_01_2016'] = begInv['01_01_2016_Qty'] * begInv['Price']

In [18]:
begInv_Store.sort_values(by= 'InvCost01_01_2016', ascending=False).head(10) 

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


In [19]:
begInv_Store.sort_values(by='InvCost01_01_2016').tail(10).sort_values(by='InvCost01_01_2016', ascending = False)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


# Delieverable 1B

In [20]:
begInv_Brand = begInv[['Brand','01_01_2016_Qty','InvCost_01_01_2016']].groupby('Brand').sum()
begInv_Brand.head(10)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost_01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,281,3650.19
60,288,3165.12
61,12,167.88
62,384,14204.16
63,365,14231.35
72,151,5283.49
75,18,269.82
77,1261,18902.39
79,902,13520.98
82,24,8999.76


# Deliverable 2A

In [21]:
endInv_Store.sort_values(by= 'InvCost12_31_2016', ascending=False).head(10) 
endInv_Store.sort_values(by='InvCost12_31_2016').tail(10).sort_values(by='InvCost12_31_2016', ascending = False)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
50,260717,4887260.68
73,164589,3254662.81
67,163765,3076114.82
34,145829,3074616.75
76,143866,2975945.18
69,150848,2968678.82
66,144579,2860504.99
74,166015,2803645.13
38,129397,2463906.85
55,125584,2234836.35


# Deliverable 2B

In [22]:
endInv_Brand = endInv[['Brand','12_31_2016_Qty','InvCost12_31_2016']].groupby('Brand').sum()
endInv_Brand.head(10)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,385,5001.15
60,146,1604.54
61,12,167.88
62,459,16978.41
63,442,17233.58
72,139,4863.61
75,7,104.93
77,1764,26442.36
79,995,14915.05
86,18,5399.82


# Deliverable 3

In [23]:
Inv = pd.merge(left =  begInv[['InventoryId', '01_01_2016_Qty','InvCost_01_01_2016']], right = endInv[['InventoryId', '12_31_2016_Qty','InvCost12_31_2016']], how = 'outer', on = ['InventoryId']).fillna(0)
Inv.head()

Unnamed: 0,InventoryId,01_01_2016_Qty,InvCost_01_01_2016,12_31_2016_Qty,InvCost12_31_2016
0,1_HARDERSFIELD_58,8.0,103.92,11.0,142.89
1,1_HARDERSFIELD_60,7.0,76.93,0.0,0.0
2,1_HARDERSFIELD_62,6.0,221.94,7.0,258.93
3,1_HARDERSFIELD_63,3.0,116.97,7.0,272.93
4,1_HARDERSFIELD_72,6.0,209.94,4.0,139.96


In [42]:
len(Inv)

256042

# Deliverable 4A

In [24]:
purchases = pd.read_csv("InvoicePurchases12-31-16Sample.csv")

In [52]:
Topvendors = purchases[['VendorNumber', 'VendorName', 'Dollars']].groupby(['VendorNumber','VendorName']).sum()

In [54]:
Topvendors.sort_values(by='Dollars', ascending = False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Dollars
VendorNumber,VendorName,Unnamed: 2_level_1
3960,DIAGEO NORTH AMERICA INC,50959796.85
4425,MARTIGNETTI COMPANIES,27821473.91
12546,JIM BEAM BRANDS COMPANY,24203151.05
17035,PERNOD RICARD USA,24124091.56
480,BACARDI USA INC,17624378.72
1392,CONSTELLATION BRANDS INC,15573917.9
1128,BROWN-FORMAN CORP,13529433.08
9165,ULTRA BEVERAGE COMPANY LLP,13210613.93
3252,E & J GALLO WINERY,12289608.09
9552,M S WALKER INC,10935817.3


# Deliverable 4B

In [34]:
purchases.head()

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
0,105,ALTAMAR BRANDS LLC,2016-01-04,8124,2015-12-21,2016-02-16,6,214.26,3.47,
1,4466,AMERICAN VINTAGE BEVERAGE,2016-01-07,8137,2015-12-22,2016-02-21,15,140.55,8.57,
2,388,ATLANTIC IMPORTING COMPANY,2016-01-09,8169,2015-12-24,2016-02-16,5,106.6,4.61,
3,480,BACARDI USA INC,2016-01-12,8106,2015-12-20,2016-02-05,10100,137483.78,2935.2,
4,516,BANFI PRODUCTS CORP,2016-01-07,8170,2015-12-24,2016-02-12,1935,15527.25,429.2,


In [37]:
purchases.groupby(['VendorNumber','VendorName'])[['Freight']].sum().sort_values(by='Freight', ascending = False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight
VendorNumber,VendorName,Unnamed: 2_level_1
3960,DIAGEO NORTH AMERICA INC,257032.07
4425,MARTIGNETTI COMPANIES,144719.92
12546,JIM BEAM BRANDS COMPANY,123880.97
17035,PERNOD RICARD USA,123780.22
480,BACARDI USA INC,89286.27
1392,CONSTELLATION BRANDS INC,79528.99
1128,BROWN-FORMAN CORP,68601.68
9165,ULTRA BEVERAGE COMPANY LLP,68054.7
3252,E & J GALLO WINERY,61966.91
9552,M S WALKER INC,55551.82


# Deliverable 4C

In [67]:
dfLargeVendors = purchases.groupby(['VendorNumber','VendorName'])[['Freight','Dollars','Quantity']].sum()

In [68]:
dfLargeVendors = dfLargeVendors[dfLargeVendors['Dollars']>250000]

In [69]:
dfLargeVendors.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
480,BACARDI USA INC,89286.27,17624378.72,1427075
516,BANFI PRODUCTS CORP,8510.41,1628866.68,228103
653,STATE WINE & SPIRITS,8014.98,1529682.04,154092
660,SAZERAC NORTH AMERICA INC.,17932.33,3537977.55,503931
1128,BROWN-FORMAN CORP,68601.68,13529433.08,1006122
1273,CALEDONIA SPIRITS INC,1319.77,259604.7,8202
1392,CONSTELLATION BRANDS INC,79528.99,15573917.9,2325892
1485,CASTLE BRANDS CORP.,8497.59,1679383.91,105220
1587,VINEYARD BRANDS INC,6015.86,1196162.81,171919
1590,DIAGEO CHATEAU ESTATE WINES,7259.75,1365472.83,187841


In [70]:
dfLargeVendors['FreightRate(%)'] = (dfLargeVendors['Freight']/dfLargeVendors['Dollars'])*100 
dfLargeVendors['FreightPerUnit'] = (dfLargeVendors['Freight']/dfLargeVendors['Quantity'])

In [71]:
dfLargeVendors = dfLargeVendors.sort_values(by = 'FreightRate(%)', ascending = False)

In [72]:
dfLargeVendors.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity,FreightRate(%),FreightPerUnit
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9625,WESTERN SPIRITS BEVERAGE CO,1933.19,361249.21,56860,0.54,0.03
1590,DIAGEO CHATEAU ESTATE WINES,7259.75,1365472.83,187841,0.53,0.04
9744,FREDERICK WILDMAN & SONS,3999.93,759449.24,70932,0.53,0.06
653,STATE WINE & SPIRITS,8014.98,1529682.04,154092,0.52,0.05
17031,FLAG HILL WINERY & VINEYARD,1573.31,300403.2,20608,0.52,0.08
516,BANFI PRODUCTS CORP,8510.41,1628866.68,228103,0.52,0.04
4425,MARTIGNETTI COMPANIES,144719.92,27821473.91,2637275,0.52,0.05
8673,STE MICHELLE WINE ESTATES,15919.7,3086650.7,419822,0.52,0.04
9815,WINE GROUP INC,27100.41,5258636.79,888385,0.52,0.03
9165,ULTRA BEVERAGE COMPANY LLP,68054.7,13210613.93,1077527,0.52,0.06


In [65]:
LargeVendors.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity,FrieghtRate(%),FreightPerUnit,FreightRate(%)
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
8352,LUXCO INC,10261.6,2051436.01,283260,0.5,0.04,0.5
3924,HEAVEN HILL DISTILLERIES,14069.87,2816661.94,352348,0.5,0.04,0.5
3089,SIDNEY FRANK IMPORTING CO,8549.55,1715908.88,186464,0.5,0.05,0.5
9819,TREASURY WINE ESTATES,14836.57,2978686.4,497770,0.5,0.03,0.5
6359,OLE SMOKY DISTILLERY LLC,1922.0,387622.69,110162,0.5,0.02,0.5


# Deliverable 4D

In [74]:
dfSmallExpensiveInvoices = purchases[purchases['Freight']>100]
dfSmallExpensiveInvoices

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
3,480,BACARDI USA INC,2016-01-12,8106,2015-12-20,2016-02-05,10100,137483.78,2935.20,
4,516,BANFI PRODUCTS CORP,2016-01-07,8170,2015-12-24,2016-02-12,1935,15527.25,429.20,
6,1128,BROWN-FORMAN CORP,2016-01-09,8150,2015-12-23,2016-02-19,4684,65403.57,1808.77,
9,11567,CAMPARI AMERICA,2016-01-06,8151,2015-12-23,2016-02-20,1321,12039.71,398.71,
11,1485,CASTLE BRANDS CORP.,2016-01-08,8152,2015-12-23,2016-02-19,320,5420.41,179.26,
...,...,...,...,...,...,...,...,...,...,...
5532,9165,ULTRA BEVERAGE COMPANY LLP,2017-01-10,13658,2016-12-23,2017-02-19,38896,502128.05,2761.70,Frank Delahunt
5534,1587,VINEYARD BRANDS INC,2017-01-03,13625,2016-12-21,2017-02-11,3015,24370.78,109.67,
5539,9625,WESTERN SPIRITS BEVERAGE CO,2017-01-10,13661,2016-12-23,2017-02-18,4617,37300.48,186.50,
5540,3664,WILLIAM GRANT & SONS INC,2017-01-02,13643,2016-12-22,2017-02-04,9848,202815.78,932.95,


In [76]:
dfSmallExpensiveInvoices = dfSmallExpensiveInvoices[dfSmallExpensiveInvoices['Quantity']<=1000]

In [77]:
dfSmallExpensiveInvoices.shape

(39, 10)

In [78]:
dfSmallExpensiveInvoices.sort_values(by='Quantity', ascending = False)

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
1323,2561,EDRINGTON AMERICAS,2016-04-07,9495,2016-03-24,2016-05-13,989,28488.09,131.05,
4849,4848,LAIRD & CO,2016-11-24,12970,2016-11-08,2016-12-22,982,18937.82,100.37,
115,2561,EDRINGTON AMERICAS,2016-01-15,8239,2015-12-29,2016-02-17,980,29875.7,143.4,
71,8664,"STOLI GROUP,(USA) LLC",2016-01-12,8202,2015-12-25,2016-02-04,975,14299.98,349.02,
4219,2561,EDRINGTON AMERICAS,2016-10-18,12314,2016-09-25,2016-11-17,956,25432.72,132.25,
1221,2561,EDRINGTON AMERICAS,2016-04-02,9334,2016-03-14,2016-05-10,951,26168.64,120.38,
1021,2561,EDRINGTON AMERICAS,2016-03-14,9183,2016-03-03,2016-04-23,946,29350.99,137.95,
1121,2561,EDRINGTON AMERICAS,2016-03-24,9248,2016-03-08,2016-04-25,930,27042.94,129.81,
512,2561,EDRINGTON AMERICAS,2016-02-10,8679,2016-01-28,2016-03-23,920,28830.68,129.74,
4319,2561,EDRINGTON AMERICAS,2016-10-20,12473,2016-10-05,2016-11-30,914,23491.68,105.71,


In [102]:
dfSmallExpensiveInvoices_ByVendor = dfSmallExpensiveInvoices.groupby(['VendorNumber','VendorName'], as_index = False)[['Freight', 'Dollars', 'Quantity']].sum().sort_values(by = 'Freight', ascending = False)

In [105]:
dfSmallExpensiveInvoices_ByVendor.shape

(18, 5)

In [103]:
dfSmallExpensiveInvoices_ByVendor.head()

Unnamed: 0,VendorNumber,VendorName,Freight,Dollars,Quantity
5,2561,EDRINGTON AMERICAS,2944.67,570627.52,18120
14,8664,"STOLI GROUP,(USA) LLC",349.02,14299.98,975
12,7239,REMY COINTREAU USA INC,348.07,7515.38,382
0,653,STATE WINE & SPIRITS,303.7,5544.89,541
11,6785,PALM BAY INTERNATIONAL INC,242.73,5533.18,715


# Deliverable 4E

In [104]:
dfSmallExpensiveInvoices_ByVendor.index

Int64Index([5, 14, 12, 0, 11, 16, 7, 13, 8, 2, 10, 15, 4, 6, 3, 17, 1, 9], dtype='int64')

In [107]:
dfSmallExpensiveInvoices_ByVendor.shape

(18, 5)

In [106]:
dfSmallExpensiveInvoices['FreightRate(%)'] = ((dfSmallExpensiveInvoices['Freight']/dfSmallExpensiveInvoices['Dollars'])*100)
dfSmallExpensiveInvoices['FreightRatePerUnit'] = (dfSmallExpensiveInvoices['Freight']/dfSmallExpensiveInvoices['Quantity'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfSmallExpensiveInvoices['FreightRate(%)'] = ((dfSmallExpensiveInvoices['Freight']/dfSmallExpensiveInvoices['Dollars'])*100)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfSmallExpensiveInvoices['FreightRatePerUnit'] = (dfSmallExpensiveInvoices['Freight']/dfSmallExpensiveInvoices['Quantity'])


In [108]:
dfSmallExpensiveInvoices.head()

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval,FreightRate(%),FreightRatePerUnit
11,1485,CASTLE BRANDS CORP.,2016-01-08,8152,2015-12-23,2016-02-19,320,5420.41,179.26,,3.31,0.56
16,2242,DELICATO VINEYARDS INC,2016-01-06,8139,2015-12-22,2016-02-10,808,6646.46,127.05,,1.91,0.16
19,2555,DISARONNO INTERNATIONAL LLC,2016-01-11,8192,2015-12-25,2016-02-17,385,3506.41,146.86,,4.19,0.38
23,2561,EDRINGTON AMERICAS,2016-01-08,8175,2015-12-24,2016-02-10,136,5645.24,218.18,,3.86,1.6
28,3924,HEAVEN HILL DISTILLERIES,2016-01-08,8155,2015-12-23,2016-02-15,818,7079.02,200.02,,2.83,0.24


In [109]:
dfLargeVendors.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity,FreightRate(%),FreightPerUnit
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9625,WESTERN SPIRITS BEVERAGE CO,1933.19,361249.21,56860,0.54,0.03
1590,DIAGEO CHATEAU ESTATE WINES,7259.75,1365472.83,187841,0.53,0.04
9744,FREDERICK WILDMAN & SONS,3999.93,759449.24,70932,0.53,0.06
653,STATE WINE & SPIRITS,8014.98,1529682.04,154092,0.52,0.05
17031,FLAG HILL WINERY & VINEYARD,1573.31,300403.2,20608,0.52,0.08


In [None]:
pd.DataFrame([['Freight % of $', dfSmallExpensiveInvoices['FreightRate(%)'].mean(), dfLargeVendors['FreightRate(%)'].mean, purchases['FreightRate(%)'],mean()],])

# Deliverable 5

In [None]:
dfSalesPrice = pd.read_csv