# Import PANDAS Library

Notice that we are using pd as an abbreviation so each time we want to use the PANDAS library, we can simply type pd instead of pandas.

In [1]:
import pandas as pd
pd.options.display.float_format = '${:,.2f}'.format


# Import Dataset (either full dataset or simplified)

You can use either set of datafiles (full or simplified). In the past some students' computers could not handle the full dataset. If you worry that your computer will not handle the full dataset (or if you find your code is running very slow), use the simplified version. 

The simplified version was created by taking a sample of the full data; hence the simplified files have "Sample" added to the end of the file names. Some files were not large to begin with and didn't need to be reduced. Those files do not have Sample added to the end of the file name.

## Adjust the file address below to match your computer.

You will want to replace the begining of the address below (i.e., everything before OneDrive - Oregon State University/)

In [2]:
begInv = pd.read_csv("BegInvFINAL12312016Sample.csv")

# Deliverable #1a

In [3]:
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,$12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,$10.99,2016-01-01


In [4]:
begInv = begInv.rename(columns = {'onHand':'01_01_2016_Qty'})
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,$12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,$10.99,2016-01-01


In [5]:
begInv['InvCost01_01_2016'] = begInv['01_01_2016_Qty'] * begInv['Price']
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate,InvCost01_01_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,$12.99,2016-01-01,$103.92
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,$10.99,2016-01-01,$76.93


In [6]:
begInv_Store = begInv[['Store','01_01_2016_Qty','InvCost01_01_2016']].groupby('Store').sum()
begInv_Store.head(2)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,49917,"$838,079.59"
2,52925,"$840,452.20"


In [7]:
begInv_Store.sort_values(by='InvCost01_01_2016',ascending=False).head(10)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,"$3,291,170.24"
73,162551,"$3,142,497.36"
67,158996,"$3,079,578.63"
66,149314,"$2,973,033.90"
76,140208,"$2,952,418.44"
69,144255,"$2,946,726.65"
38,114368,"$2,232,698.77"
55,119641,"$2,001,263.66"
50,94720,"$1,649,808.22"
79,95330,"$1,503,149.48"


### Alternative way to sort the table in ascending order

In [8]:
begInv_Store.sort_values(by='InvCost01_01_2016').tail(10).sort_values(by='InvCost01_01_2016', ascending = False)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,"$3,291,170.24"
73,162551,"$3,142,497.36"
67,158996,"$3,079,578.63"
66,149314,"$2,973,033.90"
76,140208,"$2,952,418.44"
69,144255,"$2,946,726.65"
38,114368,"$2,232,698.77"
55,119641,"$2,001,263.66"
50,94720,"$1,649,808.22"
79,95330,"$1,503,149.48"


# Deliverable #1b

In [9]:
begInv_Brand = begInv[['Brand','01_01_2016_Qty','InvCost01_01_2016']].groupby('Brand').sum()
begInv_Brand.head(2)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,281,"$3,650.19"
60,288,"$3,165.12"


In [10]:
begInv_Brand.sort_values(by='InvCost01_01_2016',ascending=False).head(10)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
3545,14499,"$463,823.01"
1233,12016,"$432,455.84"
8068,15341,"$383,371.59"
4261,15499,"$340,823.01"
3858,13649,"$327,439.51"
2753,4625,"$286,703.75"
8082,9287,"$278,517.13"
8680,7066,"$275,503.34"
2589,6766,"$270,572.34"
3876,14829,"$266,773.71"


# Deliverable #2a

In [11]:
endInv = pd.read_csv("EndInvFINAL12312016.csv")

In [12]:
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,$12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,$36.99,2016-12-31


In [13]:
endInv = endInv.rename(columns = {'onHand':'12_31_2016_Qty'})
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,$12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,$36.99,2016-12-31


In [14]:
endInv['InvCost12_31_2016'] = endInv['12_31_2016_Qty'] * endInv['Price']
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate,InvCost12_31_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,$12.99,2016-12-31,$142.89
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,$36.99,2016-12-31,$258.93


In [15]:
endInv_Store = endInv[['Store','12_31_2016_Qty','InvCost12_31_2016']].groupby('Store').sum()
endInv_Store.head(2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,79827,"$1,206,845.93"
2,56671,"$850,884.06"


In [16]:
endInv_Store.sort_values(by='InvCost12_31_2016',ascending=False).head(10)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
50,260717,"$4,887,260.68"
73,164589,"$3,254,662.81"
67,163765,"$3,076,114.82"
34,145829,"$3,074,616.75"
76,143866,"$2,975,945.18"
69,150848,"$2,968,678.82"
66,144579,"$2,860,504.99"
74,166015,"$2,803,645.13"
38,129397,"$2,463,906.85"
55,125584,"$2,234,836.35"


# Deliverable #2b

In [17]:
endInv_Brand = endInv[['Brand','12_31_2016_Qty','InvCost12_31_2016']].groupby('Brand').sum()
endInv_Brand.head(2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,385,"$5,001.15"
60,146,"$1,604.54"


In [18]:
endInv_Brand.sort_values(by='InvCost12_31_2016',ascending=False).head(10)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
1233,15047,"$526,494.53"
3545,16770,"$502,932.30"
2753,7849,"$470,861.51"
8068,15608,"$366,631.92"
3405,12268,"$355,649.32"
4261,16769,"$351,981.31"
2757,11603,"$336,370.97"
2589,7922,"$300,956.78"
1376,13180,"$276,648.20"
2585,10487,"$272,557.13"


# Deliverable #3

In [19]:
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate,InvCost01_01_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,$12.99,2016-01-01,$103.92
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,$10.99,2016-01-01,$76.93


In [20]:
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate,InvCost12_31_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,$12.99,2016-12-31,$142.89
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,$36.99,2016-12-31,$258.93


In [21]:
mergetableA=pd.merge(left=begInv, right= endInv, how = "left", on= ['InventoryId'])
mergetableA= mergetableA[['InventoryId', '01_01_2016_Qty', 'InvCost01_01_2016', '12_31_2016_Qty', 'InvCost12_31_2016']]


In [22]:
mergetableA= mergetableA.fillna(0)

In [23]:
mergetableA.head()

Unnamed: 0,InventoryId,01_01_2016_Qty,InvCost01_01_2016,12_31_2016_Qty,InvCost12_31_2016
0,1_HARDERSFIELD_58,8,$103.92,$11.00,$142.89
1,1_HARDERSFIELD_60,7,$76.93,$0.00,$0.00
2,1_HARDERSFIELD_62,6,$221.94,$7.00,$258.93
3,1_HARDERSFIELD_63,3,$116.97,$7.00,$272.93
4,1_HARDERSFIELD_72,6,$209.94,$4.00,$139.96


# Deliverable #4a

In [24]:
purchases = pd.read_csv("PurchasesFINAL12312016Sample.csv")
purchases.head()

Unnamed: 0,InventoryId,Store,Brand,Description,Size,VendorNumber,VendorName,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,Dollars,Classification
0,69_MOUNTMEND_8412,69,8412,Tequila Ocho Plata Fresno,750mL,105,ALTAMAR BRANDS LLC,8124,2015-12-21,2016-01-02,2016-01-04,2016-02-16,$35.71,6,$214.26,1
1,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,$9.41,5,$47.05,1
2,76_DONCASTER_2034,76,2034,Glendalough Double Barrel,750mL,388,ATLANTIC IMPORTING COMPANY,8169,2015-12-24,2016-01-02,2016-01-09,2016-02-16,$21.32,5,$106.60,1
3,5_SUTTON_3348,5,3348,Bombay Sapphire Gin,1.75L,480,BACARDI USA INC,8106,2015-12-20,2016-01-02,2016-01-12,2016-02-05,$22.38,6,$134.28,1
4,30_CULCHETH_4903,30,4903,Bacardi Superior Rum,200mL,480,BACARDI USA INC,8106,2015-12-20,2016-01-01,2016-01-12,2016-02-05,$2.87,48,$137.76,1


In [25]:
purchases[["InventoryId", "VendorNumber", "VendorName", "Dollars"]].groupby(["VendorNumber","VendorName"]).sum().sort_values(by="Dollars",ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,InventoryId,Dollars
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1
3960,DIAGEO NORTH AMERICA INC,1_HARDERSFIELD_246876_DONCASTER_872634_PITMERD...,"$21,315,299.75"
4425,MARTIGNETTI COMPANIES,69_MOUNTMEND_2533754_GUTHRAM_696767_EANVERNESS...,"$11,709,222.19"
12546,JIM BEAM BRANDS COMPANY,50_MOUNTMEND_80661_HARDERSFIELD_419930_CULCHET...,"$10,232,521.65"
17035,PERNOD RICARD USA,54_GUTHRAM_258612_LEESIDE_2105518_FURNESS_5734...,"$10,224,899.93"
480,BACARDI USA INC,5_SUTTON_334830_CULCHETH_49031_HARDERSFIELD_38...,"$7,420,630.63"
1392,CONSTELLATION BRANDS INC,30_CULCHETH_3850216_LUNDY_1774815_WANBORNE_665...,"$6,584,032.72"
1128,BROWN-FORMAN CORP,1_HARDERSFIELD_123247_PELLA'S WISH_123215_WANB...,"$5,719,375.62"
9165,ULTRA BEVERAGE COMPANY LLP,47_PELLA'S WISH_201021_HARDERSFIELD_262576_DON...,"$5,545,766.29"
3252,E & J GALLO WINERY,7_STANMORE_3150269_MOUNTMEND_656938_GOULCREST_...,"$5,177,067.61"
9552,M S WALKER INC,16_LUNDY_376976_DONCASTER_81351_HARDERSFIELD_3...,"$4,647,908.17"


# Deliverable #4b

In [26]:
invoice_purchases = pd.read_csv("InvoicePurchases12312016.csv")
invoice_purchases.head()

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
0,105,ALTAMAR BRANDS LLC,2016-01-04,8124,2015-12-21,2016-02-16,6,$214.26,$3.47,
1,4466,AMERICAN VINTAGE BEVERAGE,2016-01-07,8137,2015-12-22,2016-02-21,15,$140.55,$8.57,
2,388,ATLANTIC IMPORTING COMPANY,2016-01-09,8169,2015-12-24,2016-02-16,5,$106.60,$4.61,
3,480,BACARDI USA INC,2016-01-12,8106,2015-12-20,2016-02-05,10100,"$137,483.78","$2,935.20",
4,516,BANFI PRODUCTS CORP,2016-01-07,8170,2015-12-24,2016-02-12,1935,"$15,527.25",$429.20,


In [27]:
invoice_purchases[["VendorNumber", "VendorName", "Freight"]].groupby(["VendorNumber","VendorName"]).sum().sort_values(by="Freight",ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight
VendorNumber,VendorName,Unnamed: 2_level_1
3960,DIAGEO NORTH AMERICA INC,"$257,032.07"
4425,MARTIGNETTI COMPANIES,"$144,719.92"
12546,JIM BEAM BRANDS COMPANY,"$123,880.97"
17035,PERNOD RICARD USA,"$123,780.22"
480,BACARDI USA INC,"$89,286.27"
1392,CONSTELLATION BRANDS INC,"$79,528.99"
1128,BROWN-FORMAN CORP,"$68,601.68"
9165,ULTRA BEVERAGE COMPANY LLP,"$68,054.70"
3252,E & J GALLO WINERY,"$61,966.91"
9552,M S WALKER INC,"$55,551.82"


# Deliverable #4c

In [28]:
invoice_purchases.head()
vendor= invoice_purchases[["VendorNumber", "VendorName","Quantity", "Dollars", "Freight"]]
vendor.head()

Unnamed: 0,VendorNumber,VendorName,Quantity,Dollars,Freight
0,105,ALTAMAR BRANDS LLC,6,$214.26,$3.47
1,4466,AMERICAN VINTAGE BEVERAGE,15,$140.55,$8.57
2,388,ATLANTIC IMPORTING COMPANY,5,$106.60,$4.61
3,480,BACARDI USA INC,10100,"$137,483.78","$2,935.20"
4,516,BANFI PRODUCTS CORP,1935,"$15,527.25",$429.20


In [29]:
vendorGroup = vendor.groupby(["VendorNumber","VendorName"])[['Freight','Dollars','Quantity']].sum()
vendorGroup

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,"IRA GOLDMAN AND WILLIAMS, LLP",$27.08,"$5,630.88",328
54,AAPER ALCOHOL & CHEMICAL CO,$0.48,$105.07,1
60,ADAMBA IMPORTS INTL INC,$367.52,"$76,770.25",4732
105,ALTAMAR BRANDS LLC,$62.39,"$11,706.20",332
200,AMERICAN SPIRITS EXCHANGE,$6.19,"$1,205.16",132
...,...,...,...,...
98450,Serralles Usa LLC,$856.02,"$168,993.61",10463
99166,STARK BREWING COMPANY,$130.09,"$25,961.04",1212
172662,SWEETWATER FARM,$178.34,"$34,708.03",1629
173357,TAMWORTH DISTILLING,$202.50,"$41,036.44",1990


In [30]:
vendor_large = vendorGroup[vendorGroup["Dollars"] > 250000]
vendor_large.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
480,BACARDI USA INC,"$89,286.27","$17,624,378.72",1427075
516,BANFI PRODUCTS CORP,"$8,510.41","$1,628,866.68",228103
653,STATE WINE & SPIRITS,"$8,014.98","$1,529,682.04",154092
660,SAZERAC NORTH AMERICA INC.,"$17,932.33","$3,537,977.55",503931
1128,BROWN-FORMAN CORP,"$68,601.68","$13,529,433.08",1006122


In [31]:
vendor_large['vendor_freight_per$']= (vendor_large['Freight'] / vendor_large['Dollars'])
vendor_large.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vendor_large['vendor_freight_per$']= (vendor_large['Freight'] / vendor_large['Dollars'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity,vendor_freight_per$
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
480,BACARDI USA INC,"$89,286.27","$17,624,378.72",1427075,$0.01
516,BANFI PRODUCTS CORP,"$8,510.41","$1,628,866.68",228103,$0.01
653,STATE WINE & SPIRITS,"$8,014.98","$1,529,682.04",154092,$0.01
660,SAZERAC NORTH AMERICA INC.,"$17,932.33","$3,537,977.55",503931,$0.01
1128,BROWN-FORMAN CORP,"$68,601.68","$13,529,433.08",1006122,$0.01


In [32]:
vendor_large.sort_values(by="vendor_freight_per$",ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity,vendor_freight_per$
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
9625,WESTERN SPIRITS BEVERAGE CO,"$1,933.19","$361,249.21",56860,$0.01
1590,DIAGEO CHATEAU ESTATE WINES,"$7,259.75","$1,365,472.83",187841,$0.01
9744,FREDERICK WILDMAN & SONS,"$3,999.93","$759,449.24",70932,$0.01
653,STATE WINE & SPIRITS,"$8,014.98","$1,529,682.04",154092,$0.01
17031,FLAG HILL WINERY & VINEYARD,"$1,573.31","$300,403.20",20608,$0.01
516,BANFI PRODUCTS CORP,"$8,510.41","$1,628,866.68",228103,$0.01
4425,MARTIGNETTI COMPANIES,"$144,719.92","$27,821,473.91",2637275,$0.01
8673,STE MICHELLE WINE ESTATES,"$15,919.70","$3,086,650.70",419822,$0.01
9815,WINE GROUP INC,"$27,100.41","$5,258,636.79",888385,$0.01
9165,ULTRA BEVERAGE COMPANY LLP,"$68,054.70","$13,210,613.93",1077527,$0.01


In [33]:
vendor_large.sort_values(by="vendor_freight_per$",ascending=False).tail(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity,vendor_freight_per$
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
8352,LUXCO INC,"$10,261.60","$2,051,436.01",283260,$0.01
3924,HEAVEN HILL DISTILLERIES,"$14,069.87","$2,816,661.94",352348,$0.00
3089,SIDNEY FRANK IMPORTING CO,"$8,549.55","$1,715,908.88",186464,$0.00
9819,TREASURY WINE ESTATES,"$14,836.57","$2,978,686.40",497770,$0.00
6359,OLE SMOKY DISTILLERY LLC,"$1,922.00","$387,622.69",110162,$0.00


# Deliverable #4d

In [34]:
vendor.head()

Unnamed: 0,VendorNumber,VendorName,Quantity,Dollars,Freight
0,105,ALTAMAR BRANDS LLC,6,$214.26,$3.47
1,4466,AMERICAN VINTAGE BEVERAGE,15,$140.55,$8.57
2,388,ATLANTIC IMPORTING COMPANY,5,$106.60,$4.61
3,480,BACARDI USA INC,10100,"$137,483.78","$2,935.20"
4,516,BANFI PRODUCTS CORP,1935,"$15,527.25",$429.20


In [35]:
freight_large = vendor[(vendor['Freight'] >100)]
unit_large = freight_large[(freight_large['Quantity'] <= 1000)]

#mergetableC[["VendorNumber", "VendorName_y","Quantity_y","Freight"]].sort_values(by='Freight',ascending=False)
#mergetableC[["VendorNumber", "VendorName_y","Quantity_y", "Freight"]].groupby(["VendorNumber","VendorName_y"]).sum().sort_values(by='Freight',ascending=False)


In [36]:
unit_large.groupby(["VendorNumber","VendorName"]).sum().sort_values(by="Freight",ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Quantity,Dollars,Freight
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2561,EDRINGTON AMERICAS,18120,"$570,627.52","$2,944.67"
8664,"STOLI GROUP,(USA) LLC",975,"$14,299.98",$349.02
7239,REMY COINTREAU USA INC,382,"$7,515.38",$348.07
653,STATE WINE & SPIRITS,541,"$5,544.89",$303.70
6785,PALM BAY INTERNATIONAL INC,715,"$5,533.18",$242.73


In [37]:
#mergetableC[["VendorNumber", "VendorName","Quantity", "Freight"]].groupby(["VendorNumber","VendorName"]).sum().sort_values(by='Freight',ascending=False).head(3)


# Deliverable #4e

In [38]:
NewTable=unit_large[["VendorNumber", "VendorName","Quantity", "Dollars", "Freight"]]
NewTable["Freight_per_unit"] = (unit_large["Freight"] / unit_large["Quantity"])
NewTable["Freight_per_dollar"] = (unit_large["Freight"] / unit_large["Dollars"])


In [39]:
NewTable.groupby(["VendorNumber","VendorName"]).sum().sort_values(by="Freight_per_unit",ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Quantity,Dollars,Freight,Freight_per_unit,Freight_per_dollar
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2561,EDRINGTON AMERICAS,18120,"$570,627.52","$2,944.67",$4.82,$0.14
7239,REMY COINTREAU USA INC,382,"$7,515.38",$348.07,$0.91,$0.05
653,STATE WINE & SPIRITS,541,"$5,544.89",$303.70,$0.56,$0.05
1485,CASTLE BRANDS CORP.,320,"$5,420.41",$179.26,$0.56,$0.03
4692,KOBRAND CORPORATION,427,"$5,336.48",$185.93,$0.44,$0.03


In [45]:
vendor_large.sort_values(by="vendor_freight_per$",ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Freight,Dollars,Quantity,vendor_freight_per$
VendorNumber,VendorName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
9625,WESTERN SPIRITS BEVERAGE CO,"$1,933.19","$361,249.21",56860,$0.01
1590,DIAGEO CHATEAU ESTATE WINES,"$7,259.75","$1,365,472.83",187841,$0.01
9744,FREDERICK WILDMAN & SONS,"$3,999.93","$759,449.24",70932,$0.01
653,STATE WINE & SPIRITS,"$8,014.98","$1,529,682.04",154092,$0.01
17031,FLAG HILL WINERY & VINEYARD,"$1,573.31","$300,403.20",20608,$0.01


In [44]:
#They are different because part c freight cost that are above 250k are smaller than freight cost for part e.

# Deliverable #5

In [41]:
#Compute the total sales dollar for each product and find the top 10 total sales dollar.

In [42]:
sales = pd.read_csv("SalesFINAL12312016Sample.csv")
sales.head()

Unnamed: 0,InventoryId,Store,Brand,Description,Size,SalesQuantity,SalesDollars,SalesPrice,SalesDate,Volume,Classification,ExciseTax,VendorNo,VendorName
0,1_HARDERSFIELD_1009,1,1009,Rebel Yell Variety Pack,750mL 3 Pk,1,$49.99,$49.99,2016-01-02,$750.00,1,$0.79,8352,LUXCO INC
1,1_HARDERSFIELD_10238,1,10238,Layer Cake Primitivo Puglia,750mL,2,$31.98,$15.99,2016-01-02,$750.00,2,$0.22,4425,MARTIGNETTI COMPANIES
2,1_HARDERSFIELD_10239,1,10239,Cannonball Cab Svgn Cal,750mL,1,$13.99,$13.99,2016-01-02,$750.00,2,$0.11,4425,MARTIGNETTI COMPANIES
3,1_HARDERSFIELD_10266,1,10266,Klinker Brick Old Vine Znfdl,750mL,1,$16.99,$16.99,2016-01-09,$750.00,2,$0.11,9552,M S WALKER INC
4,1_HARDERSFIELD_1029,1,1029,Fulton's Harvest Apple Pie L,750mL,1,$9.99,$9.99,2016-01-03,$750.00,1,$0.79,3924,HEAVEN HILL DISTILLERIES


In [43]:
sales[["InventoryId", "Description","SalesDollars"]].groupby(["InventoryId"]).sum().sort_values(by='SalesDollars',ascending=False).head(10)


Unnamed: 0_level_0,Description,SalesDollars
InventoryId,Unnamed: 1_level_1,Unnamed: 2_level_1
50_MOUNTMEND_1233,Jack Daniels No 7 BlackJack Daniels No 7 Black...,"$36,025.91"
76_DONCASTER_3405,Tito's Handmade VodkaTito's Handmade VodkaTito...,"$27,774.73"
73_DONCASTER_42188,Moet & Chandon Nectar Imp RsMoet & Chandon Nec...,"$25,088.37"
34_PITMERDEN_3545,Ketel One VodkaKetel One VodkaKetel One VodkaK...,"$23,449.41"
38_GOULCREST_3405,Tito's Handmade VodkaTito's Handmade VodkaTito...,"$23,281.27"
50_MOUNTMEND_4689,Hennessy Privilege VSOPHennessy Privilege VSOP...,"$23,107.12"
27_MOUNTMEND_3858,Grey Goose VodkaGrey Goose VodkaGrey Goose Vod...,"$22,970.56"
34_PITMERDEN_4689,Hennessy Privilege VSOPHennessy Privilege VSOP...,"$22,101.35"
73_DONCASTER_1233,Jack Daniels No 7 BlackJack Daniels No 7 Black...,"$21,841.06"
76_DONCASTER_1233,Jack Daniels No 7 BlackJack Daniels No 7 Black...,"$20,921.07"
