# Import PANDAS Library

Notice that we are using pd as an abbreviation so each time we want to use the PANDAS library, we can simply type pd instead of pandas.

In [1]:
import pandas as pd

# Import Dataset (either full dataset or simplified)

You can use either set of datafiles (full or simplified). In the past some students' computers could not handle the full dataset. If you worry that your computer will not handle the full dataset (or if you find your code is running very slow), use the simplified version. 

The simplified version was created by taking a sample of the full data; hence the simplified files have "Sample" added to the end of the file names. Some files were not large to begin with and didn't need to be reduced. Those files do not have Sample added to the end of the file name.

## Adjust the file address below to match your computer.

You will want to replace the begining of the address below (i.e., everything before OneDrive - Oregon State University/)

In [2]:
begInv = pd.read_csv("BegInvFINAL12-31-16Sample.csv")

In [3]:
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01


## Deliverable 1a

In [4]:
begInv = begInv.rename(columns = {'onHand':'01_01_2016_Qty'})
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01


In [5]:
begInv['InvCost01_01_2016'] = begInv['01_01_2016_Qty'] * begInv['Price']
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate,InvCost01_01_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01,103.92
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01,76.93


In [6]:
begInv_Store = begInv[['Store','01_01_2016_Qty','InvCost01_01_2016']].groupby('Store').sum()
begInv_Store.head(2)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,49917,838079.59
2,52925,840452.2


### One way to get this is to sort the table in decending order and use head(10) to show the ten highest

In [7]:
begInv_Store.sort_values(by='InvCost01_01_2016',ascending=False).head(10)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


### Alternative way to get the same answer

In [8]:
begInv_Store.sort_values(by='InvCost01_01_2016').tail(10).sort_values(by='InvCost01_01_2016', ascending = False)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


## Deliverable 1b

In [9]:
begInv_Brand = begInv[['Brand','01_01_2016_Qty','InvCost01_01_2016']].groupby('Brand').sum()
begInv_Brand.head(2)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,281,3650.19
60,288,3165.12


In [10]:
begInv_Brand.sort_values(by='InvCost01_01_2016',ascending=False).head(10)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
3545,14499,463823.01
1233,12016,432455.84
8068,15341,383371.59
4261,15499,340823.01
3858,13649,327439.51
2753,4625,286703.75
8082,9287,278517.13
8680,7066,275503.34
2589,6766,270572.34
3876,14829,266773.71


## Deliverable 2a

In [11]:
endInv = pd.read_csv("EndInvFINAL12-31-16Sample.csv")

In [12]:
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31


In [13]:
endInv = endInv.rename(columns = {'onHand':'12_31_2016_Qty'})
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31


In [14]:
endInv['InvCost12_31_2016'] = endInv['12_31_2016_Qty'] * endInv['Price']
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate,InvCost12_31_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31,142.89
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31,258.93


In [15]:
endInv_Store = endInv[['Store','12_31_2016_Qty','InvCost12_31_2016']].groupby('Store').sum()
endInv_Store.head(2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,79827,1206845.93
2,56671,850884.06


In [16]:
endInv_Store.sort_values(by='InvCost12_31_2016',ascending=False).head(10)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
50,260717,4887260.68
73,164589,3254662.81
67,163765,3076114.82
34,145829,3074616.75
76,143866,2975945.18
69,150848,2968678.82
66,144579,2860504.99
74,166015,2803645.13
38,129397,2463906.85
55,125584,2234836.35


## Deliverable 2b

In [17]:
endInv_Brand = endInv[['Brand','12_31_2016_Qty','InvCost12_31_2016']].groupby('Brand').sum()
endInv_Brand.head(2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,385,5001.15
60,146,1604.54


In [18]:
endInv_Brand.sort_values(by='InvCost12_31_2016',ascending=False).head(10)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
1233,15047,526494.53
3545,16770,502932.3
2753,7849,470861.51
8068,15608,366631.92
3405,12268,355649.32
4261,16769,351981.31
2757,11603,336370.97
2589,7922,300956.78
1376,13180,276648.2
2585,10487,272557.13


## Deliverable 3

In [19]:
merge_inv = pd.merge(left=begInv[['InventoryId','01_01_2016_Qty','InvCost01_01_2016']],
                     right=endInv[['InventoryId','12_31_2016_Qty','InvCost12_31_2016']],
                     how='outer',
                     on=['InventoryId'])
merge_inv.head()

Unnamed: 0,InventoryId,01_01_2016_Qty,InvCost01_01_2016,12_31_2016_Qty,InvCost12_31_2016
0,1_HARDERSFIELD_58,8.0,103.92,11.0,142.89
1,1_HARDERSFIELD_60,7.0,76.93,,
2,1_HARDERSFIELD_62,6.0,221.94,7.0,258.93
3,1_HARDERSFIELD_63,3.0,116.97,7.0,272.93
4,1_HARDERSFIELD_72,6.0,209.94,4.0,139.96


In [20]:
merge_inv = merge_inv.fillna(0)
merge_inv.head()

Unnamed: 0,InventoryId,01_01_2016_Qty,InvCost01_01_2016,12_31_2016_Qty,InvCost12_31_2016
0,1_HARDERSFIELD_58,8.0,103.92,11.0,142.89
1,1_HARDERSFIELD_60,7.0,76.93,0.0,0.0
2,1_HARDERSFIELD_62,6.0,221.94,7.0,258.93
3,1_HARDERSFIELD_63,3.0,116.97,7.0,272.93
4,1_HARDERSFIELD_72,6.0,209.94,4.0,139.96


## Deliverable 4a

In [21]:
purchases = pd.read_csv("PurchasesFINAL12-31-16Sample.csv")
purchases.head()

Unnamed: 0,InventoryId,Store,Brand,Description,Size,VendorNumber,VendorName,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,Dollars,Classification
0,69_MOUNTMEND_8412,69,8412,Tequila Ocho Plata Fresno,750mL,105,ALTAMAR BRANDS LLC,8124,2015-12-21,2016-01-02,2016-01-04,2016-02-16,35.71,6,214.26,1
1,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,9.41,5,47.05,1
2,76_DONCASTER_2034,76,2034,Glendalough Double Barrel,750mL,388,ATLANTIC IMPORTING COMPANY,8169,2015-12-24,2016-01-02,2016-01-09,2016-02-16,21.32,5,106.6,1
3,5_SUTTON_3348,5,3348,Bombay Sapphire Gin,1.75L,480,BACARDI USA INC,8106,2015-12-20,2016-01-02,2016-01-12,2016-02-05,22.38,6,134.28,1
4,30_CULCHETH_4903,30,4903,Bacardi Superior Rum,200mL,480,BACARDI USA INC,8106,2015-12-20,2016-01-01,2016-01-12,2016-02-05,2.87,48,137.76,1


In [22]:
purchases[["VendorName","Dollars"]].groupby(["VendorName"]).sum().sort_values(by='Dollars' , ascending=False).head(10)

Unnamed: 0_level_0,Dollars
VendorName,Unnamed: 1_level_1
DIAGEO NORTH AMERICA INC,21315299.75
MARTIGNETTI COMPANIES,11709222.19
JIM BEAM BRANDS COMPANY,10232521.65
PERNOD RICARD USA,10224899.93
BACARDI USA INC,7420630.63
CONSTELLATION BRANDS INC,6584032.72
BROWN-FORMAN CORP,5719375.62
ULTRA BEVERAGE COMPANY LLP,5545766.29
E & J GALLO WINERY,5177067.61
M S WALKER INC,4647908.17


## Deliverable 4b

In [23]:
invoice = pd.read_csv("InvoicePurchases12-31-16Sample.csv")
invoice.head()


Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
0,105,ALTAMAR BRANDS LLC,2016-01-04,8124,2015-12-21,2016-02-16,6,214.26,3.47,
1,4466,AMERICAN VINTAGE BEVERAGE,2016-01-07,8137,2015-12-22,2016-02-21,15,140.55,8.57,
2,388,ATLANTIC IMPORTING COMPANY,2016-01-09,8169,2015-12-24,2016-02-16,5,106.6,4.61,
3,480,BACARDI USA INC,2016-01-12,8106,2015-12-20,2016-02-05,10100,137483.78,2935.2,
4,516,BANFI PRODUCTS CORP,2016-01-07,8170,2015-12-24,2016-02-12,1935,15527.25,429.2,


In [24]:
invoice[["VendorName","Freight"]].groupby(["VendorName"]).sum().sort_values(by='Freight' , ascending=False).head(10)

Unnamed: 0_level_0,Freight
VendorName,Unnamed: 1_level_1
DIAGEO NORTH AMERICA INC,257032.07
MARTIGNETTI COMPANIES,144719.92
JIM BEAM BRANDS COMPANY,123880.97
PERNOD RICARD USA,123780.22
BACARDI USA INC,89286.27
CONSTELLATION BRANDS INC,79528.99
BROWN-FORMAN CORP,68601.68
ULTRA BEVERAGE COMPANY LLP,68054.7
E & J GALLO WINERY,61966.91
M S WALKER INC,55551.82


## Deliverable 4c

In [25]:
table1 = invoice[["VendorName","Freight","Dollars"]].groupby(["VendorName"]).sum()
table1.head()

Unnamed: 0_level_0,Freight,Dollars
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPER ALCOHOL & CHEMICAL CO,0.48,105.07
ADAMBA IMPORTS INTL INC,367.52,76770.25
ALISA CARR BEVERAGES,172.0,34951.68
ALTAMAR BRANDS LLC,62.39,11706.2
AMERICAN SPIRITS EXCHANGE,6.19,1205.16


In [26]:
highestfreight = table1[table1["Dollars"] >= 250000]
highestfreight.head()

Unnamed: 0_level_0,Freight,Dollars
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1
BACARDI USA INC,89286.27,17624378.72
BANFI PRODUCTS CORP,8510.41,1628866.68
BROWN-FORMAN CORP,68601.68,13529433.08
CALEDONIA SPIRITS INC,1319.77,259604.7
CAMPARI AMERICA,20964.81,4141720.71


In [27]:
highestfreight['FreightperDollar'] = (highestfreight["Freight"] / highestfreight["Dollars"])
highestfreight.sort_values(by='FreightperDollar', ascending=False).head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  highestfreight['FreightperDollar'] = (highestfreight["Freight"] / highestfreight["Dollars"])


Unnamed: 0_level_0,Freight,Dollars,FreightperDollar
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
WESTERN SPIRITS BEVERAGE CO,1933.19,361249.21,0.005351
DIAGEO CHATEAU ESTATE WINES,7259.75,1365472.83,0.005317
FREDERICK WILDMAN & SONS,3999.93,759449.24,0.005267
STATE WINE & SPIRITS,8014.98,1529682.04,0.00524
FLAG HILL WINERY & VINEYARD,1573.31,300403.2,0.005237
BANFI PRODUCTS CORP,8510.41,1628866.68,0.005225
MARTIGNETTI COMPANIES,144719.92,27821473.91,0.005202
STE MICHELLE WINE ESTATES,15919.7,3086650.7,0.005158
WINE GROUP INC,27100.41,5258636.79,0.005154
ULTRA BEVERAGE COMPANY LLP,68054.7,13210613.93,0.005152


In [28]:
highestfreight.sort_values(by='FreightperDollar', ascending=True).head(5)

Unnamed: 0_level_0,Freight,Dollars,FreightperDollar
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
OLE SMOKY DISTILLERY LLC,1922.0,387622.69,0.004958
TREASURY WINE ESTATES,14836.57,2978686.4,0.004981
SIDNEY FRANK IMPORTING CO,8549.55,1715908.88,0.004983
HEAVEN HILL DISTILLERIES,14069.87,2816661.94,0.004995
LUXCO INC,10261.6,2051436.01,0.005002


## Deliverable 4d

In [29]:
invoice.head()

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
0,105,ALTAMAR BRANDS LLC,2016-01-04,8124,2015-12-21,2016-02-16,6,214.26,3.47,
1,4466,AMERICAN VINTAGE BEVERAGE,2016-01-07,8137,2015-12-22,2016-02-21,15,140.55,8.57,
2,388,ATLANTIC IMPORTING COMPANY,2016-01-09,8169,2015-12-24,2016-02-16,5,106.6,4.61,
3,480,BACARDI USA INC,2016-01-12,8106,2015-12-20,2016-02-05,10100,137483.78,2935.2,
4,516,BANFI PRODUCTS CORP,2016-01-07,8170,2015-12-24,2016-02-12,1935,15527.25,429.2,


In [30]:
table2 = invoice[(invoice["Freight"] >= 100) & (invoice["Quantity"] <= 1000)]
table2.head()

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
11,1485,CASTLE BRANDS CORP.,2016-01-08,8152,2015-12-23,2016-02-19,320,5420.41,179.26,
16,2242,DELICATO VINEYARDS INC,2016-01-06,8139,2015-12-22,2016-02-10,808,6646.46,127.05,
19,2555,DISARONNO INTERNATIONAL LLC,2016-01-11,8192,2015-12-25,2016-02-17,385,3506.41,146.86,
23,2561,EDRINGTON AMERICAS,2016-01-08,8175,2015-12-24,2016-02-10,136,5645.24,218.18,
28,3924,HEAVEN HILL DISTILLERIES,2016-01-08,8155,2015-12-23,2016-02-15,818,7079.02,200.02,


In [31]:
table2[["VendorName","Freight"]].groupby(["VendorName"]).sum().sort_values(by='Freight', ascending=False).head(10)

Unnamed: 0_level_0,Freight
VendorName,Unnamed: 1_level_1
EDRINGTON AMERICAS,2944.67
"STOLI GROUP,(USA) LLC",349.02
REMY COINTREAU USA INC,348.07
STATE WINE & SPIRITS,303.7
PALM BAY INTERNATIONAL INC,242.73
MAJESTIC FINE WINES,218.05
HEAVEN HILL DISTILLERIES,200.02
LUXCO INC,196.61
KOBRAND CORPORATION,185.93
CASTLE BRANDS CORP.,179.26


In [33]:
table2['FreightperDollar'] = (table2["Freight"] / table2["Dollars"])
table2['FreightperUnit'] = (table2["Freight"] / table2["Quantity"])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table2['FreightperDollar'] = (table2["Freight"] / table2["Dollars"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  table2['FreightperUnit'] = (table2["Freight"] / table2["Quantity"])


In [35]:
table2[["VendorName","Freight","Dollars","Quantity","FreightperDollar","FreightperUnit"]].sort_values(by='FreightperDollar', ascending=False).head(10)

Unnamed: 0,VendorName,Freight,Dollars,Quantity,FreightperDollar,FreightperUnit
69,STATE WINE & SPIRITS,303.7,5544.89,541,0.054771,0.561368
59,REMY COINTREAU USA INC,348.07,7515.38,382,0.046314,0.911178
50,PALM BAY INTERNATIONAL INC,242.73,5533.18,715,0.043868,0.339483
19,DISARONNO INTERNATIONAL LLC,146.86,3506.41,385,0.041883,0.381455
23,EDRINGTON AMERICAS,218.18,5645.24,136,0.038648,1.604265
45,MHW LTD,177.5,4821.83,412,0.036812,0.430825
34,KOBRAND CORPORATION,185.93,5336.48,427,0.034841,0.435433
11,CASTLE BRANDS CORP.,179.26,5420.41,320,0.033071,0.560187
38,LUXCO INC,196.61,6139.47,819,0.032024,0.240061
40,MAJESTIC FINE WINES,218.05,7234.88,875,0.030139,0.2492


The amounts are signficantly higher than the amounts in Deliverable 4c

## Deliverable 5

In [68]:
Sales = pd.read_csv("SalesFINAL12-31-16Sample.csv")

In [69]:
Sales.head()

Unnamed: 0,InventoryId,Store,Brand,Description,Size,SalesQuantity,SalesDollars,SalesPrice,SalesDate,Volume,Classification,ExciseTax,VendorNo,VendorName
0,1_HARDERSFIELD_1009,1,1009,Rebel Yell Variety Pack,750mL 3 Pk,1,49.99,49.99,2016-01-02,750.0,1,0.79,8352,LUXCO INC
1,1_HARDERSFIELD_10238,1,10238,Layer Cake Primitivo Puglia,750mL,2,31.98,15.99,2016-01-02,750.0,2,0.22,4425,MARTIGNETTI COMPANIES
2,1_HARDERSFIELD_10239,1,10239,Cannonball Cab Svgn Cal,750mL,1,13.99,13.99,2016-01-02,750.0,2,0.11,4425,MARTIGNETTI COMPANIES
3,1_HARDERSFIELD_10266,1,10266,Klinker Brick Old Vine Znfdl,750mL,1,16.99,16.99,2016-01-09,750.0,2,0.11,9552,M S WALKER INC
4,1_HARDERSFIELD_1029,1,1029,Fulton's Harvest Apple Pie L,750mL,1,9.99,9.99,2016-01-03,750.0,1,0.79,3924,HEAVEN HILL DISTILLERIES


In [70]:
invoices = pd.read_csv("InvoicePurchases12-31-16Sample.csv")
invoices.head()

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
0,105,ALTAMAR BRANDS LLC,2016-01-04,8124,2015-12-21,2016-02-16,6,214.26,3.47,
1,4466,AMERICAN VINTAGE BEVERAGE,2016-01-07,8137,2015-12-22,2016-02-21,15,140.55,8.57,
2,388,ATLANTIC IMPORTING COMPANY,2016-01-09,8169,2015-12-24,2016-02-16,5,106.6,4.61,
3,480,BACARDI USA INC,2016-01-12,8106,2015-12-20,2016-02-05,10100,137483.78,2935.2,
4,516,BANFI PRODUCTS CORP,2016-01-07,8170,2015-12-24,2016-02-12,1935,15527.25,429.2,


In [71]:
merge_sales = pd.merge(left=Sales[['VendorName','SalesDollars']],
                     right=invoices[['VendorName','Dollars']],
                     how='outer',
                     on=["VendorName"])
merge_sales.head()

Unnamed: 0,VendorName,SalesDollars,Dollars
0,LUXCO INC,49.99,6139.47
1,LUXCO INC,49.99,43820.57
2,LUXCO INC,49.99,28477.46
3,LUXCO INC,49.99,26348.0
4,LUXCO INC,49.99,25458.26


In [72]:
merge_sales.groupby("VendorName").sum().sort_values(by='SalesDollars', ascending=False).head(10)

Unnamed: 0_level_0,SalesDollars,Dollars
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1
DIAGEO NORTH AMERICA INC,293364100.0,5897577000000.0
MARTIGNETTI COMPANIES,174298600.0,2037200000000.0
JIM BEAM BRANDS COMPANY,136822100.0,2125884000000.0
PERNOD RICARD USA,134713100.0,1140008000000.0
BACARDI USA INC,106438500.0,735606300000.0
CONSTELLATION BRANDS INC,105311800.0,1046567000000.0
E & J GALLO WINERY,79578150.0,719433700000.0
BROWN-FORMAN CORP,79060020.0,375103500000.0
ULTRA BEVERAGE COMPANY LLP,77300190.0,533880500000.0
M S WALKER INC,66316820.0,486064300000.0


In [74]:
merge_sales['SalesDollar/Dollar'] = (merge_sales["SalesDollars"] / merge_sales["Dollars"])
merge_sales.head()

Unnamed: 0,VendorName,SalesDollars,Dollars,SalesDollar/Dollar
0,LUXCO INC,49.99,6139.47,0.008142
1,LUXCO INC,49.99,43820.57,0.001141
2,LUXCO INC,49.99,28477.46,0.001755
3,LUXCO INC,49.99,26348.0,0.001897
4,LUXCO INC,49.99,25458.26,0.001964


In [77]:
merge_sales.groupby("VendorName").sum().sort_values(by='SalesDollar/Dollar', ascending=False).head(10)

Unnamed: 0_level_0,SalesDollars,Dollars,SalesDollar/Dollar
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KOBRAND CORPORATION,12379296.05,10640940000.0,16131.286314
MANGO BOTTLING INC,754382.84,209009600.0,5464.385697
FLAG HILL WINERY & VINEYARD,1752362.15,367092700.0,2804.654914
AMERICAN VINTAGE BEVERAGE,815187.45,111482700.0,2082.460167
SIDNEY FRANK IMPORTING CO,9779208.45,11053890000.0,1293.894669
"HOOD RIVER DISTILLERS, Inc.",96328.8,1129690.0,1065.342151
LATITUDE BEVERAGE COMPANY,2075870.5,512643800.0,1044.631689
CAMPARI AMERICA,24747148.25,59359140000.0,1002.289362
PHILLIPS PRODUCTS CO.,2598686.75,828278000.0,1000.973168
NICHE W & S,503560.75,53184220.0,986.535867
