# Import PANDAS Library

Notice that we are using pd as an abbreviation so each time we want to use the PANDAS library, we can simply type pd instead of pandas.

In [1]:
import pandas as pd

# Import Dataset (either full dataset or simplified)

You can use either set of datafiles (full or simplified). In the past some students' computers could not handle the full dataset. If you worry that your computer will not handle the full dataset (or if you find your code is running very slow), use the simplified version. 

The simplified version was created by taking a sample of the full data; hence the simplified files have "Sample" added to the end of the file names. Some files were not large to begin with and didn't need to be reduced. Those files do not have Sample added to the end of the file name.

## Adjust the file address below to match your computer.

You will want to replace the begining of the address below (i.e., everything before OneDrive - Oregon State University/)

In [2]:
begInv = pd.read_csv("BegInvFINAL12-31-16Sample.csv")

In [3]:
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01


# Deliverable 1a

In [4]:
begInv = begInv.rename(columns = {'onHand':'01_01_2016_Qty'})
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01


In [5]:
begInv['InvCost01_01_2016'] = begInv['01_01_2016_Qty'] * begInv['Price']
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate,InvCost01_01_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01,103.92
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01,76.93


In [6]:
begInv_Store = begInv[['Store','01_01_2016_Qty','InvCost01_01_2016']].groupby('Store').sum()
begInv_Store.head(2)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,49917,838079.59
2,52925,840452.2


One way to get this is to sort the table in descending order and use head(10) to show the ten highest.

In [7]:
begInv_Store.sort_values(by='InvCost01_01_2016',ascending=False).head(10)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


Alternative way to get the same answer

In [8]:
begInv_Store.sort_values(by='InvCost01_01_2016').tail(10).sort_values(by='InvCost01_01_2016', ascending = False)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


# Deliverable 1b

In [9]:
begInv_Brand = begInv[['Brand','01_01_2016_Qty','InvCost01_01_2016']].groupby('Brand').sum()
begInv_Brand.head(2)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,281,3650.19
60,288,3165.12


In [10]:
begInv_Brand.sort_values(by='InvCost01_01_2016',ascending=False).head(10)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
3545,14499,463823.01
1233,12016,432455.84
8068,15341,383371.59
4261,15499,340823.01
3858,13649,327439.51
2753,4625,286703.75
8082,9287,278517.13
8680,7066,275503.34
2589,6766,270572.34
3876,14829,266773.71


# Deliverable 2a

In [11]:
endInv = pd.read_csv("EndInvFINAL12-31-16Sample.csv")
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31


In [12]:
endInv = endInv.rename(columns = {'onHand':'12_31_2016_Qty'})
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31


In [13]:
endInv['InvCost12_31_2016'] = endInv['12_31_2016_Qty'] * endInv['Price']
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate,InvCost12_31_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31,142.89
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31,258.93


In [14]:
endInv_Store = endInv[['Store','12_31_2016_Qty','InvCost12_31_2016']].groupby('Store').sum()
endInv_Store.head(2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,79827,1206845.93
2,56671,850884.06


In [15]:
endInv_Store.sort_values(by='InvCost12_31_2016',ascending=False).head(10)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
50,260717,4887260.68
73,164589,3254662.81
67,163765,3076114.82
34,145829,3074616.75
76,143866,2975945.18
69,150848,2968678.82
66,144579,2860504.99
74,166015,2803645.13
38,129397,2463906.85
55,125584,2234836.35


# Deliverable 2b

In [16]:
endInv_Brand = endInv[['Brand','12_31_2016_Qty','InvCost12_31_2016']].groupby('Brand').sum()
endInv_Brand.head(2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,385,5001.15
60,146,1604.54


In [17]:
endInv_Brand.sort_values(by='InvCost12_31_2016',ascending=False).head(10)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
1233,15047,526494.53
3545,16770,502932.3
2753,7849,470861.51
8068,15608,366631.92
3405,12268,355649.32
4261,16769,351981.31
2757,11603,336370.97
2589,7922,300956.78
1376,13180,276648.2
2585,10487,272557.13


# Deliverable 3

In [18]:
mergeA= pd.merge(left=endInv, right=begInv, how="outer",  on=["InventoryId"])

In [19]:
mergeA.head()

Unnamed: 0,InventoryId,Store_x,City_x,Brand_x,Description_x,Size_x,12_31_2016_Qty,Price_x,endDate,InvCost12_31_2016,Store_y,City_y,Brand_y,Description_y,Size_y,01_01_2016_Qty,Price_y,startDate,InvCost01_01_2016
0,1_HARDERSFIELD_58,1.0,HARDERSFIELD,58.0,Gekkeikan Black & Gold Sake,750mL,11.0,12.99,2016-12-31,142.89,1.0,HARDERSFIELD,58.0,Gekkeikan Black & Gold Sake,750mL,8.0,12.99,2016-01-01,103.92
1,1_HARDERSFIELD_62,1.0,HARDERSFIELD,62.0,Herradura Silver Tequila,750mL,7.0,36.99,2016-12-31,258.93,1.0,HARDERSFIELD,62.0,Herradura Silver Tequila,750mL,6.0,36.99,2016-01-01,221.94
2,1_HARDERSFIELD_63,1.0,HARDERSFIELD,63.0,Herradura Reposado Tequila,750mL,7.0,38.99,2016-12-31,272.93,1.0,HARDERSFIELD,63.0,Herradura Reposado Tequila,750mL,3.0,38.99,2016-01-01,116.97
3,1_HARDERSFIELD_72,1.0,HARDERSFIELD,72.0,No. 3 London Dry Gin,750mL,4.0,34.99,2016-12-31,139.96,1.0,HARDERSFIELD,72.0,No. 3 London Dry Gin,750mL,6.0,34.99,2016-01-01,209.94
4,1_HARDERSFIELD_75,1.0,HARDERSFIELD,75.0,Three Olives Tomato Vodka,750mL,7.0,14.99,2016-12-31,104.93,1.0,HARDERSFIELD,75.0,Three Olives Tomato Vodka,750mL,18.0,14.99,2016-01-01,269.82


In [20]:
mergeA ["BegTotal"] = (mergeA["01_01_2016_Qty"] * mergeA["Price_y"])

In [21]:
mergeA ["EndTotal"] = (mergeA["12_31_2016_Qty"] * mergeA["Price_x"])

In [22]:
mergeA.head()

Unnamed: 0,InventoryId,Store_x,City_x,Brand_x,Description_x,Size_x,12_31_2016_Qty,Price_x,endDate,InvCost12_31_2016,...,City_y,Brand_y,Description_y,Size_y,01_01_2016_Qty,Price_y,startDate,InvCost01_01_2016,BegTotal,EndTotal
0,1_HARDERSFIELD_58,1.0,HARDERSFIELD,58.0,Gekkeikan Black & Gold Sake,750mL,11.0,12.99,2016-12-31,142.89,...,HARDERSFIELD,58.0,Gekkeikan Black & Gold Sake,750mL,8.0,12.99,2016-01-01,103.92,103.92,142.89
1,1_HARDERSFIELD_62,1.0,HARDERSFIELD,62.0,Herradura Silver Tequila,750mL,7.0,36.99,2016-12-31,258.93,...,HARDERSFIELD,62.0,Herradura Silver Tequila,750mL,6.0,36.99,2016-01-01,221.94,221.94,258.93
2,1_HARDERSFIELD_63,1.0,HARDERSFIELD,63.0,Herradura Reposado Tequila,750mL,7.0,38.99,2016-12-31,272.93,...,HARDERSFIELD,63.0,Herradura Reposado Tequila,750mL,3.0,38.99,2016-01-01,116.97,116.97,272.93
3,1_HARDERSFIELD_72,1.0,HARDERSFIELD,72.0,No. 3 London Dry Gin,750mL,4.0,34.99,2016-12-31,139.96,...,HARDERSFIELD,72.0,No. 3 London Dry Gin,750mL,6.0,34.99,2016-01-01,209.94,209.94,139.96
4,1_HARDERSFIELD_75,1.0,HARDERSFIELD,75.0,Three Olives Tomato Vodka,750mL,7.0,14.99,2016-12-31,104.93,...,HARDERSFIELD,75.0,Three Olives Tomato Vodka,750mL,18.0,14.99,2016-01-01,269.82,269.82,104.93


In [23]:
mergeB = mergeA[["InventoryId", "01_01_2016_Qty", "BegTotal", "12_31_2016_Qty", "EndTotal"]]

In [24]:
mergeB.head()

Unnamed: 0,InventoryId,01_01_2016_Qty,BegTotal,12_31_2016_Qty,EndTotal
0,1_HARDERSFIELD_58,8.0,103.92,11.0,142.89
1,1_HARDERSFIELD_62,6.0,221.94,7.0,258.93
2,1_HARDERSFIELD_63,3.0,116.97,7.0,272.93
3,1_HARDERSFIELD_72,6.0,209.94,4.0,139.96
4,1_HARDERSFIELD_75,18.0,269.82,7.0,104.93


# Deliverable 4a

In [25]:
Purchases = pd.read_csv("PurchasesFINAL12-31-16Sample.csv")
Purchases.head(2)

Unnamed: 0,InventoryId,Store,Brand,Description,Size,VendorNumber,VendorName,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,Dollars,Classification
0,69_MOUNTMEND_8412,69,8412,Tequila Ocho Plata Fresno,750mL,105,ALTAMAR BRANDS LLC,8124,2015-12-21,2016-01-02,2016-01-04,2016-02-16,35.71,6,214.26,1
1,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,9.41,5,47.05,1


In [26]:
Purchases = Purchases.rename(columns = {'Dollars':'TotalSold'})
Purchases.head(2)

Unnamed: 0,InventoryId,Store,Brand,Description,Size,VendorNumber,VendorName,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,TotalSold,Classification
0,69_MOUNTMEND_8412,69,8412,Tequila Ocho Plata Fresno,750mL,105,ALTAMAR BRANDS LLC,8124,2015-12-21,2016-01-02,2016-01-04,2016-02-16,35.71,6,214.26,1
1,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,9.41,5,47.05,1


In [27]:
Purchases_Vendor = Purchases[['VendorName','TotalSold']].groupby('VendorName').sum()
Purchases_Vendor.head(2)

Unnamed: 0_level_0,TotalSold
VendorName,Unnamed: 1_level_1
AAPER ALCOHOL & CHEMICAL CO,105.07
ADAMBA IMPORTS INTL INC,30175.0


In [28]:
Purchases_Vendor.sort_values(by='TotalSold',ascending=False).head(10)

Unnamed: 0_level_0,TotalSold
VendorName,Unnamed: 1_level_1
DIAGEO NORTH AMERICA INC,21315299.75
MARTIGNETTI COMPANIES,11709222.19
JIM BEAM BRANDS COMPANY,10232521.65
PERNOD RICARD USA,10224899.93
BACARDI USA INC,7420630.63
CONSTELLATION BRANDS INC,6584032.72
BROWN-FORMAN CORP,5719375.62
ULTRA BEVERAGE COMPANY LLP,5545766.29
E & J GALLO WINERY,5177067.61
M S WALKER INC,4647908.17


# Deliverable 4b

In [29]:
Invoices = pd.read_csv("InvoicePurchases12-31-16Sample.csv")
Invoices.head(2)

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
0,105,ALTAMAR BRANDS LLC,2016-01-04,8124,2015-12-21,2016-02-16,6,214.26,3.47,
1,4466,AMERICAN VINTAGE BEVERAGE,2016-01-07,8137,2015-12-22,2016-02-21,15,140.55,8.57,


In [30]:
Invoices_Freight = Invoices[['VendorName','Freight']].groupby('VendorName').sum()
Invoices_Freight.head(2)

Unnamed: 0_level_0,Freight
VendorName,Unnamed: 1_level_1
AAPER ALCOHOL & CHEMICAL CO,0.48
ADAMBA IMPORTS INTL INC,367.52


In [31]:
Invoices_Freight.sort_values(by='Freight',ascending=False).head(10)

Unnamed: 0_level_0,Freight
VendorName,Unnamed: 1_level_1
DIAGEO NORTH AMERICA INC,257032.07
MARTIGNETTI COMPANIES,144719.92
JIM BEAM BRANDS COMPANY,123880.97
PERNOD RICARD USA,123780.22
BACARDI USA INC,89286.27
CONSTELLATION BRANDS INC,79528.99
BROWN-FORMAN CORP,68601.68
ULTRA BEVERAGE COMPANY LLP,68054.7
E & J GALLO WINERY,61966.91
M S WALKER INC,55551.82


# Deliverable 4c

In [32]:
InvoicesGrouped = Invoices[["VendorName","Dollars","Freight"]].groupby('VendorName').sum()
InvoicesGrouped.head()

Unnamed: 0_level_0,Dollars,Freight
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPER ALCOHOL & CHEMICAL CO,105.07,0.48
ADAMBA IMPORTS INTL INC,76770.25,367.52
ALISA CARR BEVERAGES,34951.68,172.0
ALTAMAR BRANDS LLC,11706.2,62.39
AMERICAN SPIRITS EXCHANGE,1205.16,6.19


In [33]:
Invoices_250 = InvoicesGrouped[InvoicesGrouped["Dollars"]>=250000]
Invoices_250.head(2)

Unnamed: 0_level_0,Dollars,Freight
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1
BACARDI USA INC,17624378.72,89286.27
BANFI PRODUCTS CORP,1628866.68,8510.41


In [34]:
Invoices_250["FreightPerDollar"] = (Invoices_250["Freight"] / Invoices_250["Dollars"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Invoices_250["FreightPerDollar"] = (Invoices_250["Freight"] / Invoices_250["Dollars"])


In [35]:
Invoices_250.head(2)

Unnamed: 0_level_0,Dollars,Freight,FreightPerDollar
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BACARDI USA INC,17624378.72,89286.27,0.005066
BANFI PRODUCTS CORP,1628866.68,8510.41,0.005225


In [36]:
Invoices_250.sort_values(by='FreightPerDollar',ascending=False).head(10)

Unnamed: 0_level_0,Dollars,Freight,FreightPerDollar
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
WESTERN SPIRITS BEVERAGE CO,361249.21,1933.19,0.005351
DIAGEO CHATEAU ESTATE WINES,1365472.83,7259.75,0.005317
FREDERICK WILDMAN & SONS,759449.24,3999.93,0.005267
STATE WINE & SPIRITS,1529682.04,8014.98,0.00524
FLAG HILL WINERY & VINEYARD,300403.2,1573.31,0.005237
BANFI PRODUCTS CORP,1628866.68,8510.41,0.005225
MARTIGNETTI COMPANIES,27821473.91,144719.92,0.005202
STE MICHELLE WINE ESTATES,3086650.7,15919.7,0.005158
WINE GROUP INC,5258636.79,27100.41,0.005154
ULTRA BEVERAGE COMPANY LLP,13210613.93,68054.7,0.005152


In [37]:
Invoices_250.sort_values(by='FreightPerDollar',ascending=True).head(5)

Unnamed: 0_level_0,Dollars,Freight,FreightPerDollar
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
OLE SMOKY DISTILLERY LLC,387622.69,1922.0,0.004958
TREASURY WINE ESTATES,2978686.4,14836.57,0.004981
SIDNEY FRANK IMPORTING CO,1715908.88,8549.55,0.004983
HEAVEN HILL DISTILLERIES,2816661.94,14069.87,0.004995
LUXCO INC,2051436.01,10261.6,0.005002


# Deliverable 4d

In [38]:
Invoices4d = Invoices[["VendorName","Quantity","Freight"]].groupby('VendorName').sum()
Invoices4d.head()

Unnamed: 0_level_0,Quantity,Freight
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPER ALCOHOL & CHEMICAL CO,1,0.48
ADAMBA IMPORTS INTL INC,4732,367.52
ALISA CARR BEVERAGES,438,172.0
ALTAMAR BRANDS LLC,332,62.39
AMERICAN SPIRITS EXCHANGE,132,6.19


In [39]:
Invoices_100 = Invoices4d[Invoices4d["Freight"]>100 | (Invoices4d["Quantity"]<=1000)]
Invoices_100.head(2)

Unnamed: 0_level_0,Quantity,Freight
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1
ADAMBA IMPORTS INTL INC,4732,367.52
ALISA CARR BEVERAGES,438,172.0


In [40]:
Invoices_100.sort_values(by='Freight',ascending=False).head(10)

Unnamed: 0_level_0,Quantity,Freight
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1
DIAGEO NORTH AMERICA INC,5459788,257032.07
MARTIGNETTI COMPANIES,2637275,144719.92
JIM BEAM BRANDS COMPANY,2737165,123880.97
PERNOD RICARD USA,1647558,123780.22
BACARDI USA INC,1427075,89286.27
CONSTELLATION BRANDS INC,2325892,79528.99
BROWN-FORMAN CORP,1006122,68601.68
ULTRA BEVERAGE COMPANY LLP,1077527,68054.7
E & J GALLO WINERY,1858260,61966.91
M S WALKER INC,1372841,55551.82


# Deliverable 4e

In [43]:
Invoices4e=Invoices_100.sort_values(by='Freight',ascending=False).head(10)

In [45]:
Invoices4e["FreightPerUnit"] = (Invoices4e["Freight"] / Invoices4e["Quantity"])

In [46]:
Invoices4e.head(10)

Unnamed: 0_level_0,Quantity,Freight,FreightPerUnit
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DIAGEO NORTH AMERICA INC,5459788,257032.07,0.047077
MARTIGNETTI COMPANIES,2637275,144719.92,0.054875
JIM BEAM BRANDS COMPANY,2737165,123880.97,0.045259
PERNOD RICARD USA,1647558,123780.22,0.07513
BACARDI USA INC,1427075,89286.27,0.062566
CONSTELLATION BRANDS INC,2325892,79528.99,0.034193
BROWN-FORMAN CORP,1006122,68601.68,0.068184
ULTRA BEVERAGE COMPANY LLP,1077527,68054.7,0.063158
E & J GALLO WINERY,1858260,61966.91,0.033347
M S WALKER INC,1372841,55551.82,0.040465


In [54]:
Invoices4ee = Invoices[["VendorName","Quantity","Freight","Dollars"]].groupby('VendorName').sum()
Invoices4ee.head()

Unnamed: 0_level_0,Quantity,Freight,Dollars
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPER ALCOHOL & CHEMICAL CO,1,0.48,105.07
ADAMBA IMPORTS INTL INC,4732,367.52,76770.25
ALISA CARR BEVERAGES,438,172.0,34951.68
ALTAMAR BRANDS LLC,332,62.39,11706.2
AMERICAN SPIRITS EXCHANGE,132,6.19,1205.16


In [55]:
Invoices_1000 = Invoices4ee[Invoices4ee["Freight"]>100 | (Invoices4ee["Quantity"]<=1000)]
Invoices_1000.head(2)

Unnamed: 0_level_0,Quantity,Freight,Dollars
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ADAMBA IMPORTS INTL INC,4732,367.52,76770.25
ALISA CARR BEVERAGES,438,172.0,34951.68


In [59]:
Invoices_1000.sort_values(by='Freight',ascending=False).head(10)

Unnamed: 0_level_0,Quantity,Freight,Dollars
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DIAGEO NORTH AMERICA INC,5459788,257032.07,50959796.85
MARTIGNETTI COMPANIES,2637275,144719.92,27821473.91
JIM BEAM BRANDS COMPANY,2737165,123880.97,24203151.05
PERNOD RICARD USA,1647558,123780.22,24124091.56
BACARDI USA INC,1427075,89286.27,17624378.72
CONSTELLATION BRANDS INC,2325892,79528.99,15573917.9
BROWN-FORMAN CORP,1006122,68601.68,13529433.08
ULTRA BEVERAGE COMPANY LLP,1077527,68054.7,13210613.93
E & J GALLO WINERY,1858260,61966.91,12289608.09
M S WALKER INC,1372841,55551.82,10935817.3


In [61]:
Invoices_1000["FreightPerDollars"] = (Invoices_1000["Freight"] / Invoices_1000["Dollars"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Invoices_1000["FreightPerDollars"] = (Invoices_1000["Freight"] / Invoices_1000["Dollars"])


In [62]:
Invoices_1000.head()

Unnamed: 0_level_0,Quantity,Freight,Dollars,FreightPerDollars
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ADAMBA IMPORTS INTL INC,4732,367.52,76770.25,0.004787
ALISA CARR BEVERAGES,438,172.0,34951.68,0.004921
ALTAMAR BRANDS LLC,332,62.39,11706.2,0.00533
AMERICAN SPIRITS EXCHANGE,132,6.19,1205.16,0.005136
AMERICAN VINTAGE BEVERAGE,15778,793.91,156357.28,0.005078


In [63]:
Invoices_1000.sort_values(by='Freight',ascending=False).head(10)

Unnamed: 0_level_0,Quantity,Freight,Dollars,FreightPerDollars
VendorName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DIAGEO NORTH AMERICA INC,5459788,257032.07,50959796.85,0.005044
MARTIGNETTI COMPANIES,2637275,144719.92,27821473.91,0.005202
JIM BEAM BRANDS COMPANY,2737165,123880.97,24203151.05,0.005118
PERNOD RICARD USA,1647558,123780.22,24124091.56,0.005131
BACARDI USA INC,1427075,89286.27,17624378.72,0.005066
CONSTELLATION BRANDS INC,2325892,79528.99,15573917.9,0.005107
BROWN-FORMAN CORP,1006122,68601.68,13529433.08,0.005071
ULTRA BEVERAGE COMPANY LLP,1077527,68054.7,13210613.93,0.005152
E & J GALLO WINERY,1858260,61966.91,12289608.09,0.005042
M S WALKER INC,1372841,55551.82,10935817.3,0.00508


The freight per dollars and the freight per units are comparable to the freight per dolars in part c but of different VendorNames due to the conditions in c and d.

# Deliverable 5

In [64]:
Sales = pd.read_csv("SalesFINAL12-31-16Sample.csv")
Sales.head(2)

Unnamed: 0,InventoryId,Store,Brand,Description,Size,SalesQuantity,SalesDollars,SalesPrice,SalesDate,Volume,Classification,ExciseTax,VendorNo,VendorName
0,1_HARDERSFIELD_1009,1,1009,Rebel Yell Variety Pack,750mL 3 Pk,1,49.99,49.99,2016-01-02,750.0,1,0.79,8352,LUXCO INC
1,1_HARDERSFIELD_10238,1,10238,Layer Cake Primitivo Puglia,750mL,2,31.98,15.99,2016-01-02,750.0,2,0.22,4425,MARTIGNETTI COMPANIES


In [73]:
merge5= pd.merge(left=Sales, right=Purchases, how="right",  on=["InventoryId"])

In [74]:
merge5.head()

Unnamed: 0,InventoryId,Store_x,Brand_x,Description_x,Size_x,SalesQuantity,SalesDollars,SalesPrice,SalesDate,Volume,...,VendorName_y,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,TotalSold,Classification_y
0,69_MOUNTMEND_8412,69.0,8412.0,Tequila Ocho Plata Fresno,750mL,2.0,99.98,49.99,2016-01-29,750.0,...,ALTAMAR BRANDS LLC,8124,2015-12-21,2016-01-02,2016-01-04,2016-02-16,35.71,6,214.26,1
1,69_MOUNTMEND_8412,69.0,8412.0,Tequila Ocho Plata Fresno,750mL,1.0,49.99,49.99,2016-06-04,750.0,...,ALTAMAR BRANDS LLC,8124,2015-12-21,2016-01-02,2016-01-04,2016-02-16,35.71,6,214.26,1
2,34_PITMERDEN_5215,34.0,5215.0,TGI Fridays Long Island Iced,1.75L,4.0,51.96,12.99,2016-01-17,1750.0,...,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,9.41,5,47.05,1
3,34_PITMERDEN_5215,34.0,5215.0,TGI Fridays Long Island Iced,1.75L,2.0,25.98,12.99,2016-01-30,1750.0,...,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,9.41,5,47.05,1
4,34_PITMERDEN_5215,34.0,5215.0,TGI Fridays Long Island Iced,1.75L,2.0,25.98,12.99,2016-03-30,1750.0,...,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,9.41,5,47.05,1


In [75]:
merge5a = merge5[["InventoryId", "SalesPrice", "PurchasePrice"]]
merge5a.head()

Unnamed: 0,InventoryId,SalesPrice,PurchasePrice
0,69_MOUNTMEND_8412,49.99,35.71
1,69_MOUNTMEND_8412,49.99,35.71
2,34_PITMERDEN_5215,12.99,9.41
3,34_PITMERDEN_5215,12.99,9.41
4,34_PITMERDEN_5215,12.99,9.41


In [76]:
merge5a["Profit"] = (merge5a["SalesPrice"] - merge5a["PurchasePrice"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merge5a["Profit"] = (merge5a["SalesPrice"] - merge5a["PurchasePrice"])


In [77]:
merge5a.head()

Unnamed: 0,InventoryId,SalesPrice,PurchasePrice,Profit
0,69_MOUNTMEND_8412,49.99,35.71,14.28
1,69_MOUNTMEND_8412,49.99,35.71,14.28
2,34_PITMERDEN_5215,12.99,9.41,3.58
3,34_PITMERDEN_5215,12.99,9.41,3.58
4,34_PITMERDEN_5215,12.99,9.41,3.58


In [78]:
merge5a.sort_values(by='Profit',ascending=False).head(10)

Unnamed: 0,InventoryId,SalesPrice,PurchasePrice,Profit
5730657,50_MOUNTMEND_1598,949.99,736.43,213.56
8130657,50_MOUNTMEND_1598,949.99,736.43,213.56
6666186,50_MOUNTMEND_1598,949.99,736.43,213.56
5345396,32_MOUNTMEND_2842,499.99,300.74,199.25
5800150,50_MOUNTMEND_425,799.99,620.15,179.84
6792123,55_DRY GULCH_90087,469.99,311.25,158.74
8997031,67_EANVERNESS_6044,669.99,523.43,146.56
1545520,79_BALLYMENA_24888,419.99,278.14,141.85
7040216,73_DONCASTER_2748,369.99,240.59,129.4
9325388,66_EANVERNESS_2748,369.99,240.59,129.4


This calculates the top ten highest profits of the inventories sold.