# Import PANDAS Library

Notice that we are using pd as an abbreviation so each time we want to use the PANDAS library, we can simply type pd instead of pandas.

In [1]:
import pandas as pd
pd.options.display.float_format = '{:,.2f}'.format

# Import Dataset (either full dataset or simplified)

You can use either set of datafiles (full or simplified). In the past some students' computers could not handle the full dataset. If you worry that your computer will not handle the full dataset (or if you find your code is running very slow), use the simplified version. 

The simplified version was created by taking a sample of the full data; hence the simplified files have "Sample" added to the end of the file names. Some files were not large to begin with and didn't need to be reduced. Those files do not have Sample added to the end of the file name.

## Adjust the file address below to match your computer.

You will want to replace the begining of the address below (i.e., everything before OneDrive - Oregon State University/)

In [2]:
begInv = pd.read_csv("BegInvFINAL12-31-16Sample.csv")

In [3]:
begInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01


## Deliverable 1a answer

In [4]:
begInv = begInv.rename(columns = {'onHand':'01_01_2016_Qty'})
begInv.head(2)
#renaming on hand column to reflect the reality of the column, the qty on 1/1/2026

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01


In [5]:
begInv['InvCost01_01_2016'] = begInv['01_01_2016_Qty'] * begInv['Price']
begInv.head(2)
#qty times price gives us how much that inv costed then

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,01_01_2016_Qty,Price,startDate,InvCost01_01_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,8,12.99,2016-01-01,103.92
1,1_HARDERSFIELD_60,1,HARDERSFIELD,60,Canadian Club 1858 VAP,750mL,7,10.99,2016-01-01,76.93


In [6]:
begInv_Store = begInv[['Store','01_01_2016_Qty','InvCost01_01_2016']].groupby('Store').sum()
begInv_Store.head(2)
#pulling the store, the qty, the cost, grouping by store, and summing it

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,49917,838079.59
2,52925,840452.2


In [7]:
begInv_Store.sort_values(by='InvCost01_01_2016',ascending=False).head(10)
#sorting the values by the cost in descending order then only taking the first 10 to get the top 10 stores in terms of inventory cost

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


### Alternative way to get the answer

In [8]:
begInv_Store.sort_values(by='InvCost01_01_2016').tail(10).sort_values(by='InvCost01_01_2016', ascending = False)
# did tail since the default is ascending order and the last 10 would be the most. Need to sort still since it would be in reverse order. 

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
34,153852,3291170.24
73,162551,3142497.36
67,158996,3079578.63
66,149314,2973033.9
76,140208,2952418.44
69,144255,2946726.65
38,114368,2232698.77
55,119641,2001263.66
50,94720,1649808.22
79,95330,1503149.48


## Deliverable 1b

In [9]:
#code brought in from 1a and store was changed to brand. 
begInv_Brand = begInv[['Brand','01_01_2016_Qty','InvCost01_01_2016']].groupby('Brand').sum()
begInv_Brand.head(2)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,281,3650.19
60,288,3165.12


In [10]:
begInv_Brand.sort_values(by='InvCost01_01_2016',ascending=False).head(10)

Unnamed: 0_level_0,01_01_2016_Qty,InvCost01_01_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
3545,14499,463823.01
1233,12016,432455.84
8068,15341,383371.59
4261,15499,340823.01
3858,13649,327439.51
2753,4625,286703.75
8082,9287,278517.13
8680,7066,275503.34
2589,6766,270572.34
3876,14829,266773.71


## Deliverable 2a

In [11]:
#need to bring in end inventory file
endInv = pd.read_csv("EndInvFINAL12-31-16Sample.csv")
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,onHand,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31


In [12]:
endInv = endInv.rename(columns = {'onHand':'12_31_2016_Qty'})
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31


In [13]:
endInv['InvCost12_31_2016'] = endInv['12_31_2016_Qty'] * endInv['Price']
endInv.head(2)

Unnamed: 0,InventoryId,Store,City,Brand,Description,Size,12_31_2016_Qty,Price,endDate,InvCost12_31_2016
0,1_HARDERSFIELD_58,1,HARDERSFIELD,58,Gekkeikan Black & Gold Sake,750mL,11,12.99,2016-12-31,142.89
1,1_HARDERSFIELD_62,1,HARDERSFIELD,62,Herradura Silver Tequila,750mL,7,36.99,2016-12-31,258.93


In [14]:
endInv_Store = endInv[['Store','12_31_2016_Qty','InvCost12_31_2016']].groupby('Store').sum()
endInv_Store.head(2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
1,79827,1206845.93
2,56671,850884.06


In [15]:
endInv_Store.sort_values(by='InvCost12_31_2016',ascending=False).head(10)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Store,Unnamed: 1_level_1,Unnamed: 2_level_1
50,260717,4887260.68
73,164589,3254662.81
67,163765,3076114.82
34,145829,3074616.75
76,143866,2975945.18
69,150848,2968678.82
66,144579,2860504.99
74,166015,2803645.13
38,129397,2463906.85
55,125584,2234836.35


## Deliverable 2b

In [16]:
endInv_Brand = endInv[['Brand','12_31_2016_Qty','InvCost12_31_2016']].groupby('Brand').sum()
endInv_Brand.head(2)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
58,385,5001.15
60,146,1604.54


In [17]:
endInv_Brand.sort_values(by='InvCost12_31_2016',ascending=False).head(10)

Unnamed: 0_level_0,12_31_2016_Qty,InvCost12_31_2016
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
1233,15047,526494.53
3545,16770,502932.3
2753,7849,470861.51
8068,15608,366631.92
3405,12268,355649.32
4261,16769,351981.31
2757,11603,336370.97
2589,7922,300956.78
1376,13180,276648.2
2585,10487,272557.13


## Deliverable 3

In [18]:
merge3 = pd.merge(left=begInv[["InventoryId", "01_01_2016_Qty", "InvCost01_01_2016"]], right=endInv[["12_31_2016_Qty", "InvCost12_31_2016", "InventoryId"]], how='outer', on=["InventoryId"])
merge3.head(2)

Unnamed: 0,InventoryId,01_01_2016_Qty,InvCost01_01_2016,12_31_2016_Qty,InvCost12_31_2016
0,1_HARDERSFIELD_58,8.0,103.92,11.0,142.89
1,1_HARDERSFIELD_60,7.0,76.93,,


In [19]:
merge3 = merge3.fillna(0)

In [20]:
merge3.head()

Unnamed: 0,InventoryId,01_01_2016_Qty,InvCost01_01_2016,12_31_2016_Qty,InvCost12_31_2016
0,1_HARDERSFIELD_58,8.0,103.92,11.0,142.89
1,1_HARDERSFIELD_60,7.0,76.93,0.0,0.0
2,1_HARDERSFIELD_62,6.0,221.94,7.0,258.93
3,1_HARDERSFIELD_63,3.0,116.97,7.0,272.93
4,1_HARDERSFIELD_72,6.0,209.94,4.0,139.96


## Deliverable 4a

In [21]:
purchases = pd.read_csv("PurchasesFINAL12-31-16Sample.csv")
purchases.head(2)

Unnamed: 0,InventoryId,Store,Brand,Description,Size,VendorNumber,VendorName,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,Dollars,Classification
0,69_MOUNTMEND_8412,69,8412,Tequila Ocho Plata Fresno,750mL,105,ALTAMAR BRANDS LLC,8124,2015-12-21,2016-01-02,2016-01-04,2016-02-16,35.71,6,214.26,1
1,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,9.41,5,47.05,1


In [22]:
vendors_grouped = purchases.groupby("VendorNumber").sum("Dollars")
vendors_grouped.head()

Unnamed: 0_level_0,Store,Brand,PONumber,PurchasePrice,Quantity,Dollars,Classification
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,253,361912,38413,74.86,101,1751.3,8
54,74,990,13148,105.07,1,105.07,1
60,9909,987756,2964055,4052.67,1861,30175.0,250
105,1257,187593,253417,808.87,125,4388.99,23
200,135,102871,58518,46.45,84,764.52,10


In [23]:
sorted_vendors = vendors_grouped.sort_values(by="Dollars", ascending = False)
sorted_vendors.head(10)

Unnamed: 0_level_0,Store,Brand,PONumber,PurchasePrice,Quantity,Dollars,Classification
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3960,4492412,424232145,1129084922,1658657.02,2297321,21315299.75,102406
4425,3470126,1645180827,826088134,819826.08,1115923,11724670.82,137492
12546,3442874,306514871,875162030,1033143.87,1155665,10232521.65,80509
17035,2014219,280354410,501427472,847211.45,692444,10224899.93,50080
480,1698603,172316736,427622772,550546.66,599721,7420630.63,41500
1392,3434559,1460616876,866146157,556347.16,978758,6584032.72,145767
1128,1112576,92171488,275646240,458919.02,423866,5719375.62,27159
9165,1652464,615955687,389174208,557950.57,454455,5545766.29,56584
3252,3048616,1417472807,756814787,498003.92,781857,5177067.61,128110
9552,2113824,683154306,527841872,415482.01,583676,4647908.17,67928


## Deliverable 4b

In [24]:
invoices = pd.read_csv("InvoicePurchases12-31-16Sample.csv")
invoices.head()

Unnamed: 0,VendorNumber,VendorName,InvoiceDate,PONumber,PODate,PayDate,Quantity,Dollars,Freight,Approval
0,105,ALTAMAR BRANDS LLC,2016-01-04,8124,2015-12-21,2016-02-16,6,214.26,3.47,
1,4466,AMERICAN VINTAGE BEVERAGE,2016-01-07,8137,2015-12-22,2016-02-21,15,140.55,8.57,
2,388,ATLANTIC IMPORTING COMPANY,2016-01-09,8169,2015-12-24,2016-02-16,5,106.6,4.61,
3,480,BACARDI USA INC,2016-01-12,8106,2015-12-20,2016-02-05,10100,137483.78,2935.2,
4,516,BANFI PRODUCTS CORP,2016-01-07,8170,2015-12-24,2016-02-12,1935,15527.25,429.2,


In [25]:
freight_grouped = invoices.groupby("VendorNumber").sum("Freight")
freight_grouped.head()

Unnamed: 0_level_0,PONumber,Quantity,Dollars,Freight
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,40959,328,5630.88,27.08
54,13168,1,105.07,0.48
60,415554,4732,76770.25,367.52
105,442896,332,11706.2,62.39
200,71951,132,1205.16,6.19


In [26]:
sorted_freight = freight_grouped.sort_values(by="Freight", ascending = False)
sorted_freight.head(10)

Unnamed: 0_level_0,PONumber,Quantity,Dollars,Freight
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3960,597515,5459788,50959796.85,257032.07
4425,853524,2640411,27861690.02,144929.24
12546,598179,2737165,24203151.05,123880.97
17035,597979,1647558,24124091.56,123780.22
480,597226,1427075,17624378.72,89286.27
1392,597607,2325892,15573917.9,79528.99
1128,597191,1006122,13529433.08,68601.68
9165,597939,1077527,13210613.93,68054.7
3252,597881,1858260,12289608.09,61966.91
9552,597511,1372841,10935817.3,55551.82


## Deliverable 4c

In [27]:
invoices_vendors_grouped = invoices[["Quantity", "Dollars", "Freight", "VendorNumber"]].groupby("VendorNumber").sum("Dollars")
invoices_vendors_grouped.head()

Unnamed: 0_level_0,Quantity,Dollars,Freight
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,328,5630.88,27.08
54,1,105.07,0.48
60,4732,76770.25,367.52
105,332,11706.2,62.39
200,132,1205.16,6.19


In [28]:
over_250 = invoices_vendors_grouped[invoices_vendors_grouped["Dollars"]>=250000]

In [29]:
over_250.head()

Unnamed: 0_level_0,Quantity,Dollars,Freight
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
480,1427075,17624378.72,89286.27
516,228103,1628866.68,8510.41
653,154092,1529682.04,8014.98
660,503931,3537977.55,17932.33
1128,1006122,13529433.08,68601.68


In [30]:
over_250["Freight per Dollar %"] = (over_250["Freight"] / over_250["Dollars"]) * 100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  over_250["Freight per Dollar %"] = (over_250["Freight"] / over_250["Dollars"]) * 100


In [31]:
 over_250.head()

Unnamed: 0_level_0,Quantity,Dollars,Freight,Freight per Dollar %
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
480,1427075,17624378.72,89286.27,0.51
516,228103,1628866.68,8510.41,0.52
653,154092,1529682.04,8014.98,0.52
660,503931,3537977.55,17932.33,0.51
1128,1006122,13529433.08,68601.68,0.51


In [32]:
sorted_over_250 = over_250.sort_values(by="Freight per Dollar %", ascending=False)

In [33]:
sorted_over_250.head(10)

Unnamed: 0_level_0,Quantity,Dollars,Freight,Freight per Dollar %
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
9625,56860,361249.21,1933.19,0.54
1590,187841,1365472.83,7259.75,0.53
9744,70932,759449.24,3999.93,0.53
653,154092,1529682.04,8014.98,0.52
17031,20608,300403.2,1573.31,0.52
516,228103,1628866.68,8510.41,0.52
4425,2640411,27861690.02,144929.24,0.52
8673,419822,3086650.7,15919.7,0.52
9815,888385,5258636.79,27100.41,0.52
9165,1077527,13210613.93,68054.7,0.52


In [34]:
sorted_over_250.tail(5)

Unnamed: 0_level_0,Quantity,Dollars,Freight,Freight per Dollar %
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
8352,283260,2051436.01,10261.6,0.5
3924,352348,2816661.94,14069.87,0.5
3089,186464,1715908.88,8549.55,0.5
9819,497770,2978686.4,14836.57,0.5
6359,110162,387622.69,1922.0,0.5


## Deliverable 4d

In [35]:
for_4d = invoices[["Quantity", "Dollars", "Freight", "VendorNumber"]]
for_4d.head()

Unnamed: 0,Quantity,Dollars,Freight,VendorNumber
0,6,214.26,3.47,105
1,15,140.55,8.57,4466
2,5,106.6,4.61,388
3,10100,137483.78,2935.2,480
4,1935,15527.25,429.2,516


In [36]:
meets_4d_req = for_4d[(for_4d["Freight"]>100) & (for_4d["Quantity"]<=1000)]
meets_4d_req.head()

Unnamed: 0,Quantity,Dollars,Freight,VendorNumber
11,320,5420.41,179.26,1485
16,808,6646.46,127.05,2242
19,385,3506.41,146.86,2555
23,136,5645.24,218.18,2561
28,818,7079.02,200.02,3924


In [37]:
top_vendors_4d = meets_4d_req.groupby("VendorNumber").sum("Freight").sort_values(by="Freight", ascending = False)

In [38]:
top_vendors_4d

Unnamed: 0_level_0,Quantity,Dollars,Freight
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2561,18120,570627.52,2944.67
8664,975,14299.98,349.02
7239,382,7515.38,348.07
653,541,5544.89,303.7
6785,715,5533.18,242.73
10000,875,7234.88,218.05
3924,818,7079.02,200.02
8352,819,6139.47,196.61
4692,427,5336.48,185.93
1485,320,5420.41,179.26


## Deliverable 4e

In [39]:
meets_4d_req["Freight per Dollar %"] = (meets_4d_req["Freight"] / meets_4d_req["Dollars"]) * 100
meets_4d_req.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meets_4d_req["Freight per Dollar %"] = (meets_4d_req["Freight"] / meets_4d_req["Dollars"]) * 100


Unnamed: 0,Quantity,Dollars,Freight,VendorNumber,Freight per Dollar %
11,320,5420.41,179.26,1485,3.31
16,808,6646.46,127.05,2242,1.91
19,385,3506.41,146.86,2555,4.19
23,136,5645.24,218.18,2561,3.86
28,818,7079.02,200.02,3924,2.83


In [40]:
grouped_4e = meets_4d_req.groupby("VendorNumber").sum("Freight per Dollar %")
grouped_4e

Unnamed: 0_level_0,Quantity,Dollars,Freight,Freight per Dollar %
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
653,541,5544.89,303.7,5.48
1273,576,20334.21,109.8,0.54
1485,320,5420.41,179.26,3.31
2242,808,6646.46,127.05,1.91
2555,385,3506.41,146.86,4.19
2561,18120,570627.52,2944.67,13.97
3089,379,4443.66,133.83,3.01
3924,818,7079.02,200.02,2.83
4692,427,5336.48,185.93,3.48
4848,982,18937.82,100.37,0.53


In [41]:
sorted_4e = grouped_4e.sort_values(by="Freight per Dollar %", ascending = False)
sorted_4e

Unnamed: 0_level_0,Quantity,Dollars,Freight,Freight per Dollar %
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2561,18120,570627.52,2944.67,13.97
653,541,5544.89,303.7,5.48
7239,382,7515.38,348.07,4.63
6785,715,5533.18,242.73,4.39
2555,385,3506.41,146.86,4.19
6213,412,4821.83,177.5,3.68
4692,427,5336.48,185.93,3.48
1485,320,5420.41,179.26,3.31
8352,819,6139.47,196.61,3.2
10000,875,7234.88,218.05,3.01


For just transactions where the freight was more than $100 and the quantity was 1,000 units or less the freight per dollar percentage
is significantly higher than it was in 4c. 

## Deliverable 5

In [42]:
sales = pd.read_csv("SalesFINAL12-31-16Sample.csv")
sales.head()

Unnamed: 0,InventoryId,Store,Brand,Description,Size,SalesQuantity,SalesDollars,SalesPrice,SalesDate,Volume,Classification,ExciseTax,VendorNo,VendorName
0,1_HARDERSFIELD_1009,1,1009,Rebel Yell Variety Pack,750mL 3 Pk,1,49.99,49.99,2016-01-02,750.0,1,0.79,8352,LUXCO INC
1,1_HARDERSFIELD_10238,1,10238,Layer Cake Primitivo Puglia,750mL,2,31.98,15.99,2016-01-02,750.0,2,0.22,4425,MARTIGNETTI COMPANIES
2,1_HARDERSFIELD_10239,1,10239,Cannonball Cab Svgn Cal,750mL,1,13.99,13.99,2016-01-02,750.0,2,0.11,4425,MARTIGNETTI COMPANIES
3,1_HARDERSFIELD_10266,1,10266,Klinker Brick Old Vine Znfdl,750mL,1,16.99,16.99,2016-01-09,750.0,2,0.11,9552,M S WALKER INC
4,1_HARDERSFIELD_1029,1,1029,Fulton's Harvest Apple Pie L,750mL,1,9.99,9.99,2016-01-03,750.0,1,0.79,3924,HEAVEN HILL DISTILLERIES


In [43]:
sales["VendorNumber"] = sales["VendorNo"]
sales.head()

Unnamed: 0,InventoryId,Store,Brand,Description,Size,SalesQuantity,SalesDollars,SalesPrice,SalesDate,Volume,Classification,ExciseTax,VendorNo,VendorName,VendorNumber
0,1_HARDERSFIELD_1009,1,1009,Rebel Yell Variety Pack,750mL 3 Pk,1,49.99,49.99,2016-01-02,750.0,1,0.79,8352,LUXCO INC,8352
1,1_HARDERSFIELD_10238,1,10238,Layer Cake Primitivo Puglia,750mL,2,31.98,15.99,2016-01-02,750.0,2,0.22,4425,MARTIGNETTI COMPANIES,4425
2,1_HARDERSFIELD_10239,1,10239,Cannonball Cab Svgn Cal,750mL,1,13.99,13.99,2016-01-02,750.0,2,0.11,4425,MARTIGNETTI COMPANIES,4425
3,1_HARDERSFIELD_10266,1,10266,Klinker Brick Old Vine Znfdl,750mL,1,16.99,16.99,2016-01-09,750.0,2,0.11,9552,M S WALKER INC,9552
4,1_HARDERSFIELD_1029,1,1029,Fulton's Harvest Apple Pie L,750mL,1,9.99,9.99,2016-01-03,750.0,1,0.79,3924,HEAVEN HILL DISTILLERIES,3924


In [44]:
purchases.head()

Unnamed: 0,InventoryId,Store,Brand,Description,Size,VendorNumber,VendorName,PONumber,PODate,ReceivingDate,InvoiceDate,PayDate,PurchasePrice,Quantity,Dollars,Classification
0,69_MOUNTMEND_8412,69,8412,Tequila Ocho Plata Fresno,750mL,105,ALTAMAR BRANDS LLC,8124,2015-12-21,2016-01-02,2016-01-04,2016-02-16,35.71,6,214.26,1
1,34_PITMERDEN_5215,34,5215,TGI Fridays Long Island Iced,1.75L,4466,AMERICAN VINTAGE BEVERAGE,8137,2015-12-22,2016-01-02,2016-01-07,2016-02-21,9.41,5,47.05,1
2,76_DONCASTER_2034,76,2034,Glendalough Double Barrel,750mL,388,ATLANTIC IMPORTING COMPANY,8169,2015-12-24,2016-01-02,2016-01-09,2016-02-16,21.32,5,106.6,1
3,5_SUTTON_3348,5,3348,Bombay Sapphire Gin,1.75L,480,BACARDI USA INC,8106,2015-12-20,2016-01-02,2016-01-12,2016-02-05,22.38,6,134.28,1
4,30_CULCHETH_4903,30,4903,Bacardi Superior Rum,200mL,480,BACARDI USA INC,8106,2015-12-20,2016-01-01,2016-01-12,2016-02-05,2.87,48,137.76,1


In [45]:
small_sales = sales[["VendorNumber", "SalesDollars"]].groupby("VendorNumber").sum("SalesDollars")
small_sales.head(10)

Unnamed: 0_level_0,SalesDollars
VendorNumber,Unnamed: 1_level_1
2,86.97
60,6132.4
105,1513.68
200,17.99
287,139.41
388,5090.4
480,1935245.08
516,209660.93
653,178517.11
660,388267.29


In [46]:
small_purchases = purchases[["VendorNumber", "Dollars"]].groupby("VendorNumber").sum("Dollars")
small_purchases.head(10)

Unnamed: 0_level_0,Dollars
VendorNumber,Unnamed: 1_level_1
2,1751.3
54,105.07
60,30175.0
105,4388.99
200,764.52
287,771.42
388,20321.72
480,7420630.63
516,692669.85
653,650999.58


In [47]:
merge5 = pd.merge(left = small_sales, right = small_purchases, how='outer', on=["VendorNumber"])
merge5 = merge5.fillna(0)
merge5.head()

Unnamed: 0_level_0,SalesDollars,Dollars
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1
2,86.97,1751.3
60,6132.4,30175.0
105,1513.68,4388.99
200,17.99,764.52
287,139.41,771.42


In [48]:
merge5["Purchase Amount"] = merge5["Dollars"]

In [49]:
merge5["Profit"] = merge5["SalesDollars"] - merge5["Purchase Amount"]

In [50]:
merge5.head()

Unnamed: 0_level_0,SalesDollars,Dollars,Purchase Amount,Profit
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,86.97,1751.3,1751.3,-1664.33
60,6132.4,30175.0,30175.0,-24042.6
105,1513.68,4388.99,4388.99,-2875.31
200,17.99,764.52,764.52,-746.53
287,139.41,771.42,771.42,-632.01


In [51]:
merge5

Unnamed: 0_level_0,SalesDollars,Dollars,Purchase Amount,Profit
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,86.97,1751.30,1751.30,-1664.33
60,6132.40,30175.00,30175.00,-24042.60
105,1513.68,4388.99,4388.99,-2875.31
200,17.99,764.52,764.52,-746.53
287,139.41,771.42,771.42,-632.01
...,...,...,...,...
173357,3045.93,12455.17,12455.17,-9409.24
201359,149.94,17.00,17.00,132.94
54,0.00,105.07,105.07,-105.07
4901,0.00,140.94,140.94,-140.94


In [52]:
sorted_merge5 = merge5[["SalesDollars", "Purchase Amount", "Profit"]].groupby("VendorNumber").sum().sort_values(by="Profit", ascending = False)
sorted_merge5.head(10)

Unnamed: 0_level_0,SalesDollars,Purchase Amount,Profit
VendorNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
201359,149.94,17.0,132.94
90026,27.96,0.0,27.96
90033,7.99,0.0,7.99
54,0.0,105.07,-105.07
4901,0.0,140.94,-140.94
9099,0.0,236.64,-236.64
1265,24.99,365.3,-340.31
1003,212.73,701.9,-489.17
287,139.41,771.42,-632.01
200,17.99,764.52,-746.53


I conducted a profit analysis using the sales data and the purchase data. Above are the top 10 most profitable vendors. 