# Data Wrangling Part 2

In [1]:
# load packages

import pandas as pd
import numpy as np

In [2]:
# load csv
iowa = pd.read_csv('iowa_clean.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)


In [3]:
iowa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22815313 entries, 0 to 22815312
Data columns (total 16 columns):
 #   Column                Dtype  
---  ------                -----  
 0   Date                  object 
 1   City                  object 
 2   Zip Code              int64  
 3   County                object 
 4   Item Number           object 
 5   Item Description      object 
 6   Bottle Volume (ml)    int64  
 7   State Bottle Cost     float64
 8   State Bottle Retail   float64
 9   Bottles Sold          int64  
 10  Sale (Dollars)        float64
 11  Volume Sold (Liters)  float64
 12  Store Name            object 
 13  Category Name         object 
 14  Category              float64
 15  Vendor Name           object 
dtypes: float64(5), int64(3), object(8)
memory usage: 2.7+ GB


In [4]:
# Attempted to convert item number to int and it failed.
# found the random entry. it was 3 entries
iowa = iowa[iowa['Item Number'] != 'x904631']

# change the data type
iowa['Item Number'] = iowa['Item Number'].astype('int')

In [5]:
# convert date to date time object
iowa['Date'] = pd.to_datetime(iowa['Date'])

## STATE BOTTLE COST

In [6]:
# let's take a look the numerical categories

iowa['State Bottle Cost'].describe()

count    2.281530e+07
mean     1.013200e+01
std      1.078194e+01
min      0.000000e+00
25%      5.510000e+00
50%      8.250000e+00
75%      1.249000e+01
max      7.680000e+03
Name: State Bottle Cost, dtype: float64

### Observation:
1. Some bottle costs are zero.
After speaking with some Washington State Liquor Reps. I learned that the only reason an invoice would read 0 cost is for sample bottles or broken/ replacement bottles. Each state has different liquor laws however I will proceed like this rule applies.
2. The average cost is $10
3. Highest is $7680
4. Mode is lower than the mean so the high end has a tail/ outliers. This makes sense. The average cost is $10 and you cannot have a negative price.

I will be removing the zero cost bottles. 

In [10]:
# how often is liquor free?
bottles = iowa[iowa['State Bottle Cost'] != 0.0]
broken = iowa[iowa['State Bottle Cost'] == 0.0]

print('Broken bottle percentage: ', broken['Bottles Sold'].sum() / bottles['Bottles Sold'].sum() * 100)

Broken bottle percentage:  0.007711920987747631


### Is 0.007711% a good thing?
Do warehouse staff or drivers need to be better? 
Do sample bottles lead to sales? 
I do not know, but the number could be used for reference

In [11]:
# Remove the zero cost bottles. 
iowa = iowa[iowa['State Bottle Cost'] != 0.0]

In [20]:
# exploring bottle cost.  What is the max outlier?
iowa[iowa['State Bottle Cost'] == iowa['State Bottle Cost'].max()]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
17989104,2016-10-25,CEDAR RAPIDS,52402,LINN,927849,Cedar Ridge Reserve Bourbon Private Cask DNO,750,7680.0,11520.0,1,11520.0,0.75,Hy-vee food store #5 / cedar rapids,BOURBON WHISKY,1011250.0,"CEDAR RIDGE VINEYARDS,LLC"


## CITY

In [21]:
# CITY column
iowa['City'].value_counts()

DES MOINES        1969961
CEDAR RAPIDS      1482468
DAVENPORT          986758
WATERLOO           726064
COUNCIL BLUFFS     670957
                   ...   
EARLING               182
SIDNEY                178
CHESTER               171
MILO                  158
ALVORD                 10
Name: City, Length: 452, dtype: int64

### Observaiton:
ALVORD has only 10 entries/ invoices?

In [22]:
# city of alvord
iowa[iowa['City']== 'ALVORD']

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
12151270,2012-01-11,ALVORD,51230,LYON,41076,Jeremiah Weed Sweet Tea Vodka,750,9.46,14.2,3,42.6,2.25,Direct liquor / alvord,FLAVORED VODKA,1031200.0,DIAGEO AMERICAS
12151271,2012-01-11,ALVORD,51230,LYON,13638,Lord Calvert Canadian Whisky,1750,9.97,14.95,1,14.95,1.75,Direct liquor / alvord,CANADIAN WHISKY,1012100.0,LUXCO-ST LOUIS
12151272,2012-01-11,ALVORD,51230,LYON,11586,Black Velvet Reserve,750,6.75,10.12,4,40.48,3.0,Direct liquor / alvord,CANADIAN WHISKY,1012100.0,"CONSTELLATION WINE COMPANY, INC."
12151273,2012-01-11,ALVORD,51230,LYON,13636,Lord Calvert Canadian Whisky,750,5.25,7.87,2,15.74,1.5,Direct liquor / alvord,CANADIAN WHISKY,1012100.0,LUXCO-ST LOUIS
12151274,2012-01-11,ALVORD,51230,LYON,46351,Hawkeye Light Rum,1000,4.22,6.33,12,75.96,12.0,Direct liquor / alvord,RUM,1062400.0,LUXCO-ST LOUIS
12151275,2012-01-11,ALVORD,51230,LYON,43338,Captain Morgan Spiced Rum,1750,16.99,25.48,6,152.88,10.5,Direct liquor / alvord,RUM,1062400.0,DIAGEO AMERICAS
12151276,2012-01-11,ALVORD,51230,LYON,11777,Black Velvet,1000,6.63,9.94,12,119.28,12.0,Direct liquor / alvord,CANADIAN WHISKY,1012100.0,"CONSTELLATION WINE COMPANY, INC."
12151277,2012-01-11,ALVORD,51230,LYON,43337,Captain Morgan Spiced Rum,1000,10.99,16.49,12,197.88,12.0,Direct liquor / alvord,RUM,1062400.0,DIAGEO AMERICAS
12151278,2012-01-11,ALVORD,51230,LYON,76487,Paramount Amaretto,1000,4.04,6.06,2,12.12,2.0,Direct liquor / alvord,LIQUEUR,1082900.0,LUXCO-ST LOUIS
12151279,2012-01-11,ALVORD,51230,LYON,11788,Black Velvet,1750,10.45,15.67,6,94.02,10.5,Direct liquor / alvord,CANADIAN WHISKY,1012100.0,"CONSTELLATION WINE COMPANY, INC."


### Oberservation:
it appears that this store might have closed. All 10 invoices were from the first month of all data collected and 'direct liquor alvord' doesn't exist on google. I won't delete it because the sales are real.

## COUNTY

In [23]:
# COUNTY
iowa['County'].value_counts()

POLK          4208232
LINN          1901012
SCOTT         1397198
BLACK HAWK    1281301
JOHNSON       1149129
               ...   
ADAMS           18664
WAYNE           17549
DAVIS           17149
RINGGOLD        15546
FREMONT          3567
Name: County, Length: 99, dtype: int64

## CATEGORY

In [24]:
#CATEGORY NAME
iowa['Category Name'].value_counts()

VODKA                            4648435
RUM                              2265155
CANADIAN WHISKY                  2197313
WHISKY                           2079346
SCHNAPPS                         1544325
LIQUEUR                          1442946
BOURBON WHISKY                   1400309
BRANDY                           1198734
FLAVORED VODKA                   1131515
TEQUILA                          1087101
FLAVORED WHISKY                   979396
GIN                               761938
COCKTAILS RTD                     655518
SCOTCH                            487698
FLAVORED RUM                      326540
SPECIAL PACKAGING                 322171
RYE WHISKY                        122681
NEUTRAL GRAIN                      52958
NEUTRAL GRAIN SPIRIT FLAVORED      42124
SPECIALTY                          26812
FLAVORED GIN                       11943
MEZCAL                              7544
IOWA LOCAL                          2937
HIGH BEER                             32
Name: Category N

### Observation:
1. Vodka is #1 by entries, proving that 'vodka pays the bills'
2. Rum is #2? This is surprising however It could be due to different styles
3. whisky, whisky but schnapps is 5th?

## ITEM DESCRIPTION

In [25]:
# item description
iowa['Item Description'].value_counts()

Black Velvet                          583803
Hawkeye Vodka                         507616
Fireball Cinnamon Whiskey             298683
Titos Handmade Vodka                  257122
Five O'clock Vodka                    254808
                                       ...  
New Amsterdam Citron Flavored              1
Old Pulteney 12yr Sing Malt Scotch         1
Mr Boston 5 Star Canadian Whisky           1
Don Cesar Pisco Puro                       1
Burnett's  Blackberry  Flav. Vodka         1
Name: Item Description, Length: 10083, dtype: int64

### Observation:
1. Black Velvet! Canadian whisky is the single most common item on invoices
2. the rest of the top are variations of vodka or whisky

In [26]:
# What is the most popular item by volume?
group = iowa.groupby('Item Description')['Volume Sold (Liters)'].agg('sum','sum').reset_index()
group.sort_values('Volume Sold (Liters)',ascending=False)

Unnamed: 0,Item Description,Volume Sold (Liters)
1207,Black Velvet,13178692.13
4633,Hawkeye Vodka,8586083.34
9413,Titos Handmade Vodka,5649855.32
3616,Fireball Cinnamon Whiskey,4425843.57
1940,Captain Morgan Spiced Rum,4318257.98
...,...,...
6192,Mark and Diggers Rye Apple Moonshine Mini,0.05
7945,Ryder Cup Lemonade Moonshine Mini,0.05
493,Appalachian Apple Pie Moonshine Mini,0.05
8869,Stoli Crushed Pineapple Mini,0.05


### Observation:
1. BLACK VELVET is a clear favorite. with 1.3 M liters of Black Velvet over ~9 years
2. Rum did return to the top.

## BOTTLE VOLUME

In [27]:
iowa['Bottle Volume (ml)'].value_counts()

750       10249803
1750       4533531
1000       2904417
375        2316718
200         811182
500         722400
50          648298
600         218660
100         149335
3000         94403
300          93446
800          21644
1200         19490
2400          7904
850           3192
400           2214
1850          1883
1800          1760
20            1582
900           1481
6000          1290
3500          1122
950            864
250            858
150            789
2250           663
1950           566
4800           476
1125           411
502            349
25             252
603            201
1500           190
5250           178
3600           161
301            130
4500            79
1420            63
2550            42
175             39
425             26
378000          24
31500           24
2000            23
2125            16
3900            15
0               10
140000           8
355              7
9000             3
1508             3
180000           2
189000      

### Observation
There are some weird bottle sizes. Anything that doesn't end in a 0 or 5 is odd. Also bottler size of 0 is odd

In [28]:
# examine a couple entries: 12, 15, 7580
iowa[(iowa['Bottle Volume (ml)'] == 12) | (iowa['Bottle Volume (ml)'] == 15) | (iowa['Bottle Volume (ml)'] == 7580) ]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
2003400,2013-09-17,GRINNELL,50112,POWESHIEK,917836,Early Times Bourbon 354,7580,9.46,14.19,12,170.28,90.96,Mcnally's super valu,BOURBON WHISKY,1011250.0,BROWN-FORMAN CORPORATION
3963735,2014-05-12,WEST DES MOINES,50265,POLK,900673,Balvenie Trio Pack(12/14/17),15,3.55,5.33,32,170.56,0.48,Hy-vee #4 / wdm,SCOTCH,1012210.0,"WILLIAM GRANT AND SONS, INC."
16531109,2012-02-16,CORALVILLE,52241,JOHNSON,903158,Westerhall Plantation Rum,12,33.67,50.5,6,303.0,0.07,Bootleggin' barzini's fin,RUM,1062400.0,MHW LTD


In [29]:
#first odd volume size item
iowa[iowa['Item Description'] == 'Early Times Bourbon 354']

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
548321,2012-03-08,AMES,50010,STORY,17836,Early Times Bourbon 354,750,9.46,14.19,12,170.28,9.0,Cyclone liquors,BOURBON WHISKY,1011250.0,BROWN-FORMAN CORPORATION
1944213,2012-09-27,GRINNELL,50112,POWESHIEK,17836,Early Times Bourbon 354,750,9.46,14.19,12,170.28,9.0,Mcnally's super valu,BOURBON WHISKY,1011250.0,BROWN-FORMAN CORPORATION
2003400,2013-09-17,GRINNELL,50112,POWESHIEK,917836,Early Times Bourbon 354,7580,9.46,14.19,12,170.28,90.96,Mcnally's super valu,BOURBON WHISKY,1011250.0,BROWN-FORMAN CORPORATION
4962186,2013-06-03,DES MOINES,50312,POLK,17836,Early Times Bourbon 354,750,9.46,14.19,12,170.28,9.0,Ingersoll liquor and beverage,BOURBON WHISKY,1011250.0,BROWN-FORMAN CORPORATION


In [30]:
# after a little investigation 7580 is an error which makes sense. 
# 2gal of bourbon for 14 dollars would make the news

iowa['Bottle Volume (ml)'] = iowa['Bottle Volume (ml)'].replace(7580, 750)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iowa['Bottle Volume (ml)'] = iowa['Bottle Volume (ml)'].replace(7580, 750)


In [31]:
# investigating the 12ml bottle that costs $33.67, appears to be incorrect
iowa[iowa['State Bottle Cost'] == 33.67]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
569239,2014-10-22,AMES,50010,STORY,902551,Kammer Williams Pear Brandy W/pear,750,33.67,50.51,12,606.12,9.0,Cyclone liquors,LIQUEUR,1082900.0,NICHE IMPORT CO.
5258099,2014-09-25,DES MOINES,50314,POLK,902551,Kammer Williams Pear Brandy W/pear,750,33.67,50.51,35,1767.85,26.25,"Central city liquor, inc.",LIQUEUR,1082900.0,NICHE IMPORT CO.
6981415,2015-06-23,CLIVE,50325,POLK,902551,Kammer Williams Pear Brandy W/pear,750,33.67,50.51,23,1161.73,17.25,World liquor & tobacco,LIQUEUR,1082900.0,NICHE IMPORT CO.
9056208,2015-08-06,CEDAR FALLS,50613,BLACK HAWK,902551,Kammer Williams Pear Brandy W/pear,750,33.67,50.51,12,606.12,9.0,Happy's wine & spirits,LIQUEUR,1082900.0,NICHE IMPORT CO.
10033158,2015-02-23,WATERLOO,50701,BLACK HAWK,902551,Kammer Williams Pear Brandy W/pear,750,33.67,50.51,60,3030.6,45.0,Hy-vee wine and spirits / waterloo,LIQUEUR,1082900.0,NICHE IMPORT CO.
10096127,2016-02-29,WATERLOO,50701,BLACK HAWK,902551,Kammer Williams Pear Brandy W/pear,750,33.67,50.51,24,1212.24,18.0,Hy-vee wine and spirits / waterloo,LIQUEUR,1082900.0,NICHE IMPORT CO.
16531109,2012-02-16,CORALVILLE,52241,JOHNSON,903158,Westerhall Plantation Rum,12,33.67,50.5,6,303.0,0.07,Bootleggin' barzini's fin,RUM,1062400.0,MHW LTD


Westerhall plantation rum is real. I am going to change it to 750. 750 is by far the most popular bottle size and the price is close

In [32]:
# correcting the plantation
iowa['Bottle Volume (ml)'] = iowa['Bottle Volume (ml)'].replace(12, 750)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iowa['Bottle Volume (ml)'] = iowa['Bottle Volume (ml)'].replace(12, 750)


In [33]:
# balvinie three pack is actually 15cl not 15ml. so we will change that to 150ml
iowa['Bottle Volume (ml)'] = iowa['Bottle Volume (ml)'].replace(15, 150)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iowa['Bottle Volume (ml)'] = iowa['Bottle Volume (ml)'].replace(15, 150)


In [34]:
# another singular bottle size, but a large one
iowa[iowa['Bottle Volume (ml)']== 225000]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
18462369,2016-03-21,CEDAR RAPIDS,52402,LINN,946127,Cedar Ridge Barrel Aged Rum 5YR Reserve,225000,5500.0,8250.0,1,8250.0,225.0,Hy-vee food store #5 / cedar rapids,RUM,1062400.0,"CEDAR RIDGE VINEYARDS,LLC"


### This appears to be a whole barrel, and the price matches

In [35]:
# checking the next odd entry, 0
iowa[iowa['Bottle Volume (ml)']== 0]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
823146,2013-12-11,AMES,50014,STORY,905230,Rondiaz DSS Lemon Rum,0,8.52,12.78,30,383.4,0.0,Aj's liquor / ames,SPECIAL PACKAGING,1701200.0,PRESTIGE WINE AND SPIRITS GROUP
3366231,2014-01-29,PELLA,50219,MARION,964590,Canadian Club Dock 57 Mini DNO,0,4.8,7.2,12,86.4,0.0,Hy-vee wine and spirits / pella,CANADIAN WHISKY,1012100.0,JIM BEAM BRANDS
6350915,2014-04-03,DES MOINES,50320,POLK,934962,Three Olives Orange Vodka,0,11.17,16.76,12,201.12,0.0,Hy-vee #3 / bdi / des moines,VODKA,1032230.0,PROXIMO
16005684,2014-07-24,IOWA CITY,52240,JOHNSON,934962,Three Olives Orange Vodka,0,11.17,16.76,48,804.48,0.0,Hy-vee wine and spirits / iowa city,VODKA,1032230.0,PROXIMO
16042455,2014-04-24,IOWA CITY,52240,JOHNSON,934962,Three Olives Orange Vodka,0,11.17,16.76,120,2011.2,0.0,Liquor downtown / iowa city,VODKA,1032230.0,PROXIMO
16067707,2014-09-17,IOWA CITY,52240,JOHNSON,934962,Three Olives Orange Vodka,0,11.17,16.76,24,402.24,0.0,Hy-vee wine and spirits / iowa city,VODKA,1032230.0,PROXIMO
16170005,2014-03-20,IOWA CITY,52240,JOHNSON,934962,Three Olives Orange Vodka,0,11.17,16.76,48,804.48,0.0,Hy-vee wine and spirits / iowa city,VODKA,1032230.0,PROXIMO
22588548,2015-05-06,DAVENPORT,52807,SCOTT,941063,Burnett's Pink Lemonade Vodka Mini,0,4.25,6.38,12,76.56,0.0,Hy-vee #3 food and drugstore,FLAVORED VODKA,1031200.0,HEAVEN HILL BRANDS
22595956,2014-05-07,DAVENPORT,52807,SCOTT,941063,Burnett's Pink Lemonade Vodka Mini,0,4.25,6.38,12,76.56,0.0,Hy-vee #3 food and drugstore,FLAVORED VODKA,1031200.0,HEAVEN HILL BRANDS
22693830,2015-11-18,DAVENPORT,52807,SCOTT,941063,Burnett's Pink Lemonade Vodka Mini,0,4.25,6.38,12,76.56,0.0,Hy-vee #3 food and drugstore,FLAVORED VODKA,1031200.0,HEAVEN HILL BRANDS


In [36]:
# there are 10 entries that have a volume of zero

iowa = iowa[iowa['Bottle Volume (ml)'] != 0]

In [37]:
# next bottle volume: 140000
iowa[iowa['Bottle Volume (ml)'] == 140000]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
529833,2014-02-26,AMES,50010,STORY,901645,Absolut w/ Zing Zang Bloody Mary Mix,140000,1599.2,2398.8,1,2398.8,140.0,Sam's club 6568 / ames,SPECIAL PACKAGING,1701200.0,PERNOD RICARD USA/AUSTIN NICHOLS
4860315,2014-02-25,WINDSOR HEIGHTS,50311,POLK,901645,Absolut w/ Zing Zang Bloody Mary Mix,140000,1599.2,2398.8,1,2398.8,140.0,Sam's club 6344 / windsor heights,SPECIAL PACKAGING,1701200.0,PERNOD RICARD USA/AUSTIN NICHOLS
10263377,2014-03-03,WATERLOO,50702,BLACK HAWK,901645,Absolut w/ Zing Zang Bloody Mary Mix,140000,1599.2,2398.8,1,2398.8,140.0,Sam's club 6514 / waterloo,SPECIAL PACKAGING,1701200.0,PERNOD RICARD USA/AUSTIN NICHOLS
11884608,2014-02-26,SIOUX CITY,51106,WOODBURY,901645,Absolut w/ Zing Zang Bloody Mary Mix,140000,1599.2,2398.8,1,2398.8,140.0,Sam's club 6432 / sioux city,SPECIAL PACKAGING,1701200.0,PERNOD RICARD USA/AUSTIN NICHOLS
13350043,2014-03-03,COUNCIL BLUFFS,51501,POTTAWATTAMIE,901645,Absolut w/ Zing Zang Bloody Mary Mix,140000,1599.2,2398.8,1,2398.8,140.0,Sam's club 6472 / council bluffs,SPECIAL PACKAGING,1701200.0,PERNOD RICARD USA/AUSTIN NICHOLS
14809769,2014-02-26,DUBUQUE,52002,DUBUQUE,901645,Absolut w/ Zing Zang Bloody Mary Mix,140000,1599.2,2398.8,1,2398.8,140.0,Sam's club 4973 / dubuque,SPECIAL PACKAGING,1701200.0,PERNOD RICARD USA/AUSTIN NICHOLS
18180186,2014-03-03,CEDAR RAPIDS,52402,LINN,901645,Absolut w/ Zing Zang Bloody Mary Mix,140000,1599.2,2398.8,2,4797.6,280.0,Sam's club 8162 / cedar rapids,SPECIAL PACKAGING,1701200.0,PERNOD RICARD USA/AUSTIN NICHOLS
22652102,2014-02-26,DAVENPORT,52807,SCOTT,901645,Absolut w/ Zing Zang Bloody Mary Mix,140000,1599.2,2398.8,2,4797.6,280.0,Sam's club 8238 / davenport,SPECIAL PACKAGING,1701200.0,PERNOD RICARD USA/AUSTIN NICHOLS


#### 140000 ml appears to be a real entry. they are all the same product and volume and price seem appropriate

In [38]:
# next bottle volume: 9000
iowa[iowa['Bottle Volume (ml)'] == 9000]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
9304622,2016-04-28,CEDAR FALLS,50613,BLACK HAWK,935961,Pearl Red Berry,9000,7.0,10.5,60,630.0,540.0,Hillstreet news and tobacco,VODKA,1032230.0,LUXCO-ST LOUIS
10673265,2016-04-28,WATERLOO,50703,BLACK HAWK,935961,Pearl Red Berry,9000,7.0,10.5,120,1260.0,1080.0,Sycamore convenience,VODKA,1032230.0,LUXCO-ST LOUIS
16352572,2016-04-25,IOWA CITY,52240,JOHNSON,935961,Pearl Red Berry,9000,7.0,10.5,12,126.0,108.0,Hy-vee wine and spirits / iowa city,VODKA,1032230.0,LUXCO-ST LOUIS


#### 9000 seems off. mainly due to 9L of vodka being sold for $10.50

In [39]:
# exploring the 9000ml item
iowa[(iowa['Item Description'] == 'Pearl Red Berry') & (iowa['State Bottle Cost'] == 7.0)]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
6258276,2016-12-05,DES MOINES,50320,POLK,935961,Pearl Red Berry,750,7.0,10.5,12,126.0,9.0,Hy-vee #3 / bdi / des moines,VODKA,1032230.0,LUXCO-ST LOUIS
6285029,2016-09-19,DES MOINES,50320,POLK,935961,Pearl Red Berry,750,7.0,10.5,12,126.0,9.0,Hy-vee #3 / bdi / des moines,VODKA,1032230.0,LUXCO-ST LOUIS
6292939,2018-01-29,DES MOINES,50320,POLK,935961,Pearl Red Berry,750,7.0,10.5,12,126.0,9.0,Hy-vee #3 / bdi / des moines,VODKA,1032230.0,LUXCO-ST LOUIS
8899547,2018-12-28,CEDAR FALLS,50613,BLACK HAWK,935961,Pearl Red Berry,750,7.0,10.5,120,1260.0,90.0,Hillstreet news and tobacco,VODKA,1032230.0,LUXCO-ST LOUIS
8938552,2017-05-11,CEDAR FALLS,50613,BLACK HAWK,935961,Pearl Red Berry,750,7.0,10.5,60,630.0,45.0,Hillstreet news and tobacco,VODKA,1032230.0,LUXCO-ST LOUIS
8981810,2018-03-08,CEDAR FALLS,50613,BLACK HAWK,935961,Pearl Red Berry,750,7.0,10.5,60,630.0,45.0,Hillstreet news and tobacco,VODKA,1032230.0,LUXCO-ST LOUIS
8992272,2018-05-31,CEDAR FALLS,50613,BLACK HAWK,935961,Pearl Red Berry,750,7.0,10.5,120,1260.0,90.0,Hillstreet news and tobacco,VODKA,1032230.0,LUXCO-ST LOUIS
9304622,2016-04-28,CEDAR FALLS,50613,BLACK HAWK,935961,Pearl Red Berry,9000,7.0,10.5,60,630.0,540.0,Hillstreet news and tobacco,VODKA,1032230.0,LUXCO-ST LOUIS
10467820,2017-05-11,WATERLOO,50703,BLACK HAWK,935961,Pearl Red Berry,750,7.0,10.5,60,630.0,45.0,Sycamore convenience,VODKA,1032230.0,LUXCO-ST LOUIS
10673265,2016-04-28,WATERLOO,50703,BLACK HAWK,935961,Pearl Red Berry,9000,7.0,10.5,120,1260.0,1080.0,Sycamore convenience,VODKA,1032230.0,LUXCO-ST LOUIS


In [40]:
# 9000 is supposed to be 750
iowa['Bottle Volume (ml)'] = iowa['Bottle Volume (ml)'].replace(9000, 750)

In [41]:
# Bottle volume: 1508
iowa[iowa['Bottle Volume (ml)'] == 1508]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
16125252,2012-10-01,IOWA CITY,52240,JOHNSON,35261,Purus Organic Vodka Mini,1508,4.4,6.6,132,871.2,199.06,Hy-vee wine and spirits / iowa city,VODKA,1032230.0,ANHEUSER-BUSCH/LONGTAIL LIBATIONS
17186553,2012-10-02,MONTICELLO,52310,JONES,35261,Purus Organic Vodka Mini,1508,4.4,6.6,8,52.8,12.06,Fareway stores #840 / monticello,VODKA,1032230.0,ANHEUSER-BUSCH/LONGTAIL LIBATIONS
20254889,2012-10-02,FORT MADISON,52627,LEE,35261,Purus Organic Vodka Mini,1508,4.4,6.6,4,26.4,6.03,Quicker liquor store,VODKA,1032230.0,ANHEUSER-BUSCH/LONGTAIL LIBATIONS


In [42]:
# I can tell that something labeled mini is wrong at 1.5L
iowa[(iowa['Item Description'] == 'Purus Organic Vodka Mini')]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
16125252,2012-10-01,IOWA CITY,52240,JOHNSON,35261,Purus Organic Vodka Mini,1508,4.4,6.6,132,871.2,199.06,Hy-vee wine and spirits / iowa city,VODKA,1032230.0,ANHEUSER-BUSCH/LONGTAIL LIBATIONS
17186553,2012-10-02,MONTICELLO,52310,JONES,35261,Purus Organic Vodka Mini,1508,4.4,6.6,8,52.8,12.06,Fareway stores #840 / monticello,VODKA,1032230.0,ANHEUSER-BUSCH/LONGTAIL LIBATIONS
20254889,2012-10-02,FORT MADISON,52627,LEE,35261,Purus Organic Vodka Mini,1508,4.4,6.6,4,26.4,6.03,Quicker liquor store,VODKA,1032230.0,ANHEUSER-BUSCH/LONGTAIL LIBATIONS


### Thoughts
These are the only entries for Purus Vodka Mini. So I do not have a reference point. I am just going to change them to the standard mini size 50ml, it will be close. The price of $4.4 assures me it is close.

In [43]:
# make those changes
iowa['Bottle Volume (ml)'] = iowa['Bottle Volume (ml)'].replace(1508, 50)

In [44]:
# Bottle volume: 301
iowa[iowa['Bottle Volume (ml)'] == 301]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
42549,2012-11-05,ALDEN,50006,HARDIN,2928,Bacardi Multi-Flavor Mini 6 Pack,301,3.33,5.0,10,50.0,3.01,Shamrock spirits,SPECIAL PACKAGING,1701200.0,BACARDI USA INC
131187,2012-04-12,ALTOONA,50009,POLK,2928,Bacardi Multi-Flavor Mini 6 Pack,301,3.33,5.0,20,100.0,6.02,Hy-vee food store / altoona,SPECIAL PACKAGING,1701200.0,BACARDI USA INC
137107,2012-02-23,ALTOONA,50009,POLK,2928,Bacardi Multi-Flavor Mini 6 Pack,301,3.33,5.0,1,5.0,0.30,Hy-vee food store / altoona,SPECIAL PACKAGING,1701200.0,BACARDI USA INC
397583,2012-11-12,AMES,50010,STORY,2928,Bacardi Multi-Flavor Mini 6 Pack,301,3.33,5.0,20,100.0,6.02,Hy-vee food store #1 / ames,SPECIAL PACKAGING,1701200.0,BACARDI USA INC
454710,2012-07-19,AMES,50010,STORY,2928,Bacardi Multi-Flavor Mini 6 Pack,301,3.33,5.0,20,100.0,6.02,Cyclone liquors,SPECIAL PACKAGING,1701200.0,BACARDI USA INC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21438615,2012-11-01,LE CLAIRE,52753,SCOTT,2928,Bacardi Multi-Flavor Mini 6 Pack,301,3.33,5.0,2,10.0,0.60,Slagle's grocery / le claire,SPECIAL PACKAGING,1701200.0,BACARDI USA INC
21439949,2012-06-28,LE CLAIRE,52753,SCOTT,2928,Bacardi Multi-Flavor Mini 6 Pack,301,3.33,5.0,1,5.0,0.30,Slagle's grocery / le claire,SPECIAL PACKAGING,1701200.0,BACARDI USA INC
21445564,2012-05-31,LE CLAIRE,52753,SCOTT,2928,Bacardi Multi-Flavor Mini 6 Pack,301,3.33,5.0,2,10.0,0.60,Slagle's grocery / le claire,SPECIAL PACKAGING,1701200.0,BACARDI USA INC
22147102,2012-08-16,DAVENPORT,52804,SCOTT,2928,Bacardi Multi-Flavor Mini 6 Pack,301,3.33,5.0,20,100.0,6.02,Hy-vee wine and spirits #2,SPECIAL PACKAGING,1701200.0,BACARDI USA INC


In [45]:
# quick swap from 301 to 300
iowa['Bottle Volume (ml)'] = iowa['Bottle Volume (ml)'].replace(301, 300)

In [46]:
# Now all the bottle sizes should be correct
iowa['Bottle Volume (ml)'].value_counts()

750       10249808
1750       4533531
1000       2904417
375        2316718
200         811182
500         722400
50          648301
600         218660
100         149335
3000         94403
300          93576
800          21644
1200         19490
2400          7904
850           3192
400           2214
1850          1883
1800          1760
20            1582
900           1481
6000          1290
3500          1122
950            864
250            858
150            790
2250           663
1950           566
4800           476
1125           411
502            349
25             252
603            201
1500           190
5250           178
3600           161
4500            79
1420            63
2550            42
175             39
425             26
378000          24
31500           24
2000            23
2125            16
3900            15
140000           8
355              7
180000           2
189000           2
225000           1
Name: Bottle Volume (ml), dtype: int64

In [47]:
# two more! 603, 502
# Bottle volume: 603
iowa[iowa['Bottle Volume (ml)'] == 603]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
137508,2015-09-17,ALTOONA,50009,POLK,65121,Grand Marnier Cordon Rouge Mini,603,22.68,34.02,1,34.02,0.60,Super stop 2 / altoona,LIQUEUR,1082900.0,"MOET HENNESSY USA, INC."
170690,2016-03-16,ALTOONA,50009,POLK,65121,Grand Marnier Cordon Rouge Mini,603,22.68,34.02,20,680.40,12.06,Fareway stores #925 / altoona,LIQUEUR,1082900.0,"MOET HENNESSY USA, INC."
415695,2015-08-12,AMES,50010,STORY,65121,Grand Marnier Cordon Rouge Mini,603,22.68,34.02,1,34.02,0.60,Hy-vee drugstore #2 / ames,LIQUEUR,1082900.0,"MOET HENNESSY USA, INC."
421909,2015-08-26,AMES,50010,STORY,65121,Grand Marnier Cordon Rouge Mini,603,22.68,34.02,1,34.02,0.60,Hy-vee #2 / ames,LIQUEUR,1082900.0,"MOET HENNESSY USA, INC."
512730,2015-10-21,AMES,50010,STORY,65121,Grand Marnier Cordon Rouge Mini,603,22.68,34.02,1,34.02,0.60,Hy-vee #2 / ames,LIQUEUR,1082900.0,"MOET HENNESSY USA, INC."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22591514,2015-08-05,DAVENPORT,52807,SCOTT,65121,Grand Marnier Cordon Rouge Mini,603,22.68,34.02,1,34.02,0.60,Hy-vee #3 food and drugstore,LIQUEUR,1082900.0,"MOET HENNESSY USA, INC."
22601856,2015-10-14,DAVENPORT,52807,SCOTT,65121,Grand Marnier Cordon Rouge Mini,603,22.68,34.02,1,34.02,0.60,Hy-vee #3 food and drugstore,LIQUEUR,1082900.0,"MOET HENNESSY USA, INC."
22648332,2015-10-07,DAVENPORT,52807,SCOTT,65121,Grand Marnier Cordon Rouge Mini,603,22.68,34.02,1,34.02,0.60,Hy-vee #3 food and drugstore,LIQUEUR,1082900.0,"MOET HENNESSY USA, INC."
22709951,2015-12-29,DAVENPORT,52807,SCOTT,65121,Grand Marnier Cordon Rouge Mini,603,22.68,34.02,1,34.02,0.60,Hy-vee #3 food and drugstore,LIQUEUR,1082900.0,"MOET HENNESSY USA, INC."


#### 603 that looks right

In [48]:
# Bottle volume: 502
iowa[iowa['Bottle Volume (ml)'] == 502]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
141228,2012-04-26,ALTOONA,50009,POLK,42310,Malibu Red PET Mini,502,3.3,4.95,24,118.80,12.05,Hy-vee food store / altoona,RUM,1062400.0,PERNOD RICARD USA/AUSTIN NICHOLS
142676,2012-04-12,ALTOONA,50009,POLK,42310,Malibu Red PET Mini,502,3.3,4.95,12,59.40,6.02,Hy-vee food store / altoona,RUM,1062400.0,PERNOD RICARD USA/AUSTIN NICHOLS
819059,2012-06-14,AMES,50014,STORY,42310,Malibu Red PET Mini,502,3.3,4.95,2,9.90,1.00,Aj's liquor / ames,RUM,1062400.0,PERNOD RICARD USA/AUSTIN NICHOLS
831071,2012-03-29,AMES,50014,STORY,42310,Malibu Red PET Mini,502,3.3,4.95,1,4.95,0.50,Aj's liquor / ames,RUM,1062400.0,PERNOD RICARD USA/AUSTIN NICHOLS
959933,2012-09-12,ANKENY,50021,POLK,42310,Malibu Red PET Mini,502,3.3,4.95,1,4.95,0.50,Hy-vee drugstore / ankeny,RUM,1062400.0,PERNOD RICARD USA/AUSTIN NICHOLS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22638603,2012-04-03,DAVENPORT,52807,SCOTT,42310,Malibu Red PET Mini,502,3.3,4.95,1,4.95,0.50,Hy-vee #3 food and drugstore,RUM,1062400.0,PERNOD RICARD USA/AUSTIN NICHOLS
22645302,2012-08-07,DAVENPORT,52807,SCOTT,42310,Malibu Red PET Mini,502,3.3,4.95,1,4.95,0.50,Hy-vee #3 food and drugstore,RUM,1062400.0,PERNOD RICARD USA/AUSTIN NICHOLS
22650351,2012-09-18,DAVENPORT,52807,SCOTT,42310,Malibu Red PET Mini,502,3.3,4.95,3,14.85,1.51,Hy-vee #3 food and drugstore,RUM,1062400.0,PERNOD RICARD USA/AUSTIN NICHOLS
22667910,2012-06-05,DAVENPORT,52807,SCOTT,42310,Malibu Red PET Mini,502,3.3,4.95,3,14.85,1.51,Hy-vee #3 food and drugstore,RUM,1062400.0,PERNOD RICARD USA/AUSTIN NICHOLS


In [49]:
# 502 is supposed to be 5cl
iowa['Bottle Volume (ml)'] = iowa['Bottle Volume (ml)'].replace(502, 50)

## State Bottle Retail




In [50]:
## state bottle retail
iowa['State Bottle Retail'].describe()

count    2.281221e+07
mean     1.521282e+01
std      1.617233e+01
min      2.900000e-01
25%      8.270000e+00
50%      1.238000e+01
75%      1.874000e+01
max      1.152000e+04
Name: State Bottle Retail, dtype: float64

### Observations
1. There is no zero's

### Thoughts
The way I have to check for errors are if retail is less than cost, however things can be sold at a loss.

In [51]:
# How many times were items sold for a loss?
iowa[iowa['State Bottle Retail'] < iowa['State Bottle Cost']]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
456653,2012-02-16,AMES,50010,STORY,35109,Russian Standard Original Vodka,750,16.33,14.83,2,29.66,1.50,Hy-vee #2 / ames,VODKA,1032230.0,"RUSSIAN STANDARD VODKA, USA"
459762,2012-02-02,AMES,50010,STORY,35109,Russian Standard Original Vodka,750,16.33,14.83,4,59.32,3.00,Hy-vee #2 / ames,VODKA,1032230.0,"RUSSIAN STANDARD VODKA, USA"
535443,2012-02-09,AMES,50010,STORY,35109,Russian Standard Original Vodka,750,16.33,14.83,2,29.66,1.50,Hy-vee #2 / ames,VODKA,1032230.0,"RUSSIAN STANDARD VODKA, USA"
1587503,2019-12-13,CARLISLE,50047,WARREN,647,Kahlua w/Absolut 50ml & Glass,800,10.99,7.50,6,98.94,4.80,Fareway stores #147 / carlisle,SPECIAL PACKAGING,1701200.0,PERNOD RICARD USA/AUSTIN NICHOLS
1587877,2020-12-04,CARLISLE,50047,WARREN,12865,Rich & Rare Apple Mini,50,60.00,7.74,1,9.00,0.05,Fareway stores #147 / carlisle,CANADIAN WHISKY,1012100.0,SAZERAC COMPANY INC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19099579,2012-02-02,CEDAR RAPIDS,52405,LINN,35109,Russian Standard Original Vodka,750,16.33,14.83,2,29.66,1.50,Hy-vee food store #1 / cedar rapids,VODKA,1032230.0,"RUSSIAN STANDARD VODKA, USA"
19860559,2012-02-15,OSKALOOSA,52577,BUCHANAN,35109,Russian Standard Original Vodka,750,16.33,14.83,2,29.66,1.50,C b liquor,VODKA,1032230.0,"RUSSIAN STANDARD VODKA, USA"
20291426,2019-11-06,FORT MADISON,52627,LEE,100762,Bird Dog Peach w/2-50mls,850,10.00,9.00,6,90.00,5.10,Quicker liquor store,SPECIAL PACKAGING,1701200.0,WESTERN SPIRITS BEVERAGE CO. LLC
20869124,2012-02-14,BETTENDORF,52722,SCOTT,35109,Russian Standard Original Vodka,750,16.33,14.83,2,29.66,1.50,Hy-vee wine and spirits / bettendorf,VODKA,1032230.0,"RUSSIAN STANDARD VODKA, USA"


In [63]:
# some deeper filtering
iowa[(iowa['State Bottle Retail'] < iowa['State Bottle Cost']) & (iowa['Item Description'] != 'Russian Standard Original Vodka')]

Unnamed: 0,Date,City,Zip Code,County,Category Name,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category,Vendor Name
1585794,2019-12-13,CARLISLE,50047,WARREN,SPECIAL PACKAGING,647,Kahlua w/Absolut 50ml & Glass,800,10.99,7.5,6,98.94,4.8,Fareway stores #147 / carlisle,1701200.0,PERNOD RICARD USA
1586167,2020-12-04,CARLISLE,50047,WARREN,CANADIAN WHISKY,12865,Rich & Rare Apple Mini,50,60.0,7.74,1,9.0,0.05,Fareway stores #147 / carlisle,1012100.0,SAZERAC CO
2431210,2020-12-03,JOHNSTON,50131,POLK,CANADIAN WHISKY,12865,Rich & Rare Apple Mini,50,60.0,7.74,3,27.0,0.15,Hy-vee food store / johnston,1012100.0,SAZERAC CO
2818905,2019-12-17,MARSHALLTOWN,50158,MARSHALL,SPECIAL PACKAGING,647,Kahlua w/Absolut 50ml & Glass,800,10.99,9.74,6,98.94,4.8,Marshall beer wine spirits,1701200.0,PERNOD RICARD USA
2937011,2020-12-04,MONTEZUMA,50171,POWESHIEK,CANADIAN WHISKY,12865,Rich & Rare Apple Mini,50,60.0,7.74,1,9.0,0.05,Monte spirits,1012100.0,SAZERAC CO
3141897,2019-11-12,NEWTON,50208,JASPER,SPECIAL PACKAGING,100762,Bird Dog Peach w/2-50mls,850,10.0,8.31,6,90.0,5.1,Pit stop liquors / newton,1701200.0,WESTERN SPIRIT BEVERAGE
3445239,2019-12-13,PERRY,50220,DALLAS,SPECIAL PACKAGING,647,Kahlua w/Absolut 50ml & Glass,800,10.99,10.67,6,98.94,4.8,Bluejay market,1701200.0,PERNOD RICARD USA
3982488,2019-11-06,WEST DES MOINES,50265,POLK,SPECIAL PACKAGING,100762,Bird Dog Peach w/2-50mls,850,10.0,6.0,6,90.0,5.1,Hy-vee #4 / wdm,1701200.0,WESTERN SPIRIT BEVERAGE
4005733,2021-02-11,WEST DES MOINES,50265,POLK,CANADIAN WHISKY,12865,Rich & Rare Apple Mini,50,60.0,7.74,20,154.8,1.0,Casey's general store #3098 / wdm,1012100.0,SAZERAC CO
5390543,2021-02-03,DES MOINES,50314,POLK,CANADIAN WHISKY,12865,Rich & Rare Apple Mini,50,60.0,7.74,10,77.4,0.5,"Central city liquor, inc.",1012100.0,SAZERAC CO


### Observations
1. The products left over some bird dog peach which definitely has the wrong sales amount,but that will be fixed later
2. Rich & Rare Apple Mini can't be $60
3. Corazon Reposado Buy the Barrel has a cost of 262.46 and a sale of 20.63
4. the kahlua / vodka kit seems to also have incorrect sale prices
5. they are all 98.94 for 6 packs but the retail price changes by invoice
6. it seems like these promotional packaged booze lose money

In [52]:
# I don't really know the R & R mini should be costed at. a google search says in texas they go for $1.09
# which is much less than the $7.74
iowa[iowa['Item Description'] == 'Rich & Rare Apple Mini']

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
61263,2016-11-16,ALTOONA,50009,POLK,12865,Rich & Rare Apple Mini,600,6.00,9.00,5,45.00,3.00,Fareway stores #925 / altoona,CANADIAN WHISKY,1012100.0,SAZERAC COMPANY INC
184358,2021-11-11,ALTOONA,50009,POLK,12865,Rich & Rare Apple Mini,50,5.16,7.74,10,77.40,0.50,Hy-vee food store / altoona,CANADIAN WHISKY,1012100.0,SAZERAC COMPANY INC
192364,2021-12-23,ALTOONA,50009,POLK,12865,Rich & Rare Apple Mini,50,5.16,7.74,1,7.74,0.05,Fareway stores #925 / altoona,CANADIAN WHISKY,1012100.0,SAZERAC COMPANY INC
232542,2016-05-25,ALTOONA,50009,POLK,12865,Rich & Rare Apple Mini,600,6.00,9.00,5,45.00,3.00,Fareway stores #925 / altoona,CANADIAN WHISKY,1012100.0,SAZERAC COMPANY INC
293041,2017-01-18,AMES,50010,STORY,12865,Rich & Rare Apple Mini,600,6.00,9.00,5,45.00,3.00,Hy-vee drugstore #2 / ames,CANADIAN WHISKY,1012100.0,SAZERAC COMPANY INC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22082864,2017-03-08,DAVENPORT,52804,SCOTT,12865,Rich & Rare Apple Mini,600,6.00,9.00,1,9.00,0.60,Hy-vee wine and spirits #2,CANADIAN WHISKY,1012100.0,SAZERAC COMPANY INC
22114007,2018-04-04,DAVENPORT,52804,SCOTT,12865,Rich & Rare Apple Mini,600,6.00,9.00,10,90.00,6.00,Famous liquors,CANADIAN WHISKY,1012100.0,SAZERAC COMPANY INC
22291160,2021-10-26,DAVENPORT,52804,SCOTT,12865,Rich & Rare Apple Mini,50,5.16,7.74,1,7.74,0.05,Hy-vee #5 / davenport,CANADIAN WHISKY,1012100.0,SAZERAC COMPANY INC
22348696,2017-03-16,DAVENPORT,52806,SCOTT,12865,Rich & Rare Apple Mini,600,6.00,9.00,3,27.00,1.80,The liquor stop llc,CANADIAN WHISKY,1012100.0,SAZERAC COMPANY INC


In [53]:
# it would appear $5.16 is the correct answer. 

iowa[(iowa['State Bottle Cost'] == 60.00) & (iowa['Item Description'] == 'Rich & Rare Apple Mini')] = iowa[(iowa['State Bottle Cost'] == 60.00) & (iowa['Item Description'] == 'Rich & Rare Apple Mini')].replace(60.0,5.16)

In [54]:
# 'Corazon Reposado Buy the Barrel'
iowa[iowa['Item Description'] == 'Corazon Reposado Buy the Barrel']


Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
5411033,2021-05-10,DES MOINES,50314,POLK,989235,Corazon Reposado Buy the Barrel,750,11.0,20.63,60,1237.8,45.0,Central city 2,TEQUILA,1022200.0,GEMINI SPIRITS
7297173,2021-05-13,MASON CITY,50401,CERRO GORDO,989235,Corazon Reposado Buy the Barrel,750,262.46,20.63,12,247.56,9.0,Hy-vee food store #1 / mason city,TEQUILA,1022200.0,GEMINI SPIRITS
8075541,2021-05-14,FORT DODGE,50501,WEBSTER,989235,Corazon Reposado Buy the Barrel,750,7.47,20.63,12,247.56,9.0,Hy-vee fort dodge wine and spirits,TEQUILA,1022200.0,GEMINI SPIRITS
9231477,2021-06-10,CEDAR FALLS,50613,BLACK HAWK,989235,Corazon Reposado Buy the Barrel,750,13.75,20.63,6,123.78,4.5,Hillstreet news and tobacco,TEQUILA,1022200.0,SAZERAC COMPANY INC
11937683,2021-05-13,SIOUX CITY,51106,WOODBURY,989235,Corazon Reposado Buy the Barrel,750,9.06,20.63,6,123.78,4.5,Hy-vee food store / sioux city,TEQUILA,1022200.0,GEMINI SPIRITS
13833251,2021-05-12,COUNCIL BLUFFS,51503,POTTAWATTAMIE,989235,Corazon Reposado Buy the Barrel,750,11.07,20.63,12,247.56,9.0,Hy-vee food store #2 / council bluffs,TEQUILA,1022200.0,GEMINI SPIRITS
14689257,2021-05-14,DUBUQUE,52001,DUBUQUE,989235,Corazon Reposado Buy the Barrel,750,6.0,20.63,12,247.56,9.0,Hy-vee food store / dubuque,TEQUILA,1022200.0,GEMINI SPIRITS
17930650,2021-05-10,CEDAR RAPIDS,52401,LINN,989235,Corazon Reposado Buy the Barrel,750,5.94,20.63,48,990.24,36.0,Benz distributing,TEQUILA,1022200.0,GEMINI SPIRITS
20433435,2021-05-13,KEOKUK,52632,LEE,989235,Corazon Reposado Buy the Barrel,750,15.33,20.63,6,123.78,4.5,Keokuk spirits,TEQUILA,1022200.0,GEMINI SPIRITS
20913610,2021-05-10,BETTENDORF,52722,SCOTT,989235,Corazon Reposado Buy the Barrel,750,8.74,20.63,24,495.12,18.0,Lot-a-spirits,TEQUILA,1022200.0,GEMINI SPIRITS


### Interesting result. 
the bottle cost for 'Corazon Reposado Buy the Barrel' is all over the place, however mostly great returns
I am going to replace the 262... with a value from the same invoice date



In [55]:
iowa[(iowa['State Bottle Cost'] == 262.46) & (iowa['Item Description'] == 'Corazon Reposado Buy the Barrel')] = iowa[(iowa['State Bottle Cost'] == 262.46) & (iowa['Item Description'] == 'Corazon Reposado Buy the Barrel')].replace(262.46, 15.33)

## BOTTLE SOLD

In [57]:
iowa['Bottles Sold'].value_counts()

12      6037635
6       4601786
2       3011982
1       2548857
3       2256427
         ...   
469           1
2352          1
538           1
337           1
193           1
Name: Bottles Sold, Length: 652, dtype: int64

In [58]:
iowa[iowa['Bottles Sold'] == 0]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
1743524,2018-06-05,GRIMES,50111,POLK,5153,Glenlivet Founders Reserve,750,19.98,29.97,0,359.64,0.06,Southern glazers wine & spirits of iowa,WHISKY,1012400.0,PERNOD RICARD USA/AUSTIN NICHOLS
2051601,2017-10-31,INDIANOLA,50125,WARREN,16416,Old Grand-Dad 100 Prf Bond Bourbon 4 Yr,750,13.12,19.68,0,236.16,0.68,Indy 66 #928 / indianola,RYE WHISKY,1081500.0,JIM BEAM BRANDS
3399453,2016-08-31,PELLA,50219,MARION,44217,Barton Rum Light,1000,4.0,6.0,0,72.0,0.5,Hy-vee wine and spirits / pella,RUM,1062400.0,SAZERAC NORTH AMERICA
4108869,2016-10-06,WEST DES MOINES,50266,DALLAS,987514,Kirkland Signature French Vodka,1750,15.12,22.68,0,0.0,0.21,Costco wholesale #788 / wdm,VODKA,1032230.0,MISA IMPORTS INC
8756845,2016-08-30,STORM LAKE,50588,BUENA VISTA,69636,Dr. Mcgillicuddy's Cherry Schnapps,750,8.66,12.99,0,155.88,0.06,Hy-vee wine and spirits / storm lake,SCHNAPPS,1082390.0,SAZERAC COMPANY INC
10679449,2016-08-30,WATERLOO,50703,BLACK HAWK,86673,Jack Daniel's Tennessee Honey,1750,30.68,46.02,0,276.12,0.29,Prime mart / broadway waterloo,WHISKY,1012400.0,BROWN-FORMAN CORPORATION
21421024,2017-04-12,LE CLAIRE,52753,SCOTT,76658,Pear Necessity Liqueur,375,9.0,13.5,0,162.0,0.03,Mississippi river distilling co,SCHNAPPS,1082390.0,MISSISSIPPI RIVER DISTILLING CO.
21740745,2016-08-29,MUSCATINE,52761,MUSCATINE,35109,Russian Standard,750,10.0,15.0,0,180.0,0.56,Hy-vee food store / muscatine,VODKA,1032230.0,"RUSSIAN STANDARD VODKA, USA"


### Observations
1. multiple errors here when bottles sold are 0
2. every bottles sold, appears to be actually 12, this is based on sale dollars and retail sale price.
3. the exception is costco vodka. which has no bottles sold and no sale price. I might have to delete that entry

In [59]:
# exploring costco
iowa[iowa['Item Description'] == 'Kirkland Signature French Vodka']

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
4108595,2018-10-04,WEST DES MOINES,50266,DALLAS,987514,Kirkland Signature French Vodka,1750,14.48,21.72,576,12510.72,1008.00,Costco wholesale #788 / wdm,VODKA,1032230.0,MISA IMPORTS INC
4108869,2016-10-06,WEST DES MOINES,50266,DALLAS,987514,Kirkland Signature French Vodka,1750,15.12,22.68,0,0.00,0.21,Costco wholesale #788 / wdm,VODKA,1032230.0,MISA IMPORTS INC
4114370,2018-12-13,WEST DES MOINES,50266,DALLAS,987514,Kirkland Signature French Vodka,1750,14.48,21.72,288,6255.36,504.00,Costco wholesale #788 / wdm,VODKA,1032230.0,MISA IMPORTS INC
4117365,2019-01-24,WEST DES MOINES,50266,DALLAS,987514,Kirkland Signature French Vodka,1750,14.48,21.72,1728,37532.16,3024.00,Costco wholesale #788 / wdm,VODKA,1032230.0,MISA IMPORTS INC
4120565,2018-08-02,WEST DES MOINES,50266,DALLAS,987514,Kirkland Signature French Vodka,1750,14.48,21.72,2010,43657.20,3517.50,Costco wholesale #788 / wdm,VODKA,1032230.0,MISA IMPORTS INC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22765165,2021-09-28,DAVENPORT,52807,SCOTT,987514,Kirkland Signature French Vodka,1750,11.69,17.54,612,10734.48,1071.00,Costco wholesale #1325 / davenport,VODKA,1032230.0,MISA IMPORTS INC
22773311,2020-11-03,DAVENPORT,52807,SCOTT,987514,Kirkland Signature French Vodka,1750,14.47,21.71,576,12504.96,1008.00,Costco wholesale #1325 / davenport,VODKA,1032230.0,MISA IMPORTS INC
22780545,2020-11-17,DAVENPORT,52807,SCOTT,987514,Kirkland Signature French Vodka,1750,14.47,21.71,288,6252.48,504.00,Costco wholesale #1325 / davenport,VODKA,1032230.0,MISA IMPORTS INC
22791790,2020-06-09,DAVENPORT,52807,SCOTT,987514,Kirkland Signature French Vodka,1750,14.47,21.71,288,6252.48,504.00,Costco wholesale #1325 / davenport,VODKA,1032230.0,MISA IMPORTS INC


In [60]:
## I am going to delete that entry because Costco buys so much vodka at once that I can not possibly predict
# the correct value
iowa[(iowa['Item Description'] == 'Kirkland Signature French Vodka') & (iowa['Bottles Sold'] == 0)]

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
4108869,2016-10-06,WEST DES MOINES,50266,DALLAS,987514,Kirkland Signature French Vodka,1750,15.12,22.68,0,0.0,0.21,Costco wholesale #788 / wdm,VODKA,1032230.0,MISA IMPORTS INC


In [61]:
# drop that costco index
iowa = iowa.drop(4104807)

In [62]:
# now we want to make all these 0's 12's, and it is easy because the only entries with 0's are the bottle sold column
iowa['Bottles Sold'] = iowa['Bottles Sold'].replace(0,12)

## Sale (Dollars) and Volume Sold (Liters)

In [63]:
# These columns contain some errors and will be updated through math. 

# sale is: bottles sold * retail
# volume sold is: bottles sold * (bottle volume/1000)

# 1000 is for unit conversion
iowa['Sale (Dollars)'] = iowa['Bottles Sold'] * iowa['State Bottle Retail']
iowa['Volume Sold (Liters)'] = iowa['Bottles Sold'] * iowa['Bottle Volume (ml)'] / 1000

In [64]:
iowa.head()

Unnamed: 0,Date,City,Zip Code,County,Item Number,Item Description,Bottle Volume (ml),State Bottle Cost,State Bottle Retail,Bottles Sold,Sale (Dollars),Volume Sold (Liters),Store Name,Category Name,Category,Vendor Name
0,2018-09-24,ADAIR,50002,ADAIR,86691,Jack Daniels Tennessee Fire Mini,500,9.06,13.59,3,40.77,1.5,Casey's general store #2521 / adair,WHISKY,1012400.0,BROWN-FORMAN CORPORATION
1,2018-09-24,ADAIR,50002,ADAIR,25606,Seagrams 7 Crown Bl Whiskey,750,7.0,10.5,12,126.0,9.0,Kum & go #76 / adair,WHISKY,1012400.0,DIAGEO AMERICAS
2,2018-09-24,ADAIR,50002,ADAIR,37994,Smirnoff 80prf,375,4.75,7.13,6,42.78,2.25,Casey's general store #2521 / adair,VODKA,1032230.0,DIAGEO AMERICAS
3,2018-09-24,ADAIR,50002,ADAIR,36304,Hawkeye Vodka,375,1.86,2.79,24,66.96,9.0,Kum & go #76 / adair,VODKA,1032230.0,LUXCO-ST LOUIS
4,2018-09-24,ADAIR,50002,ADAIR,26826,Jack Daniels Old #7 Black Lbl,750,15.57,23.36,12,280.32,9.0,Kum & go #76 / adair,WHISKY,1012400.0,BROWN-FORMAN CORPORATION


In [65]:
iowa.to_csv('iowa_clean.csv')