# Import packages

In [1]:
import os
import pandas as pd
import difflib

# Change directory to one level up

In [2]:
os.getcwd()

'C:\\Users\\kaaym\\Documents\\mike\\projects\\poultry_meat_industry_etl\\notebooks'

In [3]:
os.chdir('../')

In [4]:
os.getcwd()

'C:\\Users\\kaaym\\Documents\\mike\\projects\\poultry_meat_industry_etl'

# Read file

In [5]:
input_df = pd.read_excel('data/MPI_Directory_by_Establishment_Name_1.xls')

In [7]:
input_df.head()

Unnamed: 0,EstNumber,Company,Street,City,State,Zip,Phone,GrantDate,Activities,DBAs
0,M13561 + P13561,165368 C. Corporation,"5617 Hoover Street, Suite A",Houston,TX,77092,(713) 263-1944,06/30/2014,Meat Processing,Long Phung Food Products
1,M53869 + P53869,"1845 Smoked Meat Company, LLC",1555 North Business 35,New Braunfels,TX,78130,(210) 378-1911,10/25/2021,"Meat Processing, Poultry Processing",
2,M47086,1923 Chili,2712 Wilmington Rd,New Castle,PA,16105,(724) 657-7151,12/12/2019,Meat Processing,
3,M7067 + P7067,"1st Original Texas Chili Company, Inc.",3313 N. Jones Street,Fort Worth,TX,76106,(817) 626-0983,11/28/2003,Meat Processing,T.C. Foods; Texas Chili
4,M48294 + P48294,210 Foods LLC,53 Kinder Dr.,San Antonio,TX,78212,(210) 277-0732,11/16/2017,"Meat Processing, Poultry Processing",210 Foods


In [8]:
input_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6649 entries, 0 to 6648
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   EstNumber   6649 non-null   object
 1   Company     6649 non-null   object
 2   Street      6649 non-null   object
 3   City        6649 non-null   object
 4   State       6649 non-null   object
 5   Zip         6649 non-null   int64 
 6   Phone       6649 non-null   object
 7   GrantDate   6602 non-null   object
 8   Activities  6637 non-null   object
 9   DBAs        2047 non-null   object
dtypes: int64(1), object(9)
memory usage: 519.6+ KB


# Issues identified about the data
- Some companies have multiple EstNumbers
- Some companies have the same phone numbers (Phone) but different or slight variations in name spelling
- Some company names have comma(,) others dont. **No consistency**
- Some companies have multiple Activities
- Some companies have Zip codes with less than 5 characters. **No consistency**
- Some companies have multiple DBAs


# Normalized Database
The objective of this project is to download data, clean data, create a normalized database and dump clean data in database. As part of the normalization, individual tables need to be created and companies with multiple **EstNumbers**, multiple **Activities** and **DBAs** must have individual entries for each **EstNumbers**, **Activities**, and **DBAs**. More light will be thrown on this when we design the model for the database.

# Cleaning data

In [9]:
def remove_comma(text):
    if ',' in text:
        text = ' '.join(text.split(','))
        
    return text.lower()

In [10]:
def sep_name_dba_variants(text):
    # remove dba variants in company names
    identified = False
    if ' dba' in text.lower():
        identified = True
        names = text.split(' dba')
        
    if identified:
        return [name.strip() for name in names]
    else:
        return [text]
            

In [11]:
def remove_reg_type(text):
    # remove registration type from company names       
    hld = text.split(' ')
    for expr in ['llc', 'inc', 'limited', 'ltd', 'incorporated', 'corporation', 'corp', 'co', 'company']:
        if expr in hld:
            hld.remove(expr)
    return ' '.join(hld)
            

In [12]:
def remove_puncts(text):
    hld = text
    change = False
    for punct in ['.',':',';','(',')','"','/',',']:
        change = True
        if punct in hld:
            hld = hld.replace(punct,'').strip()
    if change:
        return hld
    else:
        text

In [13]:
def rename_companies_with_common_phone(df):
    raw_df = df.copy()
    phone_index = raw_df.Phone.value_counts(sort=True, ascending=False)[raw_df.Phone.value_counts() > 1].index
    if len(phone_index) > 1:
        rename_dict = {raw_df[raw_df.Phone == num].Company.value_counts().index[0]: \
                       raw_df[raw_df.Phone == num].index for num in phone_index}

    for k,v in rename_dict.items():
        for idx in v:
            raw_df.loc[idx, 'Company'] = k
    
    return raw_df

In [14]:
def homoginize_names(text, name_list):
    matches = difflib.get_close_matches(text, name_list, cutoff = 0.7, n=2)
    if len(matches) > 1:
        if text in matches[1]:
            return matches[1]
    return text

In [15]:
def separate_compound_names(text, joiner='/'):
    if joiner in text:
        return text.split(joiner)
    else:
        return text

In [16]:
def update_dba(name, index, df):
    if type(df.DBAs.loc[index]) != float:
        df.loc[index, 'DBAs'] = df.DBAs.loc[index] +'; ' + name

    else:
        df.loc[index, 'DBAs'] = name

In [17]:
def separate_loc_names(text):
    for expr in [' - ', ' -- ']:
        if expr in text:
            return text.split(expr)[0].strip()
    return text

In [18]:
def clean_zip(text):
    return str(text).zfill(5)

In [19]:
def separate_est_num(dba, est_lst):
    hld = dba.split(' ')
    for num in est_lst:
        if num in hld:
            hld.remove(num)
    if len(hld) != 0:
        return ' '.join(hld)
    return None

In [20]:
def clean_company_names(idx, row, raw_df):
    
    # Separating compound company names
    comp_name = separate_compound_names(row.Company)
    if type(comp_name) == list:
        comp_name = comp_name[0]
        update_dba(comp_name, idx, raw_df)
    comp_name = comp_name.lower()

    # remove undesiredd punctuations
    raw_df.loc[idx, 'Company'] = remove_puncts(comp_name)

    # Separate dba variants in names
    names = sep_name_dba_variants(raw_df.loc[idx, 'Company'])
    # Update company name
    raw_df.loc[idx, 'Company'] = names[0]
    # Update DBAs
    if len(names) > 2:
        update_dba(names[0], idx, raw_df)

    # remove registration type from company names       
    raw_df.loc[idx, 'Company'] = remove_reg_type(raw_df.loc[idx, 'Company'])

    # remove location from company names
    raw_df.loc[idx, 'Company'] = separate_loc_names(raw_df.loc[idx, 'Company'])

In [21]:
def extract(row, con, col, hld_dict):
    if type(row[col]) != float:
        if con in row[col]:
            hld_dict[col].append(row[col].split(con))
        else:
            hld_dict[col].append([row[col]])
    else:
         hld_dict[col].append(list())

In [22]:
def extract_est_num(row, est_dict):
    if '+' in row.EstNumber:
        est_list = [est.strip() for est in row.EstNumber.split('+')]
        for x in range(len(est_list)):
            est_dict['EstNumber'].append(est_list[x])
    else:
        est_dict['EstNumber'].append(row.EstNumber)

In [23]:
def extract_activities(row):
    if '+' in row.EstNumber:
        est_list = [est.strip() for est in r.EstNumber.split('+')]
        for x in range(len(est_list)):
            est_dict['EstNumber'].append(est_list[x])
    else:
        est_dict['EstNumber'].append(row.EstNumber)

In [24]:
def clean_dba(i, dba_dict):    
    # Separate compound names
    hld_ = dba_dict['DBAs'][i]
    hld_i = dba_dict['DBAs'][i].copy()
    if len(hld_) > 0:
        for dba in hld_:
            hld_name = separate_compound_names(dba)
            if type(hld_name) == list:
                hld_i.remove(dba)
                hld_i.extend(hld_name)
                dba_dict['DBAs'][i] = hld_i
                
    dba_dict['DBAs'][i] = [dba.strip().lower() for dba in dba_dict['DBAs'][i]]
    
    # Remove undesired punctuations
    dba_dict['DBAs'][i] = [remove_puncts(dba) for dba in dba_dict['DBAs'][i]]
    
    # Remove registration type
    dba_dict['DBAs'][i] = [remove_reg_type(dba) for dba in dba_dict['DBAs'][i]]
    
    # Remove location from names
    dba_dict['DBAs'][i] = [separate_loc_names(dba) for dba in dba_dict['DBAs'][i]]
    
    # Remove estsblishment numbers from dbas
    est_lst=[i.strip().lower() for i in input_df.EstNumber[i].split('+')]
    dba_dict['DBAs'][i] = [separate_est_num(dba, est_lst).strip() for dba in dba_dict['DBAs'][i] if separate_est_num(dba, est_lst) != None]

In [25]:
def homogenize_dict(i, hld_dict, name_list, col):
    hld_dict[col][i] = [homoginize_names(dba, name_list) for dba in hld_dict[col][i]]

In [26]:
def homogenize_company_names(index, raw_df, company_list):
    matched_name = homoginize_names(raw_df.loc[index, 'Company'], company_list)
    if raw_df.loc[index, 'Company'] != matched_name:
        raw_df.loc[index, 'Company'] = matched_name
    else:
        raw_df.loc[index, 'Company'] = matched_name

In [27]:
def creat_val_df(df, col_slice, val_list, val_col):
    hld_dict = {col:[] for col in col_slice}
    update_col = col_slice.copy()
    update_col.remove(val_col)
    
    for idx in df.index:
        for i in range(len(val_list[idx])):
            for col in update_col:
                hld_dict[col].append(df[col][idx])
            
            hld_dict[val_col].append(val_list[idx][i].strip())
            
    return pd.DataFrame(hld_dict)
        

In [28]:
def create_business_df(df, unique_cols):
    vals= df[unique_cols].value_counts(sort=False).index
    cols = df[unique_cols].value_counts(sort=False).index.names
    data = [((tup[0]),(tup[1]),(tup[2]),(tup[3]),(tup[4])) for tup in vals]

    return pd.DataFrame(data=data, columns=cols)

In [29]:
def clean_data(df, unique_cols=['Company', 'Street', 'City', 'State', 'Zip']):
    raw_df = df.copy()
    
    dba_col_slice = unique_cols.copy()
    dba_col_slice.append('DBAs')
    dba_dict = {'DBAs':[]}
    
    est_col_slice = unique_cols.copy()
    est_col_slice.append('EstNumber')
    est_col_slice.append('GrantDate')
    est_dict = {'EstNumber':[]}
    
    act_col_slice = unique_cols.copy()
    act_col_slice.append('Activities')
    act_dict = {'Activities':[]}
    
    phone_col_slice = unique_cols.copy()
    phone_col_slice.append('Phone')
    
    print('Separating compound company names...\nRemoving undesiredd punctuations...', \
          '\nSeparating dba variants in names...\nRemoving registration type from company names...', \
          '\nExtracting DBAs...\n')
    for idx,row in df.iterrows():
        
        # clean company names
        clean_company_names(idx, row, raw_df)
        
        # extract dbas 
        extract(row, con=';', col='DBAs', hld_dict=dba_dict)
            
        # extract Activities 
        extract(row, con=',', col='Activities', hld_dict=act_dict)
        
        # Clean Zip
        raw_df.loc[idx, 'Zip'] = clean_zip(raw_df.loc[idx, 'Zip'])
        
        # Extract Establishment number
        extract(row, con='+', col='EstNumber', hld_dict=est_dict)
    
    
    for i in range(len(dba_dict['DBAs'])):
        # clean dba
        clean_dba(i, dba_dict)
    
    name_list = set([act for _ in act_dict['Activities'] for act in _])
    company_list = set([dba_ for dba in dba_dict['DBAs'] for dba_ in dba]).union(set(raw_df.Company.values))
    for index in raw_df.index:
        #homogenize dbas
        homogenize_dict(i=index, hld_dict=dba_dict, name_list=company_list, col='DBAs')
        
        #homogenize activities
        homogenize_dict(i=index, hld_dict=act_dict, name_list=company_list, col='Activities')
    
        # homogenize names
        homogenize_company_names(index=index, raw_df=raw_df, company_list=company_list)
        
        if index %100 == 0 or index == 0:
            print('Homogenizing company names and DBAs...\nIndex: ',index,'/',len(raw_df), \
                  '\nCompany: ',raw_df.loc[index, 'Company'], '\nDBAs: ',dba_dict['DBAs'][index],'\n')
    
    
    # create df of dba_dict
    dba_df = creat_val_df(df=raw_df, col_slice=dba_col_slice, val_list=dba_dict['DBAs'], val_col='DBAs')
    
    # create df of est_dict
    est_df = creat_val_df(df=raw_df, col_slice=est_col_slice, val_list=est_dict['EstNumber'], val_col='EstNumber')
    
    # create df of act_dict
    act_df = creat_val_df(df=raw_df, col_slice=act_col_slice, val_list=act_dict['Activities'], val_col='Activities')
    
    # create df for business
    business_df = create_business_df(df=raw_df, unique_cols=unique_cols)
    
    # create df for phone numbers
    phone_df = business_df.merge(raw_df[phone_col_slice], on=unique_cols)
    
    return dba_df, est_df, act_df, business_df, phone_df
        

In [30]:
out_dfs = clean_data(input_df)

Separating compound company names...
Removing undesiredd punctuations... 
Separating dba variants in names...
Removing registration type from company names... 
Extracting DBAs...

Homogenizing company names and DBAs...
Index:  0 / 6649 
Company:  165368 c 
DBAs:  ['long phung food products'] 

Homogenizing company names and DBAs...
Index:  100 / 6649 
Company:  abe manufacturing 
DBAs:  [] 

Homogenizing company names and DBAs...
Index:  200 / 6649 
Company:  aliyans global 
DBAs:  [] 

Homogenizing company names and DBAs...
Index:  300 / 6649 
Company:  americold logistics 
DBAs:  [] 

Homogenizing company names and DBAs...
Index:  400 / 6649 
Company:  andy's meats 
DBAs:  [] 

Homogenizing company names and DBAs...
Index:  500 / 6649 
Company:  b&a gourment food 
DBAs:  ["abuela's gourmet kitchen", 'america food services', 'big ranch beef', 'charlie steak house', 'corte argentino', 'goya', 'la toca blanca', 'mambo', 'menu art', 'natura foods', "nature's cut", 'tradiciones andinas', 

Homogenizing company names and DBAs...
Index:  5500 / 6649 
Company:  southwest processor 
DBAs:  ['kot'] 

Homogenizing company names and DBAs...
Index:  5600 / 6649 
Company:  stormberg foods 
DBAs:  [] 

Homogenizing company names and DBAs...
Index:  5700 / 6649 
Company:  sysco western minnesota 
DBAs:  ["appert's foodservice", 'buckhead meats of minnesota'] 

Homogenizing company names and DBAs...
Index:  5800 / 6649 
Company:  texas tech university gordon w davis meat science laboratory 
DBAs:  [] 

Homogenizing company names and DBAs...
Index:  5900 / 6649 
Company:  thibodeaux's cajun food 
DBAs:  [] 

Homogenizing company names and DBAs...
Index:  6000 / 6649 
Company:  truitt bros 
DBAs:  [] 

Homogenizing company names and DBAs...
Index:  6100 / 6649 
Company:  tyson prepared foods 
DBAs:  ['tyson deli', 'tyson foodsinc', 'tyson sales and distribution'] 

Homogenizing company names and DBAs...
Index:  6200 / 6649 
Company:  united states cold storage lp 
DBAs:  ['united stat

In [39]:
out_dfs[4]

Unnamed: 0,Company,Street,City,State,Zip,Phone
0,165368 c,"5617 Hoover Street, Suite A",Houston,TX,77092,(713) 263-1944
1,1845 smoked meat,1555 North Business 35,New Braunfels,TX,78130,(210) 378-1911
2,1923 chili,2712 Wilmington Rd,New Castle,PA,16105,(724) 657-7151
3,1st original texas chili,3313 N. Jones Street,Fort Worth,TX,76106,(817) 626-0983
4,210 foods,53 Kinder Dr.,San Antonio,TX,78212,(210) 277-0732
...,...,...,...,...,...,...
6644,zummo meat,3705 St. James Blvd.,Beaumont,TX,77705,(409) 842-1810
6645,zuppardi's frozen foods,214 Main Street,West Haven,CT,06516,(203) 980-0392
6646,zwanenberg food group usa,3640 Muddy Creek Road,Cincinnati,OH,45238,(513) 682-6000
6647,zweigle's,651 Plymouth Ave North,Rochester,NY,14608,(585) 546-1740


In [32]:
for val in input_df.Activities.values:
    print(val,'\n')

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Certification - Export, Identification - Meat, Identification - Poultry 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Meat Processing, Meat Slaughter 

Meat Processing 

Meat Processing, Meat Slaughter, Poultry Processing, Voluntary Processing - Exotic, Voluntary Processing - Meat, Voluntary Slaughter - Exotic, Voluntary Slaughter - Meat 

Meat Processing, Poultry Processing 

Animal Foods Inspection, Meat Processing, Poultry Processing, Voluntary Processing - Meat 

Meat Processing, Meat Slaughter 

Meat Processing, Meat Slaughter 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing, Voluntary Processing - Meat 

Meat Processing, Poultry Processing 

Certification - Export 

M

Meat Processing, Meat Slaughter 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter 

Certification - Trichinae, Meat Processing, Meat Slaughter, Poultry Processing, Poultry Slaughter, Voluntary Processing - Meat, Voluntary Processing - Poultry, Voluntary Slaughter - Meat, Voluntary Sl 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Certification - Export, Food Inspection, Meat Processing, Poultry Processing, Voluntary Processing - Poultry 

Meat Processing, Poultry Processing 

Certification - Export, Meat Processing, Poultry Processing 

Certification - Cysticercus, Certification - Trichinae, Identification - Meat, Identification - Poultry, Off-Premise Freezing - Meat, Off-Premise Freezing - Poultry 

Certification - Export 

Meat Processing, Poultry Processing 

Meat Processing 

Meat P


Meat Processing, Poultry Processing 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Certification - Export, Identification - Meat, Identification - Poultry 

Meat Processing, Poultry Processing 

Meat Processing 

Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter, Poultry Processing, Poultry Slaughter, Voluntary Processing - Exotic, Voluntary Processing - Meat, Voluntary Slaughter - Exotic, Voluntary Slaughter - Meat 

Meat Processing, Meat Slaughter, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Identification - Meat, Meat Processing, Voluntary Processing - Exotic, Voluntary Processing - Meat, Voluntary Processing - Poultry, Voluntary Processing - Rabbit 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Meat P

Meat Processing, Meat Slaughter, Poultry Processing, Poultry Slaughter 

Meat Processing, Poultry Processing, Voluntary Processing - Meat 

Certification - Cysticercus, Certification - Export, Certification - Trichinae, Identification - Meat, Identification - Poultry, Off-Premise Freezing - Meat, Off-Premise Freezing - Poultry 

Meat Processing, Poultry Processing 

Identification - Meat, Identification - Poultry 

Meat Processing, Poultry Processing 

Certification - Export, Identification - Meat, Identification - Poultry 

Meat Processing, Poultry Processing 

Certification - Export, Identification - Meat, Identification - Poultry, Identification - Siluriformes 

Imported Product 

Certification - Export, Certification - Trichinae, Identification - Meat, Identification - Poultry 

Certification - Export, Certification - Trichinae, Identification - Meat, Identification - Poultry, Off-Premise Freezing - Egg Products, Off-Premise Freezing - Meat, Off-Premise Freezing - Poultry 

Meat Pr

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Meat Processing, Poultry Processing 

Food Inspection, Meat Slaughter 

Meat Processing, Poultry Processing 

Poultry Processing, Poultry Slaughter 

Meat Processing 

Meat Processing 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Certification - Export, Identification - Meat, Identification - Poultry 

Meat Processing, Meat Slaughter, Poultry Processing 

Meat Processing, Meat Slaughter 

Meat Processing, Meat Slaughter, Poultry Processing, Poultry Slaughter 

Meat Processing 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing

Meat Processing 

Meat Processing, Meat Slaughter 

Meat Processing, Meat Slaughter 

Egg Product 

Meat Processing, Meat Slaughter 

Meat Processing, Poultry Processing, Voluntary Processing - Meat 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter 

Certification - Export, Meat Processing, Meat Slaughter, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Meat Processing, Meat Slaughter 

Meat Processing, Poultry Processing, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Meat Processing, Poultry Processing 

Certification - Export, Egg Product 

Meat Processing

Identification - Meat, Identification - Poultry 

Certification - Cysticercus, Certification - Export, Certification - Trichinae, Identification - Meat, Identification - Poultry, Off-Premise Freezing - Meat, Off-Premise Freezing - Poultry 

Meat Processing 

Meat Processing, Meat Slaughter 

Meat Processing 

Certification - Export, Identification - Meat, Identification - Poultry 

Meat Processing, Meat Slaughter, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter, Poultry Processing, Poultry Slaughter 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing, Voluntary Processing - Exotic, Voluntary Processing - Meat, Voluntary Processing - Rabbit 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Certification - Export, Meat Processing 

Certification


Meat Processing, Poultry Processing 

Meat Processing 

Meat Processing, Meat Slaughter, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Certification - Export, Meat Processing, Poultry Processing 

Certification - Export, Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter, Voluntary Processing - Exotic, Voluntary Processing - Meat, Voluntary Slaughter - Exotic, Voluntary Slaughter - Meat 

Meat Processing, Poultry Processing 

Imported Product 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter 

Meat Processing, Poultry Processing, Voluntary Slaughter - Meat 

Meat Processing, Poultry Processing, Poultry Slaughter, Voluntary Processin

Certification - Export, Identification - Meat, Identification - Poultry, Off-Premise Freezing - Meat, Off-Premise Freezing - Poultry 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Voluntary Processing - Meat 

Meat Processing, Poultry Processing 

Imported Product 

Certification - Export, Certification - Trichinae, Identification - Meat, Identification - Poultry, Off-Premise Freezing - Meat, Off-Premise Freezing - Poultry 

Certification - Export, Certification - Trichinae, Identification - Meat, Identification - Poultry, Off-Premise Freezing - Meat, Off-Premise Freezing - Poultry 

Meat Processing 

Meat Processing, Poultry Processing 

Certification - Export, Identification - Meat, Identification - Poultry, Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Poultry Slaughter 

Meat Processing 

Meat Processing 

Imported Product 

Meat Processing, Poultry Processing 

Certification - Export, Identification - Me

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter, Poultry Processing 

Certification - Export, Identification - Meat, Identification - Poultry 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter, Poultry Processing 

Meat Processing 

Meat Processing 

Meat Processing, Meat Slaughter 

Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Egg Product 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter, Poultry Processing, Poultry Slaughter 

Meat Processing 

Meat Processing, Meat Slaughter, Poultry Processing, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Meat Processing, Poultry Processing, Poultry Slaughter 

Meat Processing, Poultry Processing 


Meat Processing, Poultry Processing 

Meat Processing 

Meat Processing, Meat Slaughter, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Imported Product 

Meat Processing, Meat Slaughter, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Meat Processing, Poultry Processing, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Certification - Export, Meat Processing 

Certification - Export, Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Certification - Export, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing, Voluntary Processing - Meat 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing,

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Certification - Export, Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter 

Food Inspection, Identification - Meat, Identification - Poultry, Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter 

Meat Processing, Poultry Processing 

Certification - Export, Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Certification - Export, Food Inspection, Identification - Meat, Identification - Poultry 

Meat Processing, Voluntary Processing - Meat 

Poultry Processing, Poultry Slaughter, Voluntary Processing - Poultry, Voluntary Slaughter - Poultry 

Certification - Export, Meat P


Certification - Export, Meat Processing, Poultry Processing 

Certification - Export 

Meat Processing 

Certification - Export, Identification - Meat, Identification - Poultry 

Certification - Export, Meat Processing, Poultry Processing, Voluntary Processing - Meat 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Poultry Processing, Poultry Slaughter 

Meat Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Poultry Processing, Poultry Slaughter, Voluntary Processing - Poultry, Voluntary Slaughter - Poultry 

Meat Processing, Meat Slaughter, Poultry Processing, Volun

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing 

Certification - Export 

Meat Processing, Meat Slaughter 

Meat Processing, Meat Slaughter 

Meat Processing, Meat Slaughter, Poultry Processing, Voluntary Processing - Meat, Voluntary Slaughter - Meat 

Meat Processing, Meat Slaughter, Voluntary Processing - Exotic, Voluntary Processing - Meat, Voluntary Processing - Poultry, Voluntary Processing - Rabbit, Voluntary Slaughter - Exotic, Voluntary Slau 

Poultry Processing 

Meat Processing, Meat Slaughter, Poultry Processing 

Certification - Export, Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing, Voluntary Processing - Exotic, Voluntary Processing - Meat 

Meat Processing, Poultry Processing 

Meat Processing, Meat Slaughter 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing 

Meat Processing, Poultry Processing, Voluntary Processing - Meat, Voluntary Processing - Rabbit 

Certification - Ex

In [33]:
ph = input_df.Phone.value_counts(sort=True, ascending=False)[input_df.Phone.value_counts() > 1]
assert len(set(input_df.Phone.unique())) - len(set(ph.index)) + ph.sum() == len(input_df)

In [34]:
len(input_df.Company.unique())

5751

In [35]:
len(set(input_df.Company.apply(lambda x:x.lower())))

5732