In [147]:
# import custom_functions as cf
import pandas as pd
import numpy as np
import os

import itertools as it

from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report,accuracy_score

In [144]:
def list_files(files):
    """Accepts a list and returns a dictionary of each item in list with a numbered index (key) starting at 1"""

    dir_list = {0:'Exit'}
    file_count = 0
    for file in files:
        file_count +=1
        dir_list[file_count] = file  # assign a number to each item in the list starting with 1 
    return dir_list  # returns the dictionary of items in the list

def print_dict(dict_items):
    """Prints the (key : value) pairing in a dictionary"""

    for key, value in dict_items.items():
        print(key, ":", value)    

def check_dict(dict_to_check, choice):
    """Checks if the input choice is a key in the provided dictionary
        and returns 'valid' if true, 'invalid' if false"""

    if choice in dict_to_check:
        return "valid"
    else:
        return "invalid"

def error_message(invalid_choice):
    print(f"Your input of '{invalid_choice}' is not a valid choice. Try again!")

def validate_int(user_input):
    """Checks if user input is an int or can be converted to an int. If true returns 'valid', if false returns 'invalid'"""

    try:
        int(float(user_input))
        return "valid"
    except ValueError:
        return "invalid"

def target_test_keys(keys): 
    """Accepts a list of lists and returns a list of the first value from each list"""
    
    spec_keys = []
    for i in range(len(keys)):
        spec_keys.append(keys[i][0])
    return spec_keys

def get_target_columns(col_list, spec_keys):
    target_columns = []
    for i in range(len(col_list)):
        if col_list[i] in spec_keys:
            target_columns.append(col_list[i])
    return target_columns

def create_spec_dict(spec_arr):
    new_dict = {}
    new_list = []
    for i in range(len(spec_arr)):
        new_list = [spec_arr[i][1],spec_arr[i][2]]
        new_dict[spec_arr[i][0]] = new_list
    return new_dict

def combine_limits(targ_col,prod_spec_dict):  # This is making an assumption that the lower limits are listed before upper limits in specifications file
    prod_ll = []
    prod_ul = []
    prod_ll_dict = {}
    prod_ul_dict = {}
    for i in range(len(targ_col)):
        key = targ_col[i]
        prod_ll.append(prod_spec_dict[key][0])
        prod_ul.append(prod_spec_dict[key][1])
        
    prod_ll_dict['lower_limits'] = prod_ll  # create dict of lower limits
    prod_ul_dict['upper_limits'] = prod_ul  # create dict of upper limits
    return {**prod_ll_dict, **prod_ul_dict}  # return dict of both dictionaries

### Display file options to user

In [27]:
product_options = sorted(list(('BR0010P850','YL0010C229')))
user_options = list_files(product_options)  # returned dict of files in directory
print_dict(user_options)

0 : Exit
1 : BR0010P850
2 : YL0010C229


### Allow user to choose a product displayed above

In [76]:
user_choice = input('Enter the number next to the file to open. (0 to exit): ')
validate_choice = validate_int(user_choice) # confirm user input is an int or can be converted to an int 
if validate_choice == 'valid':
    user_choice = int(float(user_choice))
    validate_choice = check_dict(user_options, user_choice) # check to see if user input is a key in the dictionary
while validate_choice == 'invalid':
    error_message(user_choice)
    user_choice = input('Enter the number next to the file to open. (0 to exit): ')
    validate_choice = validate_int(user_choice)
    if validate_choice == 'valid':
        user_choice = int(float(user_choice))
        validate_choice = check_dict(user_options, user_choice) # check to see if user input is a key in the dictionary

selected_product = user_options[user_choice]
if selected_product == 'Exit':
    print(f'No product selected')
else:
    print(f'You selected {selected_product}')

You selected BR0010P850


### Set file paths

In [119]:
if selected_product == 'Exit':
    print('No product was selected from the cell above. Please select a product in the cell above to continue.')
else:
    path = 'data/'
    file_ext = ".xlsx"
    prod_ingredient_file = path + selected_product + "_ingredients" + file_ext
    prod_color_results = path + selected_product + "_color_results" + file_ext
    approved_ingredients = path + "approved_ingredients" + file_ext  # all products are in this file
    formulas = path + "formulas" + file_ext  # all products are in this file
    specifications = path + "specifications" + file_ext  # all product specs are in this file

data/BR0010P850_ingredients.xlsx
data/specifications.xlsx


### Read files into pandas dataframe

In [131]:
ingredient_df = pd.read_excel(prod_ingredient_file)  # ingriendients used for each batch of selected product
color_results_df= pd.read_excel(prod_color_results)  # color results for each batch of selected product
approv_ingred_df = pd.read_excel(approved_ingredients).sort_values(by=['Material','rm_code'])
approv_ingred_df = approv_ingred_df[approv_ingred_df['Material']== selected_product + '-RMIX']  # filter df by selected product
formula_df = pd.read_excel(formulas).sort_values(by=['Material','Ingredient'])
formula_df = formula_df[formula_df['Material']== selected_product + '-RMIX']  # filter df by selected product
product_specs_df = pd.read_excel(specifications)
product_specs_df = product_specs_df[product_specs_df['product'] == selected_product].set_index('product')

### Investigate ingredient df for null values and data types

In [93]:
print(ingredient_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 230 entries, 0 to 229
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Date Issued                 66 non-null     datetime64[ns]
 1   Material                    230 non-null    object        
 2   Order                       66 non-null     float64       
 3   RMIX Batch                  230 non-null    int64         
 4   Antimony Oxide              230 non-null    object        
 5   Manganic Oxide              230 non-null    object        
 6   Titanium Dioxide - Anatase  230 non-null    object        
 7   Kiln                        66 non-null     float64       
 8   Hearth                      0 non-null      float64       
 9   Cycle                       0 non-null      float64       
 10  Temp                        0 non-null      float64       
dtypes: datetime64[ns](1), float64(5), int64(1), object(4)
memo

### Modify ingredient df as needed

In [99]:
ingredient_df_mod =  ingredient_df.drop(columns=['Date Issued', 'Order',
       'Kiln', 'Hearth', 'Cycle', 'Temp'])
ingredient_df_mod['RMIX Batch'] = ingredient_df_mod['RMIX Batch'].astype(str)  # RMIX Batch looks like an int but should be treated like a string

### Investigate color results df for null values and data types

In [102]:
print(color_results_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 221 entries, 0 to 220
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   RMIX Batch  221 non-null    int64  
 1   Fin Batch   221 non-null    int64  
 2   Equip       221 non-null    object 
 3   MDL         221 non-null    float64
 4   MDA         221 non-null    float64
 5   MDB         221 non-null    float64
 6   TDL         221 non-null    float64
 7   TDA         221 non-null    float64
 8   TDB         221 non-null    float64
 9   STR         221 non-null    int64  
dtypes: float64(6), int64(3), object(1)
memory usage: 17.4+ KB
None


### Modify color results df as needed

In [103]:
# color_results_df_mod =  color_results_df.drop(columns=[''])
color_results_df_mod = color_results_df
color_results_df_mod[['RMIX Batch','Fin Batch']] = color_results_df_mod[['RMIX Batch','Fin Batch']].astype(str)  # RMIX Batch & Fin Batch should be treated like a string

### Merge ingredient df and color df

In [106]:
df = ingredient_df_mod.merge(color_results_df_mod,how='left',left_on='RMIX Batch',right_on='RMIX Batch')

In [107]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 231 entries, 0 to 230
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Material                    231 non-null    object 
 1   RMIX Batch                  231 non-null    object 
 2   Antimony Oxide              231 non-null    object 
 3   Manganic Oxide              231 non-null    object 
 4   Titanium Dioxide - Anatase  231 non-null    object 
 5   Fin Batch                   203 non-null    object 
 6   Equip                       203 non-null    object 
 7   MDL                         203 non-null    float64
 8   MDA                         203 non-null    float64
 9   MDB                         203 non-null    float64
 10  TDL                         203 non-null    float64
 11  TDA                         203 non-null    float64
 12  TDB                         203 non-null    float64
 13  STR                         203 non

### Only interested in rows that have both ingredients and color values

In [108]:
df = df.dropna()

In [122]:
df

Unnamed: 0,Material,RMIX Batch,Antimony Oxide,Manganic Oxide,Titanium Dioxide - Anatase,Fin Batch,Equip,MDL,MDA,MDB,TDL,TDA,TDB,STR
1,BR0010P850-RMIX,23633,RM0615,RM4620,RM6729,23799,MX003,-1.0,-0.4,-0.9,-0.7,-0.4,-1.0,104.0
2,BR0010P850-RMIX,23633,RM0615,RM4620,RM6729,23799,MX003,-1.0,-0.4,-0.9,-0.7,-0.4,-1.0,104.0
6,BR0010P850-RMIX,54216,RM0615,RM4620,RM6718,54250,MX003,0.6,0.1,0.0,-0.2,0.2,0.3,102.0
7,BR0010P850-RMIX,54715,RM0615,RM4620,RM6718,54729,MX003,-1.4,-0.4,-0.7,-0.4,-0.4,-1.0,102.0
8,BR0010P850-RMIX,54716,RM0615,RM4620,RM6718,54730,MX003,-0.1,-0.2,-0.7,-0.2,-0.1,-0.6,101.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215,BR0010P850-RMIX,40693,RM0615,RM4621,RM6713,40705,MX003,0.4,-0.1,-0.2,0.5,-0.2,-0.1,97.0
216,BR0010P850-RMIX,40694,RM0615,RM4621,RM6713,40706,MX003,0.1,-0.2,-0.2,0.6,-0.3,-0.6,95.0
217,BR0010P850-RMIX,41775,RM0615,RM4620,RM6713,41792,MX003,-0.3,0.0,-0.1,-0.1,-0.1,-0.2,100.0
219,BR0010P850-RMIX,41779,RM0615,RM4620,RM6713,41794,MX003,0.1,0.0,0.1,-0.1,0.0,0.2,101.0


### Arrange product specifications to compare against color results

In [152]:
prod_specs_arr = product_specs_df.values  # create numpy array from product specifications df
prod_target_keys = target_test_keys(prod_specs_arr)  # creates a list of product test keys that are in specifications file
target_columns = get_target_columns(df.columns, prod_target_keys)  # creates a list of column name values from the selected file that match values in prod_target_keys list
prod_spec_dict = create_spec_dict(prod_specs_arr)  # creates a dict of prod test keys {key = test key: value = [lower limit value, upper limit value]}
prod_limits_dict = combine_limits(target_columns,prod_spec_dict) # create dict of all product limits {key = upper_limits or lower_limits: value = [all test key limits]}


### Compare product color results versus product specifications

In [174]:
test_values = df[target_columns].to_numpy(dtype=float)  # numpy array of color results that have an associated product specification
lower_limits = np.array(prod_limits_dict['lower_limits'])
upper_limits = np.array(prod_limits_dict['upper_limits'])
is_below_ll = (np.less(test_values,lower_limits))  # array displays true if test value below lower spec limit
is_above_ul = (np.greater(test_values,upper_limits))  # array displays true if test value above upper spec limit
combined_arr = np.add(is_below_ll, is_above_ul)  # adds the two boolean arrays 
# create dataframe of the reverse boolean numpy array (using ~)
combined_df_in_spec = ~pd.DataFrame(data=combined_arr[0:,0:],
                        index=(df['Fin Batch'].values),  #  assuming "Lot" is a column name in selected product file
                        columns=target_columns)

print(combined_arr)

[[False False False ... False  True False]
 [False False False ... False  True False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]


In [175]:
combined_df_in_spec['is_in_spec'] = combined_df_in_spec.prod(1)  # create column in df to show if all test results for a batch are within all specifications
batch_in_spec_df = combined_df_in_spec['is_in_spec']
target_df = df.merge(batch_in_spec_df, how='left', left_on='Fin Batch', right_index=True)

In [176]:
target_df

Unnamed: 0,Material,RMIX Batch,Antimony Oxide,Manganic Oxide,Titanium Dioxide - Anatase,Fin Batch,Equip,MDL,MDA,MDB,TDL,TDA,TDB,STR,is_in_spec
1,BR0010P850-RMIX,23633,RM0615,RM4620,RM6729,23799,MX003,-1.0,-0.4,-0.9,-0.7,-0.4,-1.0,104.0,0
1,BR0010P850-RMIX,23633,RM0615,RM4620,RM6729,23799,MX003,-1.0,-0.4,-0.9,-0.7,-0.4,-1.0,104.0,0
2,BR0010P850-RMIX,23633,RM0615,RM4620,RM6729,23799,MX003,-1.0,-0.4,-0.9,-0.7,-0.4,-1.0,104.0,0
2,BR0010P850-RMIX,23633,RM0615,RM4620,RM6729,23799,MX003,-1.0,-0.4,-0.9,-0.7,-0.4,-1.0,104.0,0
6,BR0010P850-RMIX,54216,RM0615,RM4620,RM6718,54250,MX003,0.6,0.1,0.0,-0.2,0.2,0.3,102.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215,BR0010P850-RMIX,40693,RM0615,RM4621,RM6713,40705,MX003,0.4,-0.1,-0.2,0.5,-0.2,-0.1,97.0,1
216,BR0010P850-RMIX,40694,RM0615,RM4621,RM6713,40706,MX003,0.1,-0.2,-0.2,0.6,-0.3,-0.6,95.0,1
217,BR0010P850-RMIX,41775,RM0615,RM4620,RM6713,41792,MX003,-0.3,0.0,-0.1,-0.1,-0.1,-0.2,100.0,1
219,BR0010P850-RMIX,41779,RM0615,RM4620,RM6713,41794,MX003,0.1,0.0,0.1,-0.1,0.0,0.2,101.0,1


In [None]:
total_df_rows = combined_df_in_spec.shape[0]
total_in_spec_rows = combined_df_in_spec['is_in_spec'].sum()

In [20]:

product_ingredients = formula_df['Ingredient'].tolist()
test_dict = {}
test_list = []
for i in range(len(product_ingredients)):
   ingred = product_ingredients[i]
   df_ingred = approv_ingred_df.loc[approv_ingred_df['ingredient_name'] == ingred]
   ingred_list = df_ingred['rm_code'].to_list()
   test_dict[ingred] = ingred_list
   test_list.append(test_dict[ingred])
   print(test_list)


for key, value in test_dict.items():
   print(key,value)
#    approv_ingred_df.iloc

# prod = os.path.splitext(selected_file)[0]  # separate filename from extension 
# df_prod_specs = df_all_specs.iloc[lambda x: x.index == prod]  # filter specifications by product name from the selected file

[['RM0614', 'RM0615']]
[['RM0614', 'RM0615'], ['RM4615', 'RM4620', 'RM4621', 'RM4623', 'RM4626']]
[['RM0614', 'RM0615'], ['RM4615', 'RM4620', 'RM4621', 'RM4623', 'RM4626'], ['RM6707', 'RM6713', 'RM6714', 'RM6718', 'RM6729']]
Antimony Oxide ['RM0614', 'RM0615']
Manganic Oxide ['RM4615', 'RM4620', 'RM4621', 'RM4623', 'RM4626']
Titanium Dioxide - Anatase ['RM6707', 'RM6713', 'RM6714', 'RM6718', 'RM6729']


In [24]:
all_combinations = list(it.product(*test_list))
print(all_combinations)

# aggregate batch ingredients and count ratio of grade 1?


[('RM0614', 'RM4615', 'RM6707'), ('RM0614', 'RM4615', 'RM6713'), ('RM0614', 'RM4615', 'RM6714'), ('RM0614', 'RM4615', 'RM6718'), ('RM0614', 'RM4615', 'RM6729'), ('RM0614', 'RM4620', 'RM6707'), ('RM0614', 'RM4620', 'RM6713'), ('RM0614', 'RM4620', 'RM6714'), ('RM0614', 'RM4620', 'RM6718'), ('RM0614', 'RM4620', 'RM6729'), ('RM0614', 'RM4621', 'RM6707'), ('RM0614', 'RM4621', 'RM6713'), ('RM0614', 'RM4621', 'RM6714'), ('RM0614', 'RM4621', 'RM6718'), ('RM0614', 'RM4621', 'RM6729'), ('RM0614', 'RM4623', 'RM6707'), ('RM0614', 'RM4623', 'RM6713'), ('RM0614', 'RM4623', 'RM6714'), ('RM0614', 'RM4623', 'RM6718'), ('RM0614', 'RM4623', 'RM6729'), ('RM0614', 'RM4626', 'RM6707'), ('RM0614', 'RM4626', 'RM6713'), ('RM0614', 'RM4626', 'RM6714'), ('RM0614', 'RM4626', 'RM6718'), ('RM0614', 'RM4626', 'RM6729'), ('RM0615', 'RM4615', 'RM6707'), ('RM0615', 'RM4615', 'RM6713'), ('RM0615', 'RM4615', 'RM6714'), ('RM0615', 'RM4615', 'RM6718'), ('RM0615', 'RM4615', 'RM6729'), ('RM0615', 'RM4620', 'RM6707'), ('RM061

In [25]:
# approv_ingred_df = pd.read_excel('data/approved_ingredients.xlsx').set_index('ingredient_name')

product_ingredients = formula_df['Ingredient'].tolist()
print(product_ingredients)

listA = ['a','b','c','d','e']
listB = ['c','d','e']
listC = ['f','g','h']


listA = (list(it.combinations(['Antimony Oxide', 'Manganic Oxide', 'Titanium Dioxide - Anatase'],3)))
listB = (list(it.combinations(['Antimony Oxide', 'Antimony Oxide', 'Titanium Dioxide - Anatase'],3)))
listC = (list(it.combinations(['Antimony Oxide', 'Manganic Oxide', 'Manganic Oxide'],3)))

print()

# xy = (zip(listA,listB))
# print(list(xy))
# # new_list = 

# convert lists to sets and test equality before adding to a unique list
# if set(product_ingredients) == for each comb in list(it.combinations(approved_list,3))

['Antimony Oxide', 'Manganic Oxide', 'Titanium Dioxide - Anatase']



In [4]:
print(ingredient_df.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 230 entries, 0 to 229
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Date Issued                 66 non-null     datetime64[ns]
 1   Plan Month                  22 non-null     object        
 2   Material                    230 non-null    object        
 3   Order                       66 non-null     float64       
 4   RMIX Batch                  230 non-null    int64         
 5   Antimony Oxide              230 non-null    object        
 6   Manganic Oxide              230 non-null    object        
 7   Titanium Dioxide - Anatase  230 non-null    object        
 8   Kiln                        66 non-null     float64       
 9   Hearth                      0 non-null      float64       
 10  Cycle                       0 non-null      float64       
dtypes: datetime64[ns](1), float64(4), int64(1), object(5)
memo

In [24]:
# ingredient_df[['RMIX Batch', 'Kiln', 'Hearth','Cycle']] = ingredient_df[['RMIX Batch', 'Kiln', 'Hearth','Cycle']].astype(str)

In [25]:
ing_df_wk['RMIX Batch'] = ing_df_wk['RMIX Batch'].astype(str)

In [26]:
ing_df_wk.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 230 entries, 0 to 229
Data columns (total 5 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   Material                    230 non-null    object
 1   RMIX Batch                  230 non-null    object
 2   Antimony Oxide              230 non-null    object
 3   Manganic Oxide              230 non-null    object
 4   Titanium Dioxide - Anatase  230 non-null    object
dtypes: object(5)
memory usage: 9.1+ KB


In [27]:
# print(color_results_df.info())

In [28]:
print(color_results_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 221 entries, 0 to 220
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   RMIX Batch  221 non-null    int64  
 1   Fin Batch   221 non-null    int64  
 2   Equip       221 non-null    object 
 3   MDL         221 non-null    float64
 4   MDA         221 non-null    float64
 5   MDB         221 non-null    float64
 6   TDL         221 non-null    float64
 7   TDA         221 non-null    float64
 8   TDB         221 non-null    float64
 9   STR         221 non-null    int64  
dtypes: float64(6), int64(3), object(1)
memory usage: 17.4+ KB
None


In [29]:
color_results_df[['RMIX Batch', 'Fin Batch']] = color_results_df[['RMIX Batch', 'Fin Batch']].astype(str)

In [30]:
color_df_wk = color_results_df

In [31]:
color_df_wk

Unnamed: 0,RMIX Batch,Fin Batch,Equip,MDL,MDA,MDB,TDL,TDA,TDB,STR
0,23633,23799,MX003,-1.0,-0.4,-0.9,-0.7,-0.4,-1.0,104
1,35324,35816,MX003,-0.2,-0.2,-0.5,-0.3,-0.4,-0.8,102
2,42908,53780,MX007,0.0,-0.1,-0.2,0.2,-0.1,-0.2,98
3,42909,53781,MX003,0.2,-0.2,-0.4,-0.1,-0.1,-0.3,100
4,42910,53782,MX003,0.4,0.0,-0.2,0.2,0.0,0.0,98
...,...,...,...,...,...,...,...,...,...,...
216,40693,40705,MX003,0.4,-0.1,-0.2,0.5,-0.2,-0.1,97
217,40694,40706,MX003,0.1,-0.2,-0.2,0.6,-0.3,-0.6,95
218,41775,41792,MX003,-0.3,0.0,-0.1,-0.1,-0.1,-0.2,100
219,41779,41794,MX003,0.1,0.0,0.1,-0.1,0.0,0.2,101


In [33]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 231 entries, 0 to 230
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Material                    231 non-null    object 
 1   RMIX Batch                  231 non-null    object 
 2   Antimony Oxide              231 non-null    object 
 3   Manganic Oxide              231 non-null    object 
 4   Titanium Dioxide - Anatase  231 non-null    object 
 5   Fin Batch                   203 non-null    object 
 6   Equip                       203 non-null    object 
 7   MDL                         203 non-null    float64
 8   MDA                         203 non-null    float64
 9   MDB                         203 non-null    float64
 10  TDL                         203 non-null    float64
 11  TDA                         203 non-null    float64
 12  TDB                         203 non-null    float64
 13  STR                         203 non

In [35]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 203 entries, 1 to 220
Data columns (total 14 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Material                    203 non-null    object 
 1   RMIX Batch                  203 non-null    object 
 2   Antimony Oxide              203 non-null    object 
 3   Manganic Oxide              203 non-null    object 
 4   Titanium Dioxide - Anatase  203 non-null    object 
 5   Fin Batch                   203 non-null    object 
 6   Equip                       203 non-null    object 
 7   MDL                         203 non-null    float64
 8   MDA                         203 non-null    float64
 9   MDB                         203 non-null    float64
 10  TDL                         203 non-null    float64
 11  TDA                         203 non-null    float64
 12  TDB                         203 non-null    float64
 13  STR                         203 non

In [36]:
df['Material'].dtype == 'object'

True

In [37]:
for column in df.columns:
    print(f'{column} (' + str(df.loc[df[column]==''].count().iloc[0]) + ' empty strings)')

Material (0 empty strings)
RMIX Batch (0 empty strings)
Antimony Oxide (0 empty strings)
Manganic Oxide (0 empty strings)
Titanium Dioxide - Anatase (0 empty strings)
Fin Batch (0 empty strings)
Equip (0 empty strings)
MDL (0 empty strings)
MDA (0 empty strings)
MDB (0 empty strings)
TDL (0 empty strings)
TDA (0 empty strings)
TDB (0 empty strings)
STR (0 empty strings)


In [55]:
print(approv_ingred_df.head())

                        Material rm_code
ingredient_name                         
Antimony Oxide   BR0010P850-RMIX  RM0615
Antimony Oxide   BR0010P850-RMIX  RM0614
Manganic Oxide   BR0010P850-RMIX  RM4620
Manganic Oxide   BR0010P850-RMIX  RM4621
Manganic Oxide   BR0010P850-RMIX  RM4615


In [46]:
for i in range(len(product_ingredients)):
    print(f'{i +1} {product_ingredients[i]}')


1 Antimony Oxide
2 Manganic Oxide
3 Titanium Dioxide - Anatase


In [68]:
print(approv_ingred_df)

           Material rm_code             ingredient_name
1   BR0010P850-RMIX  RM0614              Antimony Oxide
0   BR0010P850-RMIX  RM0615              Antimony Oxide
4   BR0010P850-RMIX  RM4615              Manganic Oxide
2   BR0010P850-RMIX  RM4620              Manganic Oxide
3   BR0010P850-RMIX  RM4621              Manganic Oxide
5   BR0010P850-RMIX  RM4623              Manganic Oxide
6   BR0010P850-RMIX  RM4626              Manganic Oxide
8   BR0010P850-RMIX  RM6707  Titanium Dioxide - Anatase
10  BR0010P850-RMIX  RM6713  Titanium Dioxide - Anatase
11  BR0010P850-RMIX  RM6714  Titanium Dioxide - Anatase
9   BR0010P850-RMIX  RM6718  Titanium Dioxide - Anatase
7   BR0010P850-RMIX  RM6729  Titanium Dioxide - Anatase


In [69]:
approv_ingred_df[approv_ingred_df['ingredient_name'] == 'Titanium Dioxide - Anatase']
# approv_ingred_df[(approv_ingred_df['ingredient_name'] == 'Titanium Dioxide - Anatase') & (approv_ingred_df['Material'] == 'BR0010P850-RMIX')]

Unnamed: 0,Material,rm_code,ingredient_name
8,BR0010P850-RMIX,RM6707,Titanium Dioxide - Anatase
10,BR0010P850-RMIX,RM6713,Titanium Dioxide - Anatase
11,BR0010P850-RMIX,RM6714,Titanium Dioxide - Anatase
9,BR0010P850-RMIX,RM6718,Titanium Dioxide - Anatase
7,BR0010P850-RMIX,RM6729,Titanium Dioxide - Anatase


In [70]:
approv_ingred_df.query('ingredient_name == "Titanium Dioxide - Anatase"')

Unnamed: 0,Material,rm_code,ingredient_name
8,BR0010P850-RMIX,RM6707,Titanium Dioxide - Anatase
10,BR0010P850-RMIX,RM6713,Titanium Dioxide - Anatase
11,BR0010P850-RMIX,RM6714,Titanium Dioxide - Anatase
9,BR0010P850-RMIX,RM6718,Titanium Dioxide - Anatase
7,BR0010P850-RMIX,RM6729,Titanium Dioxide - Anatase


In [71]:
approv_ingred_df.loc[approv_ingred_df['ingredient_name'] == 'Titanium Dioxide - Anatase']

Unnamed: 0,Material,rm_code,ingredient_name
8,BR0010P850-RMIX,RM6707,Titanium Dioxide - Anatase
10,BR0010P850-RMIX,RM6713,Titanium Dioxide - Anatase
11,BR0010P850-RMIX,RM6714,Titanium Dioxide - Anatase
9,BR0010P850-RMIX,RM6718,Titanium Dioxide - Anatase
7,BR0010P850-RMIX,RM6729,Titanium Dioxide - Anatase


In [72]:

test_dict = {}
for i in range(len(product_ingredients)):
   ingred = product_ingredients[i]
   ingred_num = f'ingred_{i+1}'
   df_ingred = approv_ingred_df.loc[approv_ingred_df['ingredient_name'] == ingred]

   print(df_ingred)

#    approv_ingred_df.iloc

# prod = os.path.splitext(selected_file)[0]  # separate filename from extension 
# df_prod_specs = df_all_specs.iloc[lambda x: x.index == prod]  # filter specifications by product name from the selected file

          Material rm_code ingredient_name
1  BR0010P850-RMIX  RM0614  Antimony Oxide
0  BR0010P850-RMIX  RM0615  Antimony Oxide
          Material rm_code ingredient_name
4  BR0010P850-RMIX  RM4615  Manganic Oxide
2  BR0010P850-RMIX  RM4620  Manganic Oxide
3  BR0010P850-RMIX  RM4621  Manganic Oxide
5  BR0010P850-RMIX  RM4623  Manganic Oxide
6  BR0010P850-RMIX  RM4626  Manganic Oxide
           Material rm_code             ingredient_name
8   BR0010P850-RMIX  RM6707  Titanium Dioxide - Anatase
10  BR0010P850-RMIX  RM6713  Titanium Dioxide - Anatase
11  BR0010P850-RMIX  RM6714  Titanium Dioxide - Anatase
9   BR0010P850-RMIX  RM6718  Titanium Dioxide - Anatase
7   BR0010P850-RMIX  RM6729  Titanium Dioxide - Anatase
