# Cleaning the Dataset for ML Algorithm
#### This Notebook will delete all the products with the toxins and create a new dataset used for the machine learning algorithm
- This notebook will only contain a dataframe and a seperate list of product names (used for website) with the clean products
- The dataframe is [here](../data/processed/cleaned_products_data.csv)
- The seperate list of products is [here](../reports/hazardous_products.txt)

In [114]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy as sp

In [115]:
df = pd.read_csv('../data/raw/cosmetics.csv')
df.info()
df.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1472 entries, 0 to 1471
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Label        1472 non-null   object 
 1   Brand        1472 non-null   object 
 2   Name         1472 non-null   object 
 3   Price        1472 non-null   int64  
 4   Rank         1472 non-null   float64
 5   Ingredients  1472 non-null   object 
 6   Combination  1472 non-null   int64  
 7   Dry          1472 non-null   int64  
 8   Normal       1472 non-null   int64  
 9   Oily         1472 non-null   int64  
 10  Sensitive    1472 non-null   int64  
dtypes: float64(1), int64(6), object(4)
memory usage: 126.6+ KB


Unnamed: 0,Label,Brand,Name,Price,Rank,Ingredients,Combination,Dry,Normal,Oily,Sensitive
0,Moisturizer,LA MER,Crème de la Mer,175,4.1,"Algae (Seaweed) Extract, Mineral Oil, Petrolat...",1,1,1,1,1
1,Moisturizer,SK-II,Facial Treatment Essence,179,4.1,"Galactomyces Ferment Filtrate (Pitera), Butyle...",1,1,1,1,1
2,Moisturizer,DRUNK ELEPHANT,Protini™ Polypeptide Cream,68,4.4,"Water, Dicaprylyl Carbonate, Glycerin, Ceteary...",1,1,1,1,0
3,Moisturizer,LA MER,The Moisturizing Soft Cream,175,3.8,"Algae (Seaweed) Extract, Cyclopentasiloxane, P...",1,1,1,1,1
4,Moisturizer,IT COSMETICS,Your Skin But Better™ CC+™ Cream with SPF 50+,38,4.1,"Water, Snail Secretion Filtrate, Phenyl Trimet...",1,1,1,1,1
5,Moisturizer,TATCHA,The Water Cream,68,4.2,"Water, Saccharomyces/Camellia Sinensis Leaf/Cl...",1,0,1,1,1
6,Moisturizer,DRUNK ELEPHANT,Lala Retro™ Whipped Cream,60,4.2,"Water, Glycerin, Caprylic/ Capric Triglyceride...",1,1,1,1,0
7,Moisturizer,DRUNK ELEPHANT,Virgin Marula Luxury Facial Oil,72,4.4,100% Unrefined Sclerocraya Birrea (Marula) Ker...,1,1,1,1,0
8,Moisturizer,KIEHL'S SINCE 1851,Ultra Facial Cream,29,4.4,"Water, Glycerin, Cyclohexasiloxane, Squalane, ...",1,1,1,1,1
9,Moisturizer,LA MER,Little Miss Miracle Limited-Edition Crème de l...,325,5.0,"Algae (Seaweed) Extract, Mineral Oil, Petrolat...",0,0,0,0,0


### Make a list of all of the toxins

In [116]:
ingredents_list = df['Ingredients'].tolist()
# print(ingredents_list)

In [117]:
# Create a dictionary with words as keys and variable names as values
words = {'Ethanolamine': 'ethanolamine_occ', 
         'Oxybenzone': 'oxybenzone_occ', 
         'Resorcinol': 'resorcinol_occ', 
         'Formaldehyde': 'formaldehyde_occ', 
         'Diethanol': 'diethanolamine_occ', 
         'Silane': 'silanes_occ', 
         'Siloxane': 'siloxanes_occ', 
         'Octinaxate': 'octinaxate_occ',
         #MODDERATE
         'Parfum': 'parfum_occ', 
         'Fragrance': 'fragrance_occ',
         'Triclosan': 'triclosan_occ', 
         'Octinoxates':'octinoxates_occ', 
         'Homosalate' : 'homosalate_occ', 
         'Teflon': 'teflon_occ',
         #HIGH
         'Talc': 'talc_occ', 
         'Parabens':'parabens_occ', 
         'Propylene Glycol': 'glycol_occ'}

# Iterate over the dictionary
for word, var in words.items():
  # Use the `str.contains` method to find the occurrences where the word is found in the Ingredients column
  # and create a variable with the corresponding name and value
  globals()[var] = df[df['Ingredients'].str.contains(word)]

In [118]:
%%capture cap 

words = {'Ethanolamine': 'ethanolamine_occ', 
         'Oxybenzone': 'oxybenzone_occ', 
         'Resorcinol': 'resorcinol_occ', 
         'Formaldehyde': 'formaldehyde_occ', 
         'Diethanol': 'diethanolamine_occ', 
         'Silane': 'silanes_occ', 
         'Siloxane': 'siloxanes_occ', 
         'Octinaxate': 'octinaxate_occ',
         #MODDERATE
         'Parfum': 'parfum_occ', 
         'Fragrance': 'fragrance_occ',
         'Triclosan': 'triclosan_occ', 
         'Octinoxates':'octinoxates_occ', 
         'Homosalate' : 'homosalate_occ', 
         'Teflon': 'teflon_occ',
         #HIGH
         'Talc': 'talc_occ', 
         'Parabens':'parabens_occ', 
         'Propylene Glycol': 'glycol_occ'}

words_tuples = {}
for chemical, variable in words.items():
    variable_value = eval(variable)
    index = variable_value.index
    name = variable_value.loc[index, "Name"]
    if index.empty:
        words_tuples[chemical] = "No chemical found"
    else:
        tuples = [(i, n) for i, n in zip(index, name)]
        words_tuples[chemical] = tuples

with open('../reports/hazardous_products.txt', 'w') as f:
    for chemical, tuples in words_tuples.items():
        if tuples == "No chemical found":
            f.write(f"No {chemical} found\n")
        else:
            f.write(f"{chemical} found at index numbers:\n")
            for i, n in tuples:
                f.write(f"{i} - {n}\n")
            f.write("\n")

In [112]:
with open("../references/hazardous_products.txt", "r") as file:
    index_numbers = [int(line.strip()) for line in file]

selected_products = df.loc[df['Name'].isin(index_numbers)]

print(selected_products['Name'])

ValueError: invalid literal for int() with base 10: 'No Ethanolamine found'

In [119]:
with open("../references/hazardous_products.txt", "r") as file:
    index_numbers = []
    for line in file:
        try:
            index_numbers.append(int(line.strip()))
        except ValueError:
            continue
    index_numbers = [int(line.strip()) for line in file]

# Use the .loc[] method to select the rows of the dataframe where the index number matches the number in your list
selected_products = df.loc[df['Name'].isin(index_numbers)]

# Print the selected products
print(selected_products)

Empty DataFrame
Columns: [Label, Brand, Name, Price, Rank, Ingredients, Combination, Dry, Normal, Oily, Sensitive]
Index: []


### Make a loop that will run the same function on other toxins
#### Store the toxins in a list
- master list
- hazard levels
    - low
    - moderate
    - high
#### Use a loop to print the products that contains the toxins

### Lists sorted by Toxicity (for reference)


In [120]:
# master_list = [
#     silanes_occ, oxybenzone_occ, siloxanes_occ, resorcinol_occ, ethanolamine_occ, formaldehyde_occ, diethanolamine_occ, octinaxate_occ,
#     parfum_occ, fragrance_occ, triclosan_occ, octinoxates_occ, homosalate_occ, teflon_occ,
#     talc_occ, parabens_occ, glycol_occ]

# low_list = [
#     silanes_occ, oxybenzone_occ, siloxanes_occ, resorcinol_occ, ethanolamine_occ, formaldehyde_occ, diethanolamine_occ, octinaxate_occ,
#     ]

# moderate_list = [
#     parfum_occ, fragrance_occ, triclosan_occ, octinoxates_occ, homosalate_occ, teflon_occ,
#     ]

# high_list = [
#     talc_occ, parabens_occ, glycol_occ
# ]

### Low Toxicity toxin products index number list is [here](../references/low_hazard_products.txt)

In [121]:
low = {'Ethanolamine': 'ethanolamine_occ',
       'Oxybenzone': 'oxybenzone_occ',
       'Resorcinol': 'resorcinol_occ',
       'Formaldehyde': 'formaldehyde_occ',
       'Diethanol': 'diethanolamine_occ',
       'Silane': 'silanes_occ',
       'Siloxane': 'siloxanes_occ',
       'Octinaxate': 'octinaxate_occ', }

low_tuples = {}
for chemical, variable in low.items():
    variable_value = eval(variable)
    index = variable_value.index
    name = variable_value.loc[index, "Name"]
    if index.empty:
        low_tuples[chemical] = "No chemical found"
    else:
        tuples = [(i, n) for i, n in zip(index, name)]
        low_tuples[chemical] = tuples

with open('../references/low_hazard_products.txt', 'w') as f:
    for chemical, tuples in low_tuples.items():
        if tuples == "No chemical found":
            f.write(f"No {chemical} found\n")
        else:
            f.write(f"{chemical} found at index numbers:\n")
            for i, n in tuples:
                f.write(f"{i} - {n}\n")
            f.write("\n")


### Moderate Toxicity toxin products index number list is [here](../references/mod_hazard_products.txt)

In [122]:
mod = {
    'Parfum': 'parfum_occ',
    'Fragrance': 'fragrance_occ',
    'Triclosan': 'triclosan_occ',
    'Octinoxates': 'octinoxates_occ',
    'Homosalate': 'homosalate_occ',
    'Teflon': 'teflon_occ',
}

mod_tuples = {}
for chemical, variable in mod.items():
    variable_value = eval(variable)
    index = variable_value.index
    name = variable_value.loc[index, "Name"]
    if index.empty:
        mod_tuples[chemical] = "No chemical found"
    else:
        tuples = [(i, n) for i, n in zip(index, name)]
        mod_tuples[chemical] = tuples

with open('../references/mod_hazard_products.txt', 'w') as f:
    for chemical, tuples in mod_tuples.items():
        if tuples == "No chemical found":
            f.write(f"No {chemical} found\n")
        else:
            f.write(f"{chemical} found at index numbers:\n")
            for i, n in tuples:
                f.write(f"{i} - {n}\n")
            f.write("\n")

### High Toxicity toxin products index number list is [here](../references/high_hazard_products.txt)

In [130]:
high = {'Talc': 'talc_occ',
        'Parabens': 'parabens_occ',
        'Propylene Glycol ': 'glycol_occ'}

high_tuples = {}
for chemical, variable in mod.items():
    variable_value = eval(variable)
    index = variable_value.index
    name = variable_value.loc[index, "Name"]
    if index.empty:
        high_tuples[chemical] = "No chemical found"
    else:
        tuples = [(i, n) for i, n in zip(index, name)]
        high_tuples[chemical] = tuples

with open('../references/mod_hazard_products.txt', 'w') as f:
    for chemical, tuples in high_tuples.items():
        if tuples == "No chemical found":
            f.write(f"No {chemical} found\n")
        else:
            f.write(f"{chemical} found at index numbers:\n")
            for i, n in tuples:
                f.write(f"{i} - {n}\n")
            f.write("\n")


### Reading the textfile outputs to get the index numbers (for deletion)
1. A new array is created to hold all of the indicies
2. The index numbers are read from [here](../references/hazardous_products.txt)
3. Adds to the ```products``` array if the text is a digit

In [124]:
%%capture cap 
with open('../reports/hazardous_products.txt', 'r') as f:
    contents = f.read()
    
lines = contents.split('\n')

products = []

for line in lines:
    words = line.split()
    for word in words:
        if word.isdigit():
            products.append(int(word))

print(products)

# Close the file
f.close()

print(type(products))

products_array = np.array(products)
print(type(products_array))

print(products_array)

### Add the toxicity column with these values:

- Low
- Moderate
- High
- n/a

### Now we must remove the index numbers (products) that are in the lists to make a dataset with non-toxic products

In [156]:
# removed_products = []
# with open('../reports/hazardous_products.txt', 'r') as f:
#     for line in f:
#         if 'found at index numbers:' in line:
#             for index in f:
#                 if index.strip().isdigit():
#                     removed_products.append(int(index))
#                 else:
#                     break

# removed_products_array = np.array(removed_products)

# print(removed_products_array)
# df = df.drop(removed_products_array)


# index_numbers = []
# with open('../references/hazardous_products.txt', 'r') as f:
#     for line in f:
#         if 'found at index numbers:' in line:
#             for index in f:
#                 if index.strip().isdigit():
#                     index_numbers.append(int(index))
#                 else:
#                     break

# index_array = np.array(index_numbers)
# df = df.drop(index_array)

with open('../references/hazardous_products.txt', 'r') as f:
    contents = f.read()
    
lines = contents.split('\n')

products = []

for line in lines:
    words = line.split()
    for word in words:
        if word.isdigit():
            products.append(int(word))


f.close()

products_array = np.array(products)

print(products_array)

df = df.drop(products_array)

[ 126   35  182   30  234  373 1344   30 1361   50 1364 1375   50 1405
   50 1427   35 1443 1453   30 1060    8   24   37   42   73   99  105
  134  194  200  286  398  485  495  527  538  581  619  672  687  695
  712  715  746  777  785  790  849  871  888  940  948  951  997 1067
 1071 1074 1081 1090 1124 1140 1148 1172 1215 1226 1255 1261  360 1272
 1332   30 1209   15 1274 1276    5   41   43   49   59   64   71   76
   99  100  107  112  128  149  164  165  175  183  189  196  203   15
  217  227  229  230  245  267  287  295  304  323  329  333  338  344
  370  380  381  389  400  401  411  420  423  448  472  482  495  501
  520  524  525  541  547  550  554  566  570  577  584  598  600  603
  610  621  634  637  642  646  674  686  688  700  702  732  739  779
  782  788  790  814  816  823  833  834  836  838  841  867  872  874
  889  895  899  910  917  919  947  949  952  953  955  976  977  990
  993 1036 1039 1042 1050 1070 1095 1097 1109 1118 1125 1129 1152 1158
 1167 

KeyError: '[126, 35, 182, 30, 234, 373, 1344, 30, 1361, 50, 1364, 1375, 50, 1405, 50, 1427, 35, 1443, 1453, 30, 1060, 8, 24, 37, 42, 73, 99, 105, 134, 194, 200, 286, 398, 485, 495, 527, 538, 581, 619, 672, 687, 695, 712, 715, 746, 777, 785, 790, 849, 871, 888, 940, 948, 951, 997, 1067, 1071, 1074, 1081, 1090, 1124, 1140, 1148, 1172, 1215, 1226, 1255, 1261, 360, 1272, 1332, 30, 1209, 15, 1274, 1276, 5, 41, 43, 49, 59, 64, 71, 76, 99, 100, 107, 112, 128, 149, 164, 165, 175, 183, 189, 196, 203, 15, 217, 227, 229, 230, 245, 267, 287, 295, 304, 323, 329, 333, 338, 344, 370, 380, 381, 389, 400, 401, 411, 420, 423, 448, 472, 482, 495, 501, 520, 524, 525, 541, 547, 550, 554, 566, 570, 577, 584, 598, 600, 603, 610, 621, 634, 637, 642, 646, 674, 686, 688, 700, 702, 732, 739, 779, 782, 788, 790, 814, 816, 823, 833, 834, 836, 838, 841, 867, 872, 874, 889, 895, 899, 910, 917, 919, 947, 949, 952, 953, 955, 976, 977, 990, 993, 1036, 1039, 1042, 1050, 1070, 1095, 1097, 1109, 1118, 1125, 1129, 1152, 1158, 1167, 1201, 1225, 1251, 1286, 15, 1338, 20, 1376, 25, 1384, 30, 30, 1388, 30, 1430, 1468, 1470, 0, 3, 5, 9, 12, 18, 19, 20, 22, 23, 24, 29, 30, 31, 36, 37, 40, 41, 43, 45, 47, 49, 52, 59, 60, 61, 64, 66, 68, 71, 72, 73, 76, 77, 50, 78, 79, 84, 87, 88, 18, 94, 96, 99, 100, 101, 102, 107, 112, 113, 115, 120, 122, 18, 127, 128, 129, 130, 131, 135, 137, 146, 149, 151, 162, 164, 165, 167, 30, 170, 171, 174, 175, 176, 177, 178, 179, 180, 183, 184, 187, 189, 190, 196, 197, 198, 201, 203, 15, 208, 209, 30, 211, 212, 214, 216, 50, 217, 219, 30, 223, 30, 224, 226, 227, 229, 230, 233, 236, 238, 244, 245, 247, 248, 15, 256, 262, 18, 265, 267, 269, 278, 281, 285, 286, 287, 290, 291, 23, 293, 295, 297, 303, 304, 308, 309, 315, 320, 323, 326, 327, 329, 331, 333, 337, 338, 340, 344, 348, 350, 352, 355, 357, 359, 361, 365, 369, 370, 371, 374, 379, 380, 381, 382, 383, 384, 389, 393, 398, 399, 400, 401, 405, 407, 409, 410, 411, 413, 420, 423, 431, 435, 436, 439, 440, 441, 442, 444, 446, 448, 456, 468, 472, 473, 476, 478, 482, 484, 485, 486, 488, 489, 494, 495, 497, 499, 501, 502, 503, 507, 509, 514, 515, 520, 522, 524, 525, 526, 535, 537, 540, 541, 542, 545, 547, 550, 554, 556, 559, 564, 566, 567, 570, 573, 576, 577, 582, 583, 584, 589, 591, 592, 598, 600, 601, 603, 608, 610, 612, 613, 619, 620, 621, 623, 624, 629, 634, 635, 637, 640, 30, 641, 642, 643, 645, 646, 648, 660, 662, 669, 672, 674, 677, 679, 683, 686, 688, 690, 692, 693, 700, 702, 710, 715, 717, 723, 728, 730, 731, 732, 734, 737, 738, 739, 748, 751, 753, 754, 760, 761, 763, 765, 779, 782, 785, 788, 790, 791, 797, 798, 799, 802, 804, 808, 811, 812, 814, 816, 823, 826, 830, 833, 834, 836, 838, 840, 848, 851, 852, 853, 856, 864, 866, 867, 869, 871, 872, 874, 879, 882, 883, 884, 885, 887, 888, 889, 890, 893, 894, 895, 896, 899, 900, 901, 906, 908, 910, 916, 917, 919, 923, 927, 931, 934, 935, 940, 947, 949, 952, 953, 955, 957, 961, 962, 963, 966, 967, 968, 973, 974, 976, 977, 978, 980, 986, 989, 990, 991, 992, 993, 997, 999, 1007, 1009, 1012, 1013, 1028, 1036, 1040, 1041, 1042, 1043, 1049, 1051, 1052, 1054, 1056, 1061, 1063, 1064, 1066, 1070, 1073, 1079, 1082, 1083, 1084, 1088, 1093, 1095, 1097, 1099, 1100, 1101, 1104, 1117, 1118, 1120, 1121, 1123, 1125, 1126, 1129, 1132, 1138, 1141, 1143, 1145, 1149, 1152, 1153, 1158, 1167, 1169, 1170, 1172, 1174, 1181, 1185, 1190, 1197, 1201, 1225, 1236, 1247, 1249, 1251, 1252, 1254, 1264, 1286, 15, 1292, 1295, 1297, 1298, 1304, 42, 1308, 35, 1309, 1319, 50, 1322, 35, 1326, 1327, 15, 1332, 30, 1334, 30, 1336, 30, 1337, 1338, 20, 1339, 30, 1340, 20, 1342, 1360, 30, 1361, 50, 1365, 1366, 1374, 20, 1376, 25, 1384, 30, 30, 1388, 30, 1389, 15, 1391, 30, 1394, 30, 1402, 50, 1405, 50, 1410, 30, 1413, 50, 1414, 15, 1417, 15, 1418, 50, 1427, 35, 1431, 1438, 30, 1445, 15, 1447, 30, 1448, 30, 1449, 25, 1451, 50, 1453, 30, 1454, 20, 1459, 1467, 30, 1468, 1470, 361, 25, 114, 144, 40, 161, 25, 373, 603, 1341, 25, 1345, 50, 1352, 20, 1361, 50, 1364, 1375, 50, 1405, 50, 1448, 30, 1452, 30, 1453, 30, 1460, 50, 30, 66, 120, 158, 196, 214, 215, 226, 245, 344, 442, 730, 753, 810, 887, 895, 921, 1064, 1197, 1252, 1290, 1309, 1326, 1327, 15, 1340, 20, 1342, 1365, 1376, 25, 1416, 1454, 20, 10, 28, 37, 42, 67, 35, 73, 81, 94, 105, 112, 118, 133, 146, 177, 184, 194, 201, 207, 215, 224, 236, 269, 273, 274, 279, 284, 286, 293, 295, 307, 323, 332, 333, 350, 359, 361, 369, 370, 380, 381, 382, 399, 400, 401, 431, 475, 479, 482, 485, 495, 506, 510, 524, 536, 538, 555, 556, 568, 570, 586, 599, 607, 612, 631, 636, 640, 30, 647, 652, 668, 681, 695, 704, 713, 730, 731, 733, 747, 752, 772, 784, 785, 805, 816, 827, 839, 840, 843, 855, 857, 866, 869, 877, 880, 884, 888, 946, 957, 978, 1001, 1002, 1004, 1005, 1026, 1032, 1045, 1047, 1051, 1054, 1064, 1086, 1092, 1093, 1096, 1099, 1101, 1108, 1110, 1124, 1134, 1156, 1163, 1164, 1174, 1193, 1198, 1208, 1211, 1214, 1224, 1236, 1237, 1238, 1241, 1243, 1254, 1260, 1272, 1273, 1274, 1292, 1327, 15, 1328, 30, 1335, 30, 1337, 1354, 30, 1356, 40, 1360, 30, 1384, 30, 30, 1391, 30, 1408, 15, 1421, 25, 1427, 35, 1440, 15, 1446, 30, 1470] not found in axis'

### **A total of 825 out of 1472 products are clean!**

In [157]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 825 entries, 1 to 1471
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Label        825 non-null    object 
 1   Brand        825 non-null    object 
 2   Name         825 non-null    object 
 3   Price        825 non-null    int64  
 4   Rank         825 non-null    float64
 5   Ingredients  825 non-null    object 
 6   Combination  825 non-null    int64  
 7   Dry          825 non-null    int64  
 8   Normal       825 non-null    int64  
 9   Oily         825 non-null    int64  
 10  Sensitive    825 non-null    int64  
dtypes: float64(1), int64(6), object(4)
memory usage: 109.6+ KB


In [158]:
df.head()

Unnamed: 0,Label,Brand,Name,Price,Rank,Ingredients,Combination,Dry,Normal,Oily,Sensitive
1,Moisturizer,SK-II,Facial Treatment Essence,179,4.1,"Galactomyces Ferment Filtrate (Pitera), Butyle...",1,1,1,1,1
2,Moisturizer,DRUNK ELEPHANT,Protini™ Polypeptide Cream,68,4.4,"Water, Dicaprylyl Carbonate, Glycerin, Ceteary...",1,1,1,1,0
4,Moisturizer,IT COSMETICS,Your Skin But Better™ CC+™ Cream with SPF 50+,38,4.1,"Water, Snail Secretion Filtrate, Phenyl Trimet...",1,1,1,1,1
6,Moisturizer,DRUNK ELEPHANT,Lala Retro™ Whipped Cream,60,4.2,"Water, Glycerin, Caprylic/ Capric Triglyceride...",1,1,1,1,0
7,Moisturizer,DRUNK ELEPHANT,Virgin Marula Luxury Facial Oil,72,4.4,100% Unrefined Sclerocraya Birrea (Marula) Ker...,1,1,1,1,0


In [159]:
df.describe()

Unnamed: 0,Price,Rank,Combination,Dry,Normal,Oily,Sensitive
count,825.0,825.0,825.0,825.0,825.0,825.0,825.0
mean,51.911515,4.140121,0.661818,0.620606,0.658182,0.620606,0.528485
std,39.633432,0.620094,0.473378,0.485531,0.474607,0.485531,0.499491
min,3.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,29.0,4.0,0.0,0.0,0.0,0.0,0.0
50%,41.0,4.2,1.0,1.0,1.0,1.0,1.0
75%,65.0,4.5,1.0,1.0,1.0,1.0,1.0
max,370.0,5.0,1.0,1.0,1.0,1.0,1.0


### Export the results to a s CSV file that has the new toxicity column to processed

In [160]:
df.to_csv('../data/processed/clean_products_data.csv', index=False)

### Creating the list of clean products