In [206]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [207]:
import pandas as pd
import numpy as np
from fastai.core import Path

In [208]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [209]:
from sqlalchemy import create_engine

In [210]:
db_string = 'postgres://sidravic:sidravic@localhost:5432/lost_grandma_staging'
engine = create_engine(db_string)
db_conn = engine.connect()

In [211]:
! pwd

ROOT_PATH = Path('/home/sidravic/Dropbox/code/workspace/rails_apps/idylmynds/data_preprocessing/notebooks/category_based_recommendations')
DATA_PATH = ROOT_PATH/'data'
SAVE_PATH = ROOT_PATH/'save_model'

/home/sidravic/Dropbox/code/workspace/rails_apps/idylmynds/data_preprocessing/notebooks/category_based_recommendations/train


In [212]:
query = """
    SELECT cp.id as product_id,
           cp.name as product_name,
           cb.name as brand_name,
           cp.ingredients as ingredients,
           cp.categories as categories
                         
    from cosmetics_products cp
    inner join cosmetics_brands cb on cp.cosmetics_brand_id = cb.id
"""



In [213]:
df = pd.read_sql_query(query, con=db_conn)

In [11]:
df.shape

(9824, 5)

In [12]:
df.columns

Index(['product_id', 'product_name', 'brand_name', 'ingredients',
       'categories'],
      dtype='object')

In [15]:
dedup_df = df.drop_duplicates(subset=['product_name', 'brand_name'], inplace=False)

In [16]:
dedup_df.shape

(5041, 5)

In [32]:
dedup_df

Unnamed: 0,product_id,product_name,brand_name,ingredients,categories
0,62cfdb35-9acf-4f21-abb9-2b7209d72cbf,Cashmere Mist Deodorant,Donna Karan,-Aluminum Zirconium Tetrachlorohydrex Gly 15....,"{'classification': ['Fragrance', 'Women'], 'or..."
1,71c7d214-62de-4e62-94a7-0257564b8526,Cashmere Mist Eau de Parfum,Donna Karan,,"{'classification': ['Fragrance', 'Women'], 'or..."
2,e54ba7fd-6ab2-4ee4-a5aa-d771ed17d1a8,Cashmere Mist Eau de Toilette,Donna Karan,,"{'classification': ['Fragrance', 'Women'], 'or..."
3,d2fed8f5-d806-4185-bac1-4a4ed2123970,Cashmere Mist Travel Spray,Donna Karan,,"{'classification': ['Fragrance', 'Women'], 'or..."
4,6374bd64-016f-4dab-bfd7-f1d97594c15f,Cashmere Mist Body Lotion,Donna Karan,,"{'classification': ['Fragrance'], 'ordered': T..."
...,...,...,...,...,...
9733,29590d67-17d6-4229-9347-75c61a05ae91,The Ultimate Care Kit,DevaCurl,-Matcha Butter: Helps moisturize.-Vegan Protei...,"{'classification': ['Hair'], 'ordered': True}"
9734,84cbbf03-6189-444f-9c5b-ced33f7c0406,Share the Wavy Love,DevaCurl,-Rice Protein: Hels add volume. -Chia-Flaxseed...,"{'classification': ['Hair'], 'ordered': True}"
9737,c4ac9659-80b9-4ca1-9656-39050eca48b8,Share the Super Curly Love,DevaCurl,-Chufa Milk: Moisturizes. -Quinoa Protein: Hel...,"{'classification': ['Hair'], 'ordered': True}"
9802,fc8248c5-6de1-49c5-9af4-a67f17514d70,Light Blue Eau de Toilette Gift Set,DOLCE&amp;GABBANA,,"{'classification': ['Fragrance'], 'ordered': T..."


In [33]:
def flatten_categories(category):
    categories = category['classification']
    
    primary_category = None
    secondary_category = None
    
    if len(categories) == 2:
        primary_category, secondary_category =  categories
    elif len(categories) == 1:
        primary_category = categories[0]
        
    return primary_category, secondary_category



    

In [36]:
categories_series = dedup_df['categories'].apply(flatten_categories)
type(categories_series)

pandas.core.series.Series

In [44]:
primary, secondary = zip(*categories_series)
primary

('Fragrance',
 'Fragrance',
 'Fragrance',
 'Fragrance',
 'Fragrance',
 'Fragrance',
 'Fragrance',
 'Hair',
 'Hair',
 'Hair',
 'Hair',
 'Hair',
 'Hair',
 'Hair',
 'Hair',
 'Hair',
 'Hair',
 'Hair',
 'Hair',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Bath & Body',
 'Bath & Body',
 'Bath & Body',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Hair',
 'Skincare',
 'Bath & Body',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Makeup',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Skincare',
 'Bath &

In [45]:
dedup_df['primary_category'] = primary
dedup_df['secondary_category'] = secondary

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [48]:
dedup_df.drop(columns=['categories'], inplace=True)
dedup_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,product_id,product_name,brand_name,ingredients,primary_category,secondary_category
0,62cfdb35-9acf-4f21-abb9-2b7209d72cbf,Cashmere Mist Deodorant,Donna Karan,-Aluminum Zirconium Tetrachlorohydrex Gly 15....,Fragrance,Women
1,71c7d214-62de-4e62-94a7-0257564b8526,Cashmere Mist Eau de Parfum,Donna Karan,,Fragrance,Women
2,e54ba7fd-6ab2-4ee4-a5aa-d771ed17d1a8,Cashmere Mist Eau de Toilette,Donna Karan,,Fragrance,Women
3,d2fed8f5-d806-4185-bac1-4a4ed2123970,Cashmere Mist Travel Spray,Donna Karan,,Fragrance,Women
4,6374bd64-016f-4dab-bfd7-f1d97594c15f,Cashmere Mist Body Lotion,Donna Karan,,Fragrance,
...,...,...,...,...,...,...
9733,29590d67-17d6-4229-9347-75c61a05ae91,The Ultimate Care Kit,DevaCurl,-Matcha Butter: Helps moisturize.-Vegan Protei...,Hair,
9734,84cbbf03-6189-444f-9c5b-ced33f7c0406,Share the Wavy Love,DevaCurl,-Rice Protein: Hels add volume. -Chia-Flaxseed...,Hair,
9737,c4ac9659-80b9-4ca1-9656-39050eca48b8,Share the Super Curly Love,DevaCurl,-Chufa Milk: Moisturizes. -Quinoa Protein: Hel...,Hair,
9802,fc8248c5-6de1-49c5-9af4-a67f17514d70,Light Blue Eau de Toilette Gift Set,DOLCE&amp;GABBANA,,Fragrance,


In [58]:
dedup_df.groupby('primary_category')['ingredients'].apply(lambda x: (x == "").sum())

primary_category
Bath & Body         18
Fragrance          781
Gifts                1
Hair               107
Makeup             303
Men                 20
Mini Size           12
Skincare           149
Tools & Brushes     18
Name: ingredients, dtype: int64

In [60]:
dedup_df.to_csv(DATA_PATH/'ingredients.csv')

In [61]:
data_df = pd.read_csv(DATA_PATH/'ingredients.csv')

In [63]:
data_df.drop(columns='Unnamed: 0', inplace=True)

In [71]:
ingredients_not_empty_df = data_df['ingredients'][~data_df['ingredients'].isna()]

In [77]:
i1 = ingredients_not_empty_df.iloc[0]
i2 = ingredients_not_empty_df.iloc[1]
i1, i2

(' -Aluminum Zirconium Tetrachlorohydrex Gly 15.4%: Helps keep skin dry.\nCyclopentasiloxane, Stearyl Alcohol, Hydrogenated Castor Oil, Ppg-14 Butyl Ether, Dimethicone, Talc, Cashmere Mist Fragrance, Silica, Peg-8 Distearate, Allantoin, Maltodextrin, Corn Starch Modified, Hexyl Cinnamal, Hydroxycitronellal, Linalool, Citronellol, Alpha-Isomethyl Ionone, Benzyl Benzoate, Coumarin, Geraniol, Isoeugenol, Eugenol, Bht. \n',
 ' -Organic Grape Water: Moisturize and soothe.-Grape-seed Polyphenols: Protect skin from environmental aggressors with antioxidants.-Vinolevure: Strengthen and moisturize.-Chamomile: Calm and sooth.Vinosource Moisturizing Sorbet:Water, Vitis Vinifera (Grape) Fruit Water*, Dicaprylyl Ether*, Glycerin*, Butyrospermum Parkii (Shea Butter) Extract*, Hexyldecanol*, Hexyldecyl Laurate*, Behenyl Alcohol*, Glyceryl Stearate*, Erythritol, Acrylates/C10-30 Alkyl Acrylate Crosspolymer, Parfum (Fragrance), Tocopherol*, Lecithin*, Caprylyl Glycol, Mannitol*, Sodium Benzoate, Xantha

In [80]:
bagOfWords1 = i1.split(' ')
bagOfWords2 = i2.split(' ')
bagOfWords1, bagOfWords2

(['',
  '-Aluminum',
  'Zirconium',
  'Tetrachlorohydrex',
  'Gly',
  '15.4%:',
  'Helps',
  'keep',
  'skin',
  'dry.\nCyclopentasiloxane,',
  'Stearyl',
  'Alcohol,',
  'Hydrogenated',
  'Castor',
  'Oil,',
  'Ppg-14',
  'Butyl',
  'Ether,',
  'Dimethicone,',
  'Talc,',
  'Cashmere',
  'Mist',
  'Fragrance,',
  'Silica,',
  'Peg-8',
  'Distearate,',
  'Allantoin,',
  'Maltodextrin,',
  'Corn',
  'Starch',
  'Modified,',
  'Hexyl',
  'Cinnamal,',
  'Hydroxycitronellal,',
  'Linalool,',
  'Citronellol,',
  'Alpha-Isomethyl',
  'Ionone,',
  'Benzyl',
  'Benzoate,',
  'Coumarin,',
  'Geraniol,',
  'Isoeugenol,',
  'Eugenol,',
  'Bht.',
  '\n'],
 ['',
  '-Organic',
  'Grape',
  'Water:',
  'Moisturize',
  'and',
  'soothe.-Grape-seed',
  'Polyphenols:',
  'Protect',
  'skin',
  'from',
  'environmental',
  'aggressors',
  'with',
  'antioxidants.-Vinolevure:',
  'Strengthen',
  'and',
  'moisturize.-Chamomile:',
  'Calm',
  'and',
  'sooth.Vinosource',
  'Moisturizing',
  'Sorbet:Water,',

In [82]:
uniqueWords = set(bagOfWords1).union(bagOfWords2)
uniqueWords

{'',
 '\n',
 '(Fragrance),',
 '(Grape)',
 '(Matricaria)',
 '(Shea',
 '(Soybean)',
 '*Plant',
 '-Aluminum',
 '-Organic',
 '15.4%:',
 'Acetyl',
 'Acid,',
 'Acrylate',
 'Acrylates/C10-30',
 'Alcohol*,',
 'Alcohol,',
 'Alkyl',
 'Allantoin,',
 'Alpha-Isomethyl',
 'Behenyl',
 'Benzoate,',
 'Benzyl',
 'Betaglucan,',
 'Bht.',
 'Biosaccharide',
 'Butter)',
 'Butyl',
 'Butylene',
 'Butyrospermum',
 'Calm',
 'Caprylate*,',
 'Caprylyl',
 'Carboxymethyl',
 'Cashmere',
 'Castor',
 'Chamomilla',
 'Cinnamal,',
 'Citrate,',
 'Citric',
 'Citronellol,',
 'Corn',
 'Coumarin,',
 'Crosspolymer,',
 'Dicaprylyl',
 'Dimethicone,',
 'Distearate,',
 'Erythritol,',
 'Ether*,',
 'Ether,',
 'Eugenol,',
 'Extract*,',
 'Flower',
 'Fragrance,',
 'Fruit',
 'Geraniol,',
 'Gly',
 'Glycerin*,',
 'Glyceryl',
 'Glycine',
 'Glycol,',
 'Grape',
 'Gum,',
 'Gum-1,',
 'Hcl,',
 'Helps',
 'Hexyl',
 'Hexyldecanol*,',
 'Hexyldecyl',
 'Homarine',
 'Hyaluronate,',
 'Hydrogenated',
 'Hydroxide,',
 'Hydroxycitronellal,',
 'Ionone,',
 'I

In [84]:
numOfWords1 = dict.fromkeys(uniqueWords, 0)
numOfWords1

{'': 0,
 'Hydroxide,': 0,
 'Phytate*,': 0,
 'Fragrance,': 0,
 'Water:Vitis': 0,
 'Erythritol,': 0,
 'Allantoin,': 0,
 'Citronellol,': 0,
 'Geraniol,': 0,
 'Gly': 0,
 'Coumarin,': 0,
 '15.4%:': 0,
 'Glycol,': 0,
 'Ionone,': 0,
 'Fruit': 0,
 'Hyaluronate,': 0,
 'Origin.Grape': 0,
 'Strengthen': 0,
 'Hexyldecanol*,': 0,
 'Potassium': 0,
 'Laurate*,': 0,
 'Hexyl': 0,
 'Starch': 0,
 'Maltodextrin,': 0,
 'Acid,': 0,
 'Zirconium': 0,
 'Seed': 0,
 'Corn': 0,
 'Biosaccharide': 0,
 'dry.\nCyclopentasiloxane,': 0,
 'from': 0,
 'Carboxymethyl': 0,
 '*Plant': 0,
 'Peg-8': 0,
 '(Grape)': 0,
 'Butyrospermum': 0,
 'Xanthan': 0,
 'Alkyl': 0,
 'Sodium': 0,
 'Recutita': 0,
 'Hexyldecyl': 0,
 'with': 0,
 'Tetrapeptide-15.': 0,
 'Grape': 0,
 'Ether*,': 0,
 'Acrylates/C10-30': 0,
 'Mist': 0,
 'Lecithin*,': 0,
 'Eugenol,': 0,
 'Water*,': 0,
 'Acrylate': 0,
 'Silica,': 0,
 'Palmitoyl': 0,
 'Citric': 0,
 'and': 0,
 'Moisturize': 0,
 'Polyphenols:': 0,
 'Acetyl': 0,
 'Cinnamal,': 0,
 'Glycerin*,': 0,
 'Hydroxyc

### Term Frequency (TF)

The number of times a word appears in a document divded by the total number of words in the document. Every document has its own term frequency.

### Inverse Data Frequency (IDF)

The log of the number of documents divided by the number of documents that contain the word w. Inverse data frequency determines the weight of rare words across all documents in the corpus.

### Lastly, the TF-IDF is simply the TF multiplied by IDF.

In [86]:
vectorizer = TfidfVectorizer()

In [94]:
vectors = vectorizer.fit_transform([i1, i2])
vectors

<2x131 sparse matrix of type '<class 'numpy.float64'>'
	with 137 stored elements in Compressed Sparse Row format>

In [95]:
feature_names = vectorizer.get_feature_names()
feature_names

['14',
 '15',
 '30',
 'acetyl',
 'acid',
 'acrylate',
 'acrylates',
 'aggressors',
 'alcohol',
 'alkyl',
 'allantoin',
 'alpha',
 'aluminum',
 'and',
 'antioxidants',
 'behenyl',
 'benzoate',
 'benzyl',
 'betaglucan',
 'bht',
 'biosaccharide',
 'butter',
 'butyl',
 'butylene',
 'butyrospermum',
 'c10',
 'calm',
 'caprylate',
 'caprylyl',
 'carboxymethyl',
 'cashmere',
 'castor',
 'chamomile',
 'chamomilla',
 'cinnamal',
 'citrate',
 'citric',
 'citronellol',
 'corn',
 'coumarin',
 'crosspolymer',
 'cyclopentasiloxane',
 'dicaprylyl',
 'dimethicone',
 'distearate',
 'dry',
 'environmental',
 'erythritol',
 'ether',
 'eugenol',
 'extract',
 'flower',
 'fragrance',
 'from',
 'fruit',
 'geraniol',
 'gly',
 'glycerin',
 'glyceryl',
 'glycine',
 'glycol',
 'grape',
 'gum',
 'hcl',
 'helps',
 'hexyl',
 'hexyldecanol',
 'hexyldecyl',
 'homarine',
 'hyaluronate',
 'hydrogenated',
 'hydroxide',
 'hydroxycitronellal',
 'ionone',
 'isoeugenol',
 'isomethyl',
 'juice',
 'keep',
 'laurate',
 'lecith

In [96]:
dense = vectors.todense()
dense

matrix([[0.150692, 0.107218, 0.      , 0.      , ..., 0.      , 0.      , 0.      , 0.150692],
        [0.      , 0.042825, 0.060189, 0.060189, ..., 0.300944, 0.060189, 0.060189, 0.      ]])

In [97]:
dense.tolist()

[[0.1506915327511556,
  0.10721826666618207,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.10721826666618207,
  0.0,
  0.1506915327511556,
  0.1506915327511556,
  0.1506915327511556,
  0.0,
  0.0,
  0.0,
  0.10721826666618207,
  0.1506915327511556,
  0.0,
  0.1506915327511556,
  0.0,
  0.0,
  0.1506915327511556,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.1506915327511556,
  0.1506915327511556,
  0.0,
  0.0,
  0.1506915327511556,
  0.0,
  0.0,
  0.1506915327511556,
  0.1506915327511556,
  0.1506915327511556,
  0.0,
  0.1506915327511556,
  0.0,
  0.1506915327511556,
  0.1506915327511556,
  0.1506915327511556,
  0.0,
  0.0,
  0.10721826666618207,
  0.1506915327511556,
  0.0,
  0.0,
  0.10721826666618207,
  0.0,
  0.0,
  0.1506915327511556,
  0.1506915327511556,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.1506915327511556,
  0.1506915327511556,
  0.0,
  0.0,
  0.0,
  0.0,
  0.1506915327511556,
  0.0,
  0.1506915327511556,
  0.1506915327511556,
  0.1506915327511556,
  0.1

In [92]:
cosine_similarity(dense)

array([[1.     , 0.02755],
       [0.02755, 1.     ]])

### Fill all `na` values with an empty string

In [101]:
data_df['ingredients'].fillna('', inplace=True)
data_df['ingredients']


0        -Aluminum Zirconium Tetrachlorohydrex Gly 15....
1                                                        
2                                                        
3                                                        
4                                                        
                              ...                        
5036    -Matcha Butter: Helps moisturize.-Vegan Protei...
5037    -Rice Protein: Hels add volume. -Chia-Flaxseed...
5038    -Chufa Milk: Moisturizes. -Quinoa Protein: Hel...
5039                                                     
5040                                                     
Name: ingredients, Length: 5041, dtype: object

In [102]:
vectors = vectorizer.fit_transform(data_df['ingredients'])
vectors

<5041x10414 sparse matrix of type '<class 'numpy.float64'>'
	with 396642 stored elements in Compressed Sparse Row format>

In [103]:
feature_names = vectorizer.get_feature_names()
feature_names

['00',
 '000',
 '002',
 '005',
 '008',
 '01',
 '010',
 '012',
 '019140',
 '02',
 '025',
 '02m',
 '03',
 '031',
 '035',
 '04',
 '04932',
 '05',
 '050',
 '054',
 '05827',
 '067',
 '06p',
 '07',
 '074',
 '075',
 '07m',
 '08v',
 '09',
 '09157',
 '098',
 '10',
 '100',
 '100mg',
 '101',
 '10339',
 '105',
 '10b',
 '10isostearate',
 '10laurate',
 '10m',
 '10n',
 '11',
 '113',
 '11riethoxysilane',
 '12',
 '120',
 '12085',
 '12245',
 '12250',
 '12251',
 '123',
 '125',
 '12719',
 '12833',
 '12olefin',
 '13',
 '131',
 '13171',
 '13687r1',
 '13m',
 '13n',
 '14',
 '14700',
 '14720',
 '14901',
 '14isoparaffin',
 '14m',
 '14olefin',
 '15',
 '150',
 '15050',
 '151',
 '152a',
 '155',
 '15510',
 '15850',
 '15880',
 '15985',
 '15alkane',
 '15alkyl',
 '15linum',
 '15m',
 '16',
 '160',
 '16035',
 '16m',
 '17',
 '171565',
 '17200',
 '17283',
 '175',
 '178',
 '17m',
 '18',
 '180',
 '182',
 '183',
 '184',
 '188',
 '19',
 '190',
 '191140',
 '19140',
 '193',
 '194',
 '196',
 '1979',
 '19m',
 '1a',
 '1cherry',
 '

In [104]:
dense = vectors.todense()
dense

matrix([[0., 0., 0., 0., ..., 0., 0., 0., 0.],
        [0., 0., 0., 0., ..., 0., 0., 0., 0.],
        [0., 0., 0., 0., ..., 0., 0., 0., 0.],
        [0., 0., 0., 0., ..., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., ..., 0., 0., 0., 0.],
        [0., 0., 0., 0., ..., 0., 0., 0., 0.],
        [0., 0., 0., 0., ..., 0., 0., 0., 0.],
        [0., 0., 0., 0., ..., 0., 0., 0., 0.]])

In [109]:
cosine_sim_matrix = cosine_similarity(dense)

In [110]:
cosine_sim_matrix.shape

(5041, 5041)

### Using the `cosine_sim_matrix` to find similar stuff

1. Find the product index
2. 

#### 1. Find the product index

In [116]:
product_index = data_df[data_df['product_name'] == 'Cashmere Mist Deodorant'].index.item()
product_index

  """Entry point for launching an IPython kernel.


0

In [120]:
sorted(list(enumerate(cosine_sim_matrix[product_index])), key=lambda x: x[1], reverse=True)

[(0, 1.0),
 (4941, 0.404641673383024),
 (2314, 0.395781684421583),
 (3252, 0.3760120543121423),
 (3275, 0.3760120543121423),
 (3250, 0.3541688488304035),
 (633, 0.32013494746936866),
 (4049, 0.31149147232829094),
 (4088, 0.31149147232829094),
 (403, 0.3065064008659777),
 (416, 0.3034839492714335),
 (4867, 0.30134461407536683),
 (404, 0.29809378646658197),
 (408, 0.29809378646658197),
 (991, 0.29310410434675005),
 (4923, 0.29310410434675005),
 (3923, 0.29246612224588386),
 (4870, 0.2817979495126674),
 (3916, 0.28094455883202896),
 (3251, 0.2800400932637955),
 (637, 0.27999410547827747),
 (641, 0.27999410547827747),
 (3220, 0.27736958713882165),
 (717, 0.27260715087677934),
 (3666, 0.27209124580740024),
 (3259, 0.27182806369697315),
 (1462, 0.2653178568543291),
 (2994, 0.26370327838897745),
 (3257, 0.2627235528738571),
 (671, 0.2608664944298624),
 (401, 0.2590821287358548),
 (410, 0.2590821287358548),
 (271, 0.25874738094345606),
 (4854, 0.2569430578193562),
 (1019, 0.25684807948236316),

In [122]:
data_df.iloc[4941]['ingredients']

'Alcohol Denat., Fragrance, Water, Alpha-Isomethyl Ionone, Benzyl Salicylate, Hexyl Cinnamal, Citronellol, Limonene, Linalool, Geraniol, Coumarin, Hydroxycitronellal, Citral, Eugenol, BHT, Tocopherol.'

In [123]:
data_df.iloc[0]['ingredients']

' -Aluminum Zirconium Tetrachlorohydrex Gly 15.4%: Helps keep skin dry.\nCyclopentasiloxane, Stearyl Alcohol, Hydrogenated Castor Oil, Ppg-14 Butyl Ether, Dimethicone, Talc, Cashmere Mist Fragrance, Silica, Peg-8 Distearate, Allantoin, Maltodextrin, Corn Starch Modified, Hexyl Cinnamal, Hydroxycitronellal, Linalool, Citronellol, Alpha-Isomethyl Ionone, Benzyl Benzoate, Coumarin, Geraniol, Isoeugenol, Eugenol, Bht. \n'

In [214]:
from nbdev.export import *


In [215]:
notebook2script()

AssertionError: Use `create_config` to create settings.ini for the first time