# Prediction

In [1]:
import pandas as pd
import os
import re
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = [12, 6]

In [2]:
path = r'../output/amazon/' # use your path
all_files = [x for x in os.listdir(path) if x.endswith(".csv")]
li = []

for filename in all_files:
    df = pd.read_csv(path+filename, index_col=None, header=0)
    df["Brand"] = filename.split("_")[0]
    df["Line"] = filename.split("_")[1].split(".")[0]
    li.append(df)

df = pd.concat(li, axis=0, ignore_index=True)
df = df.drop(df.columns[0], axis=1)
df = df[df.Rating.str.contains("stars")]
df["RatingNumeric"]  = df.Rating.str.split(expand=True)[0]
df["Title"] = df["Title"].str.lower()
df["Review"] = df["Review"].str.lower()

In [3]:
df

Unnamed: 0,Product,Rating,Title,Review,Brand,Line,RatingNumeric
1,Carol’s Daughter Coco Creme Curl Quenching Dee...,1.0 out of 5 stars,not for me,"i have thick, wavy, coarse, frizzy hair and th...",Carol's Daughter,Masks and Treatments,1.0
2,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,my new favorite!!,it's amazing!! it is so slippery that the tini...,Carol's Daughter,Masks and Treatments,5.0
3,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,1 product serious curl definition,very great product for naturals the best i’ve ...,Carol's Daughter,Masks and Treatments,5.0
4,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,it worked! i have bouncy curls now!,i never thought i could just let my curls air ...,Carol's Daughter,Masks and Treatments,5.0
5,Carol’s Daughter Coco Creme Curl Quenching Dee...,1.0 out of 5 stars,zero stars - worst. smell. ever. and no way to...,i shampooed my hair six times with anothe prod...,Carol's Daughter,Masks and Treatments,1.0
...,...,...,...,...,...,...,...
56347,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,very nice relaxing product,arrived on time. very nice relaxing product.,SheaMoisture,Coconut and Hibiscus,5.0
56348,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,five stars,"wonderful product, love it!",SheaMoisture,Coconut and Hibiscus,5.0
56349,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,like sand & silk: perfect soak,i ordered this and another scent based on the ...,SheaMoisture,Coconut and Hibiscus,5.0
56350,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,great product!,i got this as a gift for my bachelorette party...,SheaMoisture,Coconut and Hibiscus,5.0


In [4]:
brands = list(df.Brand.unique())
brands

["Carol's Daughter",
 'SheaMoisture',
 "Not Your Mother's",
 'Moptop',
 "Uncle Funky's Daughter",
 'Giovanni',
 'Pacifica',
 'Cake Beauty',
 'TRESemme',
 'Curls']

In [5]:
def get_curl_pattern(c):
    curlpattern = ""
    my_split = re.split('[\W\:\(\)]+',c)
    curlpattern = " ".join(intersection(['1a','1b','1c','2a','2b','2c','3a','3b','3c','4a','4b','4c'], my_split))
    return(curlpattern)

def get_stats(c):
    stopwords = ['porosity','density','thickness']
    my_split = re.split('[\W\:\(\)]+',c)
    
    # get indices for all stopwords in comment
    indices = pd.DataFrame.from_dict([{'key':x, 'val':my_split.index(x)}  for x in stopwords if x in my_split])
    info_dict = {}
    
    if (len(indices) > 0):
        indices = indices.sort_values(by='val')
    
        # get substrings immediately before the descriptor
        prev_idx = -1
        for idx, row in indices.iterrows():
            info_dict[row['key']] = ' '.join([x for x in my_split[max(prev_idx+1, row['val']-4):row['val']] if x in ['low', 'med', 'medium', 'high', 'average']])
            prev_idx=row['val']
    # set non_descriptors to null string
    for desc in [x for x in stopwords if x not in info_dict]:
        info_dict[desc] = ""
        
    if info_dict['thickness'] == '':
        if 'fine' in c:
            info_dict['thickness'] = 'fine'
        elif 'coarse' in c or 'thick' in c:
            info_dict['thickness'] = 'coarse'
    return(info_dict)

def get_curl_pattern_multiple_columns(c):
    return (get_curl_pattern(' '.join([str(x) for x in c.values])))

def get_stats_multiple_columns(c):
    return (get_stats(' '.join([str(x) for x in c.values])))

def intersection(lst1, lst2):
    lst3 = [value for value in lst1 if value in lst2]
    return lst3

In [6]:
df['curlpattern'] = df[['Title', 'Review']].apply(get_curl_pattern_multiple_columns, axis=1)
hair_stats = pd.DataFrame.from_dict(df[['Title', 'Review']].apply(get_stats_multiple_columns, axis=1).values.tolist())
df = pd.concat([df.reset_index(drop=True),hair_stats.reset_index(drop=True)], axis=1)
df.head()

Unnamed: 0,Product,Rating,Title,Review,Brand,Line,RatingNumeric,curlpattern,porosity,density,thickness
0,Carol’s Daughter Coco Creme Curl Quenching Dee...,1.0 out of 5 stars,not for me,"i have thick, wavy, coarse, frizzy hair and th...",Carol's Daughter,Masks and Treatments,1.0,,,,coarse
1,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,my new favorite!!,it's amazing!! it is so slippery that the tini...,Carol's Daughter,Masks and Treatments,5.0,,,,
2,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,1 product serious curl definition,very great product for naturals the best i’ve ...,Carol's Daughter,Masks and Treatments,5.0,,,,
3,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,it worked! i have bouncy curls now!,i never thought i could just let my curls air ...,Carol's Daughter,Masks and Treatments,5.0,,,,fine
4,Carol’s Daughter Coco Creme Curl Quenching Dee...,1.0 out of 5 stars,zero stars - worst. smell. ever. and no way to...,i shampooed my hair six times with anothe prod...,Carol's Daughter,Masks and Treatments,1.0,,,,


In [7]:
df.curlpattern.value_counts()

               47887
4c               843
3b               307
2b               270
3c               252
               ...  
2b 2c 3c           1
2c 3a 4c           1
1b 3b              1
3a 3b 4a 4c        1
2b 3a 3c           1
Name: curlpattern, Length: 69, dtype: int64

In [8]:
df.porosity = [x if x in ['low','high', ''] else 'medium' for x in df.porosity]
df.density = [x if x in ['low','high', ''] else 'medium' for x in df.density]
df.thickness = [x if x in ['low','fine', 'coarse', 'high', ''] else 'medium' for x in df.thickness]
df.thickness = ['fine' if x in ['low','fine'] else x for x in df.thickness]
df.thickness = ['coarse' if x in ['high','coarse'] else x for x in df.thickness]

In [101]:
df[df.Review.str.contains("protein sensitive")]

ValueError: Cannot mask with non-boolean array containing NA / NaN values

In [9]:
for keyword in ['frizz', 'dry', 'volume', 'protein sensitive|protein-sensitive']:
    df[keyword] = df.Title.str.contains(keyword) | df.Review.str.contains(keyword)

In [10]:
df

Unnamed: 0,Product,Rating,Title,Review,Brand,Line,RatingNumeric,curlpattern,porosity,density,thickness,frizz,dry,volume
0,Carol’s Daughter Coco Creme Curl Quenching Dee...,1.0 out of 5 stars,not for me,"i have thick, wavy, coarse, frizzy hair and th...",Carol's Daughter,Masks and Treatments,1.0,,,,coarse,True,True,False
1,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,my new favorite!!,it's amazing!! it is so slippery that the tini...,Carol's Daughter,Masks and Treatments,5.0,,,,,True,False,False
2,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,1 product serious curl definition,very great product for naturals the best i’ve ...,Carol's Daughter,Masks and Treatments,5.0,,,,,False,False,False
3,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,it worked! i have bouncy curls now!,i never thought i could just let my curls air ...,Carol's Daughter,Masks and Treatments,5.0,,,,fine,False,True,False
4,Carol’s Daughter Coco Creme Curl Quenching Dee...,1.0 out of 5 stars,zero stars - worst. smell. ever. and no way to...,i shampooed my hair six times with anothe prod...,Carol's Daughter,Masks and Treatments,1.0,,,,,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51559,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,very nice relaxing product,arrived on time. very nice relaxing product.,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False
51560,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,five stars,"wonderful product, love it!",SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False
51561,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,like sand & silk: perfect soak,i ordered this and another scent based on the ...,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False
51562,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,great product!,i got this as a gift for my bachelorette party...,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False


In [11]:
df['likes_product'] = (df.RatingNumeric == "4.0") | (df.RatingNumeric == "5.0")
df

Unnamed: 0,Product,Rating,Title,Review,Brand,Line,RatingNumeric,curlpattern,porosity,density,thickness,frizz,dry,volume,likes_product
0,Carol’s Daughter Coco Creme Curl Quenching Dee...,1.0 out of 5 stars,not for me,"i have thick, wavy, coarse, frizzy hair and th...",Carol's Daughter,Masks and Treatments,1.0,,,,coarse,True,True,False,False
1,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,my new favorite!!,it's amazing!! it is so slippery that the tini...,Carol's Daughter,Masks and Treatments,5.0,,,,,True,False,False,True
2,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,1 product serious curl definition,very great product for naturals the best i’ve ...,Carol's Daughter,Masks and Treatments,5.0,,,,,False,False,False,True
3,Carol’s Daughter Coco Creme Curl Quenching Dee...,5.0 out of 5 stars,it worked! i have bouncy curls now!,i never thought i could just let my curls air ...,Carol's Daughter,Masks and Treatments,5.0,,,,fine,False,True,False,True
4,Carol’s Daughter Coco Creme Curl Quenching Dee...,1.0 out of 5 stars,zero stars - worst. smell. ever. and no way to...,i shampooed my hair six times with anothe prod...,Carol's Daughter,Masks and Treatments,1.0,,,,,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51559,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,very nice relaxing product,arrived on time. very nice relaxing product.,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,True
51560,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,five stars,"wonderful product, love it!",SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,True
51561,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,like sand & silk: perfect soak,i ordered this and another scent based on the ...,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,True
51562,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,great product!,i got this as a gift for my bachelorette party...,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,True


In [12]:
df.Product.value_counts()

﻿MopTop Gentle Shampoo, Natural Hair Moisturizer, Reduces Frizz, Color Safe Volumizing Shampoo - For All Hair Types, Straight, Curly, Wavy, Thin, Coily (8 oz)                                1738
MopTop Curly Hair Custard Gel for Fine, Thick, Wavy, Curly & Kinky-Coily Natural hair, Anti Frizz Curl Moisturizer, Definer & Lightweight Curl Activator w/ Aloe, great for Dry Hair, 8oz.    1738
Carol’s Daughter Coco Creme Curl Quenching Deep Moisture Hair Mask for Very Dry Hair, with Coconut Oil and Mango Butter, Hair Mask for Curly Hair, 12 oz                                      1512
Carol’s Daughter Coco Creme Curl Quenching Conditioner for Very Dry Hair, with Coconut Oil, Paraben Free Hair Conditioner for Curly Hair, 12 oz                                               1512
Carol’s Daughter Coco Creme Curl Shaping Cream Gel, with Coconut Oil, Coconut Milk, Silicone Free, Paraben Free Hair Gel for Curly Hair , Mineral Oil Free, for Very Dry Hair, 16 Oz          1512
                         

In [17]:
product_df = df[['Product', 'Brand', 'Line']].value_counts().to_frame().reset_index()
product_df.head()

Unnamed: 0,Product,Brand,Line,0
0,"MopTop Curly Hair Custard Gel for Fine, Thick,...",Moptop,Conditioner,869
1,"MopTop Cowash Cleansing Conditioner, Wavy, Cur...",Moptop,Conditioner,869
2,"﻿MopTop Gentle Shampoo, Natural Hair Moisturiz...",Moptop,Conditioner,869
3,"﻿MopTop Daily Conditioner, ﻿Natural Hair Moist...",Moptop,Conditioner,869
4,"﻿MopTop Curl Enhancer Gel, Reduces Frizz, Natu...",Moptop,Conditioner,869


In [33]:
def determine_product_type(s):
    s = s.lower()
    
    labels = []
    if 'shampoo' in s:
        labels.append('shampoo')
    if 'conditioner' in s:
        labels.append('conditioner')
    if 'leave-in' in s or 'leave in' in s:
        labels.append('leave-in')
    if 'gel' in s or 'custard' in s or 'jelly' in s:
        labels.append('gel/custard')
    if 'mousse' in s or 'foam' in s:
        labels.append('mousse/foam')
    if 'cream' in s and 'conditioner' not in s: #cream/creamy is often a descriptor for conditioner...
        labels.append('cream')
    if 'mask' in s or 'masque' in s:
        labels.append('mask')
        
    return ', '.join(labels)
    

In [34]:
product_df['ProductType'] = product_df['Product'].apply(determine_product_type)

In [41]:
product_df.columns = ['Comments' if x==0 else x for x in product_df.columns]
product_df.head()

Unnamed: 0,Product,Brand,Line,Comments,ProductType
0,"MopTop Curly Hair Custard Gel for Fine, Thick,...",Moptop,Conditioner,869,gel/custard
1,"MopTop Cowash Cleansing Conditioner, Wavy, Cur...",Moptop,Conditioner,869,conditioner
2,"﻿MopTop Gentle Shampoo, Natural Hair Moisturiz...",Moptop,Conditioner,869,shampoo
3,"﻿MopTop Daily Conditioner, ﻿Natural Hair Moist...",Moptop,Conditioner,869,conditioner
4,"﻿MopTop Curl Enhancer Gel, Reduces Frizz, Natu...",Moptop,Conditioner,869,gel/custard


In [61]:
# row = product_df.iloc[0]

def get_comment_counts(row):
    subset = df[(df.Product == row.Product) & (df.Brand == row.Brand) & (df.Line == df.Line)]
    
    lowpo_pos = len(subset[(subset.porosity=='low') & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    lowpo_neg = len(subset[(subset.porosity=='low') & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    lowpo_score = (lowpo_pos - lowpo_neg)/len(subset[(subset.porosity=='low')]) if len(subset[(subset.porosity=='low')]) else 0

    highpo_pos = len(subset[(subset.porosity=='high') & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    highpo_neg = len(subset[(subset.porosity=='high') & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    highpo_score = (highpo_pos - highpo_neg)/len(subset[(subset.porosity=='high')]) if len(subset[(subset.porosity=='high')]) else 0

    fine_pos = len(subset[(subset.thickness=='fine') & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    fine_neg = len(subset[(subset.thickness=='fine') & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    fine_score = (fine_pos - fine_neg)/len(subset[(subset.thickness=='fine')]) if len(subset[(subset.thickness=='fine')]) else 0

    coarse_pos = len(subset[(subset.thickness=='coarse') & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    coarse_neg = len(subset[(subset.thickness=='coarse') & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    coarse_score = (coarse_pos - coarse_neg)/len(subset[(subset.thickness=='coarse')]) if len(subset[(subset.thickness=='coarse')]) else 0

    frizz_pos = len(subset[(subset.frizz) & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    frizz_neg = len(subset[(subset.frizz) & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    frizz_score = (frizz_pos - frizz_neg)/len(subset[(subset.frizz)]) if len(subset[(subset.frizz)]) else 0

    dry_pos = len(subset[(subset.dry) & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    dry_neg = len(subset[(subset.dry) & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    dry_score = (dry_pos - dry_neg)/len(subset[(subset.dry)]) if len(subset[(subset.dry)]) else 0

    volume_pos = len(subset[(subset.volume) & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    volume_neg = len(subset[(subset.volume) & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    volume_score = (volume_pos - volume_neg)/len(subset[(subset.volume)]) if len(subset[(subset.volume)]) else 0
    
    return([lowpo_score, highpo_score, fine_score, coarse_score, frizz_score, dry_score, volume_score])



In [74]:
score_df = pd.DataFrame(list(product_df.apply(get_comment_counts, axis=1)), columns=['LowPorosity', 'HighPorosity', 'Fine', 'Coarse', 'Frizz', 'Dry', 'Volume'])
score_df

Unnamed: 0,LowPorosity,HighPorosity,Fine,Coarse,Frizz,Dry,Volume
0,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
1,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
2,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
3,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
4,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
...,...,...,...,...,...,...,...
238,0.00,0.0,0.000000,0.0,0.000000,1.000000,0.000
239,0.00,0.0,0.000000,0.0,0.000000,0.000000,0.000
240,0.00,0.0,-1.000000,0.0,0.000000,-1.000000,0.000
241,0.00,0.0,0.000000,0.0,0.000000,0.000000,0.000


In [75]:
product_df = pd.concat([product_df.reset_index(drop=True),score_df.reset_index(drop=True)], axis=1)
product_df.head()

Unnamed: 0,Product,Brand,Line,Comments,ProductType,LowPorosity,HighPorosity,Fine,Coarse,Frizz,Dry,Volume
0,"MopTop Curly Hair Custard Gel for Fine, Thick,...",Moptop,Conditioner,869,gel/custard,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
1,"MopTop Cowash Cleansing Conditioner, Wavy, Cur...",Moptop,Conditioner,869,conditioner,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
2,"﻿MopTop Gentle Shampoo, Natural Hair Moisturiz...",Moptop,Conditioner,869,shampoo,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
3,"﻿MopTop Daily Conditioner, ﻿Natural Hair Moist...",Moptop,Conditioner,869,conditioner,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
4,"﻿MopTop Curl Enhancer Gel, Reduces Frizz, Natu...",Moptop,Conditioner,869,gel/custard,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875


In [76]:
product_df['Frizz']

0      0.519084
1      0.519084
2      0.519084
3      0.519084
4      0.519084
         ...   
238    0.000000
239    0.000000
240    0.000000
241    0.000000
242    1.000000
Name: Frizz, Length: 243, dtype: float64

In [83]:
def rank_products(product_types, scores):
    tmp_df = product_df[product_df.ProductType.str.contains("|".join(product_types))]
    tmp_df['sum'] = tmp_df[scores].sum(axis=1)
    return(tmp_df.sort_values(by='sum', ascending=False))
                                                                                      

In [89]:
rank_products(['conditioner'], ['Fine', 'LowPorosity'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp_df['sum'] = tmp_df[scores].sum(axis=1)


Unnamed: 0,Product,Brand,Line,Comments,ProductType,LowPorosity,HighPorosity,Fine,Coarse,Frizz,Dry,Volume,sum
105,Carol's Daughter Detangling Jelly-to-Cream Con...,Carol's Daughter,Shampoos and Conditioners,132,"conditioner, gel/custard",1.0,0.0,1.000000,0.800,0.600000,0.588235,0.000000,2.000000
222,TRESemmé Pro Pure Conditioner Silicone-Free fo...,TRESemme,Curly & Wavy Hair,7,conditioner,1.0,0.0,1.000000,0.000,0.000000,0.000000,0.000000,2.000000
203,TRESemmé Conditioner Sulfate-Free for Curly Ha...,TRESemme,Curly & Wavy Hair,17,conditioner,1.0,0.0,1.000000,0.000,1.000000,0.200000,0.000000,2.000000
184,GIOVANNI Eco Chic Smooth As Silk Deeper Moistu...,Giovanni,Curly Girl Products,32,conditioner,1.0,0.0,1.000000,1.000,0.000000,1.000000,0.000000,2.000000
148,Pacifica Beauty Salty Waves Texturizing and Mo...,Pacifica,Hair,64,conditioner,1.0,0.0,1.000000,1.000,0.666667,0.428571,0.888889,2.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,"CAKE Beauty The Locks Smith Dry Styling, Hydra...",Cake Beauty,Hair,155,conditioner,0.0,0.0,-0.166667,1.000,0.600000,0.063830,1.000000,-0.166667
82,SheaMoisture 100% Virgin Coconut Oil Daily Hyd...,SheaMoisture,100% Virgin Coconut Oil,205,"shampoo, conditioner",-1.0,0.0,0.466667,-0.375,0.300000,-0.100000,1.000000,-0.533333
109,SheaMoisture Hydrate & Repair conditioner For ...,SheaMoisture,Manuka Honey and Yogurt,122,"shampoo, conditioner",0.0,0.0,-0.666667,-0.125,-0.333333,0.050000,0.000000,-0.666667
194,"Curls Blueberry Bliss Reparative Hair Wash, 8 ...",Curls,Blueberry Bliss,25,"conditioner, leave-in",0.0,1.0,-1.000000,0.000,0.000000,0.000000,0.000000,-1.000000


In [102]:
product_df.to_csv("../output/products_with_scores.csv", index=False)