# Prediction

In [10]:
import pandas as pd
import os
import re
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = [12, 6]

In [11]:
path = r'../output/amazon/' # use your path
all_files = [x for x in os.listdir(path) if x.endswith(".csv")]
li = []

for filename in all_files:
    df = pd.read_csv(path+filename, index_col=None, header=0)
    df["Brand"] = filename.split("_")[0]
    df["Line"] = filename.split("_")[1].split(".")[0]
    li.append(df)

df = pd.concat(li, axis=0, ignore_index=True)
df = df.drop(df.columns[0], axis=1)
df = df[df.Rating.str.contains("stars")]
df["RatingNumeric"]  = df.Rating.str.split(expand=True)[0]
df["Title"] = df["Title"].str.lower()
df["Review"] = df["Review"].str.lower()

In [12]:
df

Unnamed: 0,Product,Rating,Title,Review,Brand,Line,RatingNumeric
0,Maui Moisture Frizz-Free + Shea Butter Leave-i...,2.0 out of 5 stars,ughhh,i had high hopes for this after seeing reviews...,Maui Moisture,All Products,2.0
1,Maui Moisture Frizz-Free + Shea Butter Leave-i...,3.0 out of 5 stars,just okay,i've been making a point of buying only parabe...,Maui Moisture,All Products,3.0
2,Maui Moisture Frizz-Free + Shea Butter Leave-i...,5.0 out of 5 stars,literally my favorite brand,i cannot emphasize enough how amazing this bra...,Maui Moisture,All Products,5.0
4,Maui Moisture Frizz-Free + Shea Butter Leave-i...,5.0 out of 5 stars,holy grail - low porosity hair life saver!!!!,i've tried so many products from olaplex to ev...,Maui Moisture,All Products,5.0
5,Maui Moisture Frizz-Free + Shea Butter Leave-i...,1.0 out of 5 stars,awful and doesn’t condition the hair at all,this is the worst waste of money. it is worse ...,Maui Moisture,All Products,1.0
...,...,...,...,...,...,...,...
60817,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,very nice relaxing product,arrived on time. very nice relaxing product.,SheaMoisture,Coconut and Hibiscus,5.0
60818,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,five stars,"wonderful product, love it!",SheaMoisture,Coconut and Hibiscus,5.0
60819,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,like sand & silk: perfect soak,i ordered this and another scent based on the ...,SheaMoisture,Coconut and Hibiscus,5.0
60820,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,great product!,i got this as a gift for my bachelorette party...,SheaMoisture,Coconut and Hibiscus,5.0


In [13]:
brands = list(df.Brand.unique())
brands

['Maui Moisture',
 "Carol's Daughter",
 "Not Your Mother's",
 "Uncle Funky's Daughter",
 'SheaMoisture',
 'Moptop',
 'Giovanni',
 'Pacifica',
 'Cake Beauty',
 'TRESemme',
 'Curls']

In [14]:
def get_curl_pattern(c):
    curlpattern = ""
    my_split = re.split('[\W\:\(\)]+',c)
    curlpattern = " ".join(intersection(['1a','1b','1c','2a','2b','2c','3a','3b','3c','4a','4b','4c'], my_split))
    return(curlpattern)

def get_stats(c):
    stopwords = ['porosity','density','thickness']
    my_split = re.split('[\W\:\(\)]+',c)
    
    # get indices for all stopwords in comment
    indices = pd.DataFrame.from_dict([{'key':x, 'val':my_split.index(x)}  for x in stopwords if x in my_split])
    info_dict = {}
    
    if (len(indices) > 0):
        indices = indices.sort_values(by='val')
    
        # get substrings immediately before the descriptor
        prev_idx = -1
        for idx, row in indices.iterrows():
            info_dict[row['key']] = ' '.join([x for x in my_split[max(prev_idx+1, row['val']-4):row['val']] if x in ['low', 'med', 'medium', 'high', 'average']])
            prev_idx=row['val']
    # set non_descriptors to null string
    for desc in [x for x in stopwords if x not in info_dict]:
        info_dict[desc] = ""
        
    if info_dict['thickness'] == '':
        if 'fine' in c:
            info_dict['thickness'] = 'fine'
        elif 'coarse' in c or 'thick' in c:
            info_dict['thickness'] = 'coarse'
    return(info_dict)

def get_curl_pattern_multiple_columns(c):
    return (get_curl_pattern(' '.join([str(x) for x in c.values])))

def get_stats_multiple_columns(c):
    return (get_stats(' '.join([str(x) for x in c.values])))

def intersection(lst1, lst2):
    lst3 = [value for value in lst1 if value in lst2]
    return lst3

In [15]:
df['curlpattern'] = df[['Title', 'Review']].apply(get_curl_pattern_multiple_columns, axis=1)
hair_stats = pd.DataFrame.from_dict(df[['Title', 'Review']].apply(get_stats_multiple_columns, axis=1).values.tolist())
df = pd.concat([df.reset_index(drop=True),hair_stats.reset_index(drop=True)], axis=1)
df.head()

Unnamed: 0,Product,Rating,Title,Review,Brand,Line,RatingNumeric,curlpattern,porosity,density,thickness
0,Maui Moisture Frizz-Free + Shea Butter Leave-i...,2.0 out of 5 stars,ughhh,i had high hopes for this after seeing reviews...,Maui Moisture,All Products,2.0,2a 4b 4c,,,
1,Maui Moisture Frizz-Free + Shea Butter Leave-i...,3.0 out of 5 stars,just okay,i've been making a point of buying only parabe...,Maui Moisture,All Products,3.0,,,,
2,Maui Moisture Frizz-Free + Shea Butter Leave-i...,5.0 out of 5 stars,literally my favorite brand,i cannot emphasize enough how amazing this bra...,Maui Moisture,All Products,5.0,,,,
3,Maui Moisture Frizz-Free + Shea Butter Leave-i...,5.0 out of 5 stars,holy grail - low porosity hair life saver!!!!,i've tried so many products from olaplex to ev...,Maui Moisture,All Products,5.0,,low,,
4,Maui Moisture Frizz-Free + Shea Butter Leave-i...,1.0 out of 5 stars,awful and doesn’t condition the hair at all,this is the worst waste of money. it is worse ...,Maui Moisture,All Products,1.0,,,,


In [16]:
df.curlpattern.value_counts()

               51815
4c               924
3b               316
2b               279
3c               274
               ...  
1b 3b              1
2a 2b 2c 3a        1
2b 2c 3c           1
1a 1b 2a 2c        1
2c 4a              1
Name: curlpattern, Length: 71, dtype: int64

In [17]:
df.porosity = [x if x in ['low','high', ''] else 'medium' for x in df.porosity]
df.density = [x if x in ['low','high', ''] else 'medium' for x in df.density]
df.thickness = [x if x in ['low','fine', 'coarse', 'high', ''] else 'medium' for x in df.thickness]
df.thickness = ['fine' if x in ['low','fine'] else x for x in df.thickness]
df.thickness = ['coarse' if x in ['high','coarse'] else x for x in df.thickness]

In [19]:
for keyword in ['frizz', 'dry', 'volume', 'protein sensitive|protein-sensitive']:
    df[keyword] = df.Title.str.contains(keyword) | df.Review.str.contains(keyword)

In [20]:
df

Unnamed: 0,Product,Rating,Title,Review,Brand,Line,RatingNumeric,curlpattern,porosity,density,thickness,frizz,dry,volume,protein sensitive|protein-sensitive
0,Maui Moisture Frizz-Free + Shea Butter Leave-i...,2.0 out of 5 stars,ughhh,i had high hopes for this after seeing reviews...,Maui Moisture,All Products,2.0,2a 4b 4c,,,,False,False,False,False
1,Maui Moisture Frizz-Free + Shea Butter Leave-i...,3.0 out of 5 stars,just okay,i've been making a point of buying only parabe...,Maui Moisture,All Products,3.0,,,,,False,True,False,False
2,Maui Moisture Frizz-Free + Shea Butter Leave-i...,5.0 out of 5 stars,literally my favorite brand,i cannot emphasize enough how amazing this bra...,Maui Moisture,All Products,5.0,,,,,False,True,False,False
3,Maui Moisture Frizz-Free + Shea Butter Leave-i...,5.0 out of 5 stars,holy grail - low porosity hair life saver!!!!,i've tried so many products from olaplex to ev...,Maui Moisture,All Products,5.0,,low,,,True,True,False,False
4,Maui Moisture Frizz-Free + Shea Butter Leave-i...,1.0 out of 5 stars,awful and doesn’t condition the hair at all,this is the worst waste of money. it is worse ...,Maui Moisture,All Products,1.0,,,,,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55750,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,very nice relaxing product,arrived on time. very nice relaxing product.,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,False
55751,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,five stars,"wonderful product, love it!",SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,False
55752,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,like sand & silk: perfect soak,i ordered this and another scent based on the ...,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,False
55753,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,great product!,i got this as a gift for my bachelorette party...,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,False


In [21]:
df['likes_product'] = (df.RatingNumeric == "4.0") | (df.RatingNumeric == "5.0")
df

Unnamed: 0,Product,Rating,Title,Review,Brand,Line,RatingNumeric,curlpattern,porosity,density,thickness,frizz,dry,volume,protein sensitive|protein-sensitive,likes_product
0,Maui Moisture Frizz-Free + Shea Butter Leave-i...,2.0 out of 5 stars,ughhh,i had high hopes for this after seeing reviews...,Maui Moisture,All Products,2.0,2a 4b 4c,,,,False,False,False,False,False
1,Maui Moisture Frizz-Free + Shea Butter Leave-i...,3.0 out of 5 stars,just okay,i've been making a point of buying only parabe...,Maui Moisture,All Products,3.0,,,,,False,True,False,False,False
2,Maui Moisture Frizz-Free + Shea Butter Leave-i...,5.0 out of 5 stars,literally my favorite brand,i cannot emphasize enough how amazing this bra...,Maui Moisture,All Products,5.0,,,,,False,True,False,False,True
3,Maui Moisture Frizz-Free + Shea Butter Leave-i...,5.0 out of 5 stars,holy grail - low porosity hair life saver!!!!,i've tried so many products from olaplex to ev...,Maui Moisture,All Products,5.0,,low,,,True,True,False,False,True
4,Maui Moisture Frizz-Free + Shea Butter Leave-i...,1.0 out of 5 stars,awful and doesn’t condition the hair at all,this is the worst waste of money. it is worse ...,Maui Moisture,All Products,1.0,,,,,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55750,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,very nice relaxing product,arrived on time. very nice relaxing product.,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,False,True
55751,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,five stars,"wonderful product, love it!",SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,False,True
55752,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,like sand & silk: perfect soak,i ordered this and another scent based on the ...,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,False,True
55753,Shea Moisture Coconut & Hibiscus Dead Sea Salt...,5.0 out of 5 stars,great product!,i got this as a gift for my bachelorette party...,SheaMoisture,Coconut and Hibiscus,5.0,,,,,False,False,False,False,True


In [22]:
df.Product.value_counts()

﻿MopTop Gentle Shampoo, Natural Hair Moisturizer, Reduces Frizz, Color Safe Volumizing Shampoo - For All Hair Types, Straight, Curly, Wavy, Thin, Coily (8 oz)                                1738
MopTop Curly Hair Custard Gel for Fine, Thick, Wavy, Curly & Kinky-Coily Natural hair, Anti Frizz Curl Moisturizer, Definer & Lightweight Curl Activator w/ Aloe, great for Dry Hair, 8oz.    1738
Carol’s Daughter Coco Creme Coil Enhancing Moisture Butter for Very Dry Hair, with Coconut Oil and Mango Butter, Paraben Free and Silicone Free Butter for Curly Hair, 12 oz                  1512
Carol’s Daughter Coco Creme Curl Quenching Deep Moisture Hair Mask for Very Dry Hair, with Coconut Oil and Mango Butter, Hair Mask for Curly Hair, 12 oz                                      1512
Carol’s Daughter Coco Creme Curl Quenching Conditioner for Very Dry Hair, with Coconut Oil, Paraben Free Hair Conditioner for Curly Hair, 12 oz                                               1512
                         

In [23]:
product_df = df[['Product', 'Brand', 'Line']].value_counts().to_frame().reset_index()
product_df.head()

Unnamed: 0,Product,Brand,Line,0
0,"﻿MopTop Gentle Shampoo, Natural Hair Moisturiz...",Moptop,Shampoo,869
1,"MopTop Curly Hair Custard Gel for Fine, Thick,...",Moptop,Gel,869
2,"MopTop Light Conditioner, Wavy, Loose Curls, T...",Moptop,Conditioner,869
3,"﻿MopTop Gentle Shampoo, Natural Hair Moisturiz...",Moptop,Conditioner,869
4,"MopTop Leave-in Conditioner for Fine, Thick, W...",Moptop,Conditioner,869


In [24]:
def determine_product_type(s):
    s = s.lower()
    
    labels = []
    if 'shampoo' in s:
        labels.append('shampoo')
    if 'conditioner' in s:
        labels.append('conditioner')
    if 'leave-in' in s or 'leave in' in s:
        labels.append('leave-in')
    if 'gel' in s or 'custard' in s or 'jelly' in s:
        labels.append('gel/custard')
    if 'mousse' in s or 'foam' in s:
        labels.append('mousse/foam')
    if 'cream' in s and 'conditioner' not in s: #cream/creamy is often a descriptor for conditioner...
        labels.append('cream')
    if 'mask' in s or 'masque' in s:
        labels.append('mask')
        
    return ', '.join(labels)
    

In [25]:
product_df['ProductType'] = product_df['Product'].apply(determine_product_type)

In [26]:
product_df.columns = ['Comments' if x==0 else x for x in product_df.columns]
product_df.head()

Unnamed: 0,Product,Brand,Line,Comments,ProductType
0,"﻿MopTop Gentle Shampoo, Natural Hair Moisturiz...",Moptop,Shampoo,869,shampoo
1,"MopTop Curly Hair Custard Gel for Fine, Thick,...",Moptop,Gel,869,gel/custard
2,"MopTop Light Conditioner, Wavy, Loose Curls, T...",Moptop,Conditioner,869,conditioner
3,"﻿MopTop Gentle Shampoo, Natural Hair Moisturiz...",Moptop,Conditioner,869,shampoo
4,"MopTop Leave-in Conditioner for Fine, Thick, W...",Moptop,Conditioner,869,"conditioner, leave-in"


In [27]:
# row = product_df.iloc[0]

def get_comment_counts(row):
    subset = df[(df.Product == row.Product) & (df.Brand == row.Brand) & (df.Line == df.Line)]
    
    lowpo_pos = len(subset[(subset.porosity=='low') & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    lowpo_neg = len(subset[(subset.porosity=='low') & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    lowpo_score = (lowpo_pos - lowpo_neg)/len(subset[(subset.porosity=='low')]) if len(subset[(subset.porosity=='low')]) else 0

    highpo_pos = len(subset[(subset.porosity=='high') & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    highpo_neg = len(subset[(subset.porosity=='high') & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    highpo_score = (highpo_pos - highpo_neg)/len(subset[(subset.porosity=='high')]) if len(subset[(subset.porosity=='high')]) else 0

    fine_pos = len(subset[(subset.thickness=='fine') & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    fine_neg = len(subset[(subset.thickness=='fine') & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    fine_score = (fine_pos - fine_neg)/len(subset[(subset.thickness=='fine')]) if len(subset[(subset.thickness=='fine')]) else 0

    coarse_pos = len(subset[(subset.thickness=='coarse') & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    coarse_neg = len(subset[(subset.thickness=='coarse') & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    coarse_score = (coarse_pos - coarse_neg)/len(subset[(subset.thickness=='coarse')]) if len(subset[(subset.thickness=='coarse')]) else 0

    frizz_pos = len(subset[(subset.frizz) & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    frizz_neg = len(subset[(subset.frizz) & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    frizz_score = (frizz_pos - frizz_neg)/len(subset[(subset.frizz)]) if len(subset[(subset.frizz)]) else 0

    dry_pos = len(subset[(subset.dry) & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    dry_neg = len(subset[(subset.dry) & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    dry_score = (dry_pos - dry_neg)/len(subset[(subset.dry)]) if len(subset[(subset.dry)]) else 0

    volume_pos = len(subset[(subset.volume) & ((subset.RatingNumeric == '4.0') | (subset.RatingNumeric == '5.0'))])
    volume_neg = len(subset[(subset.volume) & ((subset.RatingNumeric == '1.0') | (subset.RatingNumeric == '2.0'))])
    volume_score = (volume_pos - volume_neg)/len(subset[(subset.volume)]) if len(subset[(subset.volume)]) else 0
    
    return([lowpo_score, highpo_score, fine_score, coarse_score, frizz_score, dry_score, volume_score])



In [28]:
score_df = pd.DataFrame(list(product_df.apply(get_comment_counts, axis=1)), columns=['LowPorosity', 'HighPorosity', 'Fine', 'Coarse', 'Frizz', 'Dry', 'Volume'])
score_df

Unnamed: 0,LowPorosity,HighPorosity,Fine,Coarse,Frizz,Dry,Volume
0,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
1,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
2,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
3,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
4,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
...,...,...,...,...,...,...,...
269,0.00,0.0,0.000000,0.0,1.000000,1.000000,0.000
270,0.00,0.0,0.000000,0.0,0.000000,1.000000,0.000
271,0.00,0.0,0.000000,0.0,0.000000,0.000000,0.000
272,0.00,0.0,0.000000,0.0,0.000000,0.000000,0.000


In [29]:
product_df = pd.concat([product_df.reset_index(drop=True),score_df.reset_index(drop=True)], axis=1)
product_df.head()

Unnamed: 0,Product,Brand,Line,Comments,ProductType,LowPorosity,HighPorosity,Fine,Coarse,Frizz,Dry,Volume
0,"﻿MopTop Gentle Shampoo, Natural Hair Moisturiz...",Moptop,Shampoo,869,shampoo,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
1,"MopTop Curly Hair Custard Gel for Fine, Thick,...",Moptop,Gel,869,gel/custard,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
2,"MopTop Light Conditioner, Wavy, Loose Curls, T...",Moptop,Conditioner,869,conditioner,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
3,"﻿MopTop Gentle Shampoo, Natural Hair Moisturiz...",Moptop,Conditioner,869,shampoo,0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875
4,"MopTop Leave-in Conditioner for Fine, Thick, W...",Moptop,Conditioner,869,"conditioner, leave-in",0.75,0.6,0.628378,0.5,0.519084,0.527273,0.875


In [30]:
product_df['Frizz']

0      0.519084
1      0.519084
2      0.519084
3      0.519084
4      0.519084
         ...   
269    1.000000
270    0.000000
271    0.000000
272    0.000000
273    1.000000
Name: Frizz, Length: 274, dtype: float64

In [31]:
def rank_products(product_types, scores):
    tmp_df = product_df[product_df.ProductType.str.contains("|".join(product_types))]
    tmp_df['sum'] = tmp_df[scores].sum(axis=1)
    return(tmp_df.sort_values(by='sum', ascending=False))
                                                                                      

In [32]:
rank_products(['conditioner'], ['Fine', 'LowPorosity'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmp_df['sum'] = tmp_df[scores].sum(axis=1)


Unnamed: 0,Product,Brand,Line,Comments,ProductType,LowPorosity,HighPorosity,Fine,Coarse,Frizz,Dry,Volume,sum
166,Pacifica Beauty Salty Waves Texturizing and Mo...,Pacifica,Hair,64,conditioner,1.0,0.0,1.000000,1.000,0.666667,0.428571,0.888889,2.000000
151,Carol’s Daughter Goddess Strength Fortifying P...,Carol's Daughter,Shampoos and Conditioners,82,conditioner,1.0,0.0,1.000000,1.000,0.600000,0.888889,0.000000,2.000000
256,TRESemmé Pro Pure Conditioner Silicone-Free fo...,TRESemme,Curly & Wavy Hair,7,conditioner,1.0,0.0,1.000000,0.000,0.000000,0.000000,0.000000,2.000000
232,TRESemmé Conditioner Sulfate-Free for Curly Ha...,TRESemme,Curly & Wavy Hair,17,conditioner,1.0,0.0,1.000000,0.000,1.000000,0.200000,0.000000,2.000000
119,Carol's Daughter Detangling Jelly-to-Cream Con...,Carol's Daughter,Shampoos and Conditioners,132,"conditioner, gel/custard",1.0,0.0,1.000000,0.800,0.600000,0.588235,0.000000,2.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,SheaMoisture Hydrate & Repair conditioner For ...,SheaMoisture,Manuka Honey and Yogurt,122,"shampoo, conditioner",0.0,0.0,-0.666667,-0.125,-0.333333,0.050000,0.000000,-0.666667
218,"Curls Blueberry Bliss Reparative Hair Wash, 8 ...",Curls,Blueberry Bliss,25,"conditioner, leave-in",0.0,1.0,-1.000000,0.000,0.000000,0.000000,0.000000,-1.000000
217,"Richee Rich Moisturizing Conditioner, 8 oz",Uncle Funky's Daughter,Conditioners,27,conditioner,-1.0,0.0,0.000000,0.000,0.000000,1.000000,0.000000,-1.000000
244,Maui Moisture Strength & Length + Castor & Nee...,Maui Moisture,All Products,12,conditioner,0.0,0.0,-1.000000,0.000,0.000000,0.333333,0.000000,-1.000000


In [34]:
path = r'../output/amazon_links/' # use your path
all_files = [x for x in os.listdir(path) if x.endswith(".csv")]
li = []

for filename in all_files:
    df = pd.read_csv(path+filename, index_col=None, header=0)
    df["Brand"] = filename.split("_")[0]
    df["Line"] = filename.split("_")[1].split(".")[0]
    li.append(df)

links_df = pd.concat(li, axis=0, ignore_index=True)


In [35]:
links_df

Unnamed: 0.1,Unnamed: 0,Product,URL,Image,Brand,Line
0,0,Maui Moisture Frizz-Free + Shea Butter Leave-i...,https://www.amazon.com/Frizz-Free-Conditioning...,https://m.media-amazon.com/images/I/81CecNEsqn...,Maui Moisture,All Products
1,1,Maui Moisture Moisture Detoxifying + Volcanic ...,https://www.amazon.com/Maui-Moisture-Volcanic-...,https://m.media-amazon.com/images/I/81wdvYaztx...,Maui Moisture,All Products
2,2,Maui Moisture Detoxifying Volcanic Ash Scalp C...,https://www.amazon.com/Maui-Moisture-Volcanic-...,https://m.media-amazon.com/images/I/81jYCQp-d3...,Maui Moisture,All Products
3,3,"Maui Moisture Detoxifying + Shampoo, Volcanic ...",https://www.amazon.com/Maui-Moisture-Volcanic-...,https://m.media-amazon.com/images/I/81jlistGLh...,Maui Moisture,All Products
4,4,Maui Moisture Curl Quench + Coconut Oil Ultra-...,https://www.amazon.com/Coconut-Ultra-Hold-Styl...,https://m.media-amazon.com/images/I/81bVSZxm3c...,Maui Moisture,All Products
...,...,...,...,...,...,...
333,19,"Sheamoisture Bar Soap for Dull, Dry Skin Cocon...",https://www.amazon.com/SheaMoisture-Soap-Cocon...,https://images-na.ssl-images-amazon.com/images...,SheaMoisture,Coconut and Hibiscus
334,20,SheaMoisture Body Scrub For Dull Skin Illumina...,https://www.amazon.com/SheaMoisture-Scrub-Exfo...,https://images-na.ssl-images-amazon.com/images...,SheaMoisture,Coconut and Hibiscus
335,21,SheaMoisture In-Shower Soap and Body Wash Cond...,https://www.amazon.com/Coconut-Hibiscus-In-Sho...,https://images-na.ssl-images-amazon.com/images...,SheaMoisture,Coconut and Hibiscus
336,22,"Shea Moisture Skin Care, Coconut & Hibiscus Il...",https://www.amazon.com/SheaMoisture-Hibiscus-I...,https://images-na.ssl-images-amazon.com/images...,SheaMoisture,Coconut and Hibiscus


In [33]:
product_df.to_csv("../output/products_with_scores.csv", index=False)