In [11]:
import pandas as pd
import re
import collections
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from nltk.corpus import stopwords
import numpy as np

In [2]:
data_df = pd.read_csv("data/training/data_train.csv", dtype='unicode')
data_df.fillna(value='', inplace=True)
data_df = data_df.apply(lambda x: x.astype(str).str.lower())

In [3]:
data_df[['clarity','conciseness']] = data_df[['clarity','conciseness']].apply(pd.to_numeric)

In [4]:
def cleanhtml(raw_html):
    result = re.sub(re.compile('<.*?>'), ' ', raw_html)
    return result
data_df['short_description_clear_html'] = data_df.apply(lambda row: cleanhtml(row['short_description']), axis=1)

In [5]:
train_df, test_df = train_test_split(data_df, test_size = 0.2)

In [6]:
full_categories_set = set()

In [7]:
for index, row in train_df.iterrows():
    full_category = row['category_lvl_1'] + "_" + row['category_lvl_2'] \
        + "_" + row['category_lvl_3']
    full_categories_set.add(full_category)

In [8]:
def create_bag_of_word(texts):
    return [ collections.Counter(re.findall(r'\w+', txt)) for txt in texts]

In [9]:
categories_word_most_frequence_dict = {}
categories_word_un_clarity_most_frequency_dict = {}
# categories_description_most_frequence_dict = {}

stopwords_set = set(stopwords.words('english'))

for full_category in full_categories_set:        

    categories = full_category.split(sep="_", maxsplit = 3)
    category_level_1 = categories[0]
    category_level_2 = categories[1]
    category_level_3 = categories[2]
    
    product_by_category = train_df[(train_df.category_lvl_1 == category_level_1) &
                                   (train_df.category_lvl_2 == category_level_2) &
                                   (train_df.category_lvl_3 == category_level_3) &
                                   (train_df.clarity == 1)]
    bag_of_words_title = create_bag_of_word([product_by_category.title.str.cat(sep=' ')])
  
    most_common_words = []
    for word, count in bag_of_words_title[0].most_common():           
        if (count > 10) & (not word.isnumeric()) & (word not in stopwords_set):
            if category_level_1 == "fashion":
                if word not in ['women', 'womens', 'girl', 'girls', 'woman'] and (count > 20):
                    most_common_words.append(word)
            else:
                most_common_words.append(word)
    
    categories_word_most_frequence_dict[full_category] = most_common_words
    
    
    bag_of_words_description = create_bag_of_word([product_by_category.short_description_clear_html.str.cat(sep=' ')])
  
    most_common_words_descrption = []
    for word, count in bag_of_words_description[0].most_common():           
        if (count > 10) & (not word.isnumeric()) & (word not in stopwords_set):
            most_common_words_descrption.append(word)
    categories_description_most_frequence_dict[full_category] = most_common_words_descrption
    

# Test model

In [12]:
clarities_pred = np.zeros(len (test_df))
categories_word_wrong_case_dict = {}

i = 0
for index, row in test_df.iterrows():
    
    full_category = row['category_lvl_1'] + "_" + row['category_lvl_2'] \
        + "_" + row['category_lvl_3']
    most_common_words_title = categories_word_most_frequence_dict.get(full_category)
    most_common_words_descrption = categories_description_most_frequence_dict.get(full_category)

    clarity_pred = 0
    for word in row['title'].split():
        if word in most_common_words_title:
            clarity_pred = 1
            break
            
#     if row['category_lvl_1'] == "fashion" and clarity_pred == 1:
#         for word in row['short_description'].split():
#             if word in most_common_words_descrption:
#                 clarity_pred = 1
#                 break   
                
    clarities_pred[i] = clarity_pred
    
            
    
    print("i = ", i)
    print ("title: ", row['title'])
    print ("descirpiton: ", row['short_description'])
    print ("Most common word: ", most_common_words_title)
    print ("full_category: ", full_category)
    print ("clarities_pred[i]: ", clarities_pred[i])
    print ("actual: ", row['clarity'])
    
    if clarities_pred[i] != row['clarity']:
        if categories_word_wrong_case_dict.get(full_category) == None:
            categories_word_wrong_case_dict[full_category] = 0
        categories_word_wrong_case_dict[full_category] += 1
    
    i = i + 1   
    

i =  0
title:  french kiss beauty hair removing wax set (tea tree, lemon, aloe vera and rose wax) and 110 wax strip paper witth free lavender water soluble wax
descirpiton:  <ul> <li>suitable for all skin type</li> <li>applicable in different season</li> <li>easily cleaned by water, without residues left on the skin</li> <li>suitable on various parts of the body</li> <li>provide a thin, economic application, suitable for use in standard cartridge wax heaters</li> <li>easy to use</li> <li>plant based extracts</li> <li>elegant design</li> </ul> 
Most common word:  ['hair', 'epilator', 'removal', 'electric']
full_category:  health & beauty_beauty tools_hair removal appliances
clarities_pred[i]:  1.0
actual:  1
i =  1
title:  king's cordless handheld rechargeable vaccum cleaner
descirpiton:  <ul> <li>cordless design; safe and reliable</li> <li>bionic cartoon style; streamlined design</li> <li>a variety of configurations; various functions within a machine</li> <li>light and flexible; easy 

descirpiton:  <ul> <li>size: 50*70 cm&nbsp;</li> <li>material: pvc</li> <li>suitable: painting surface, tile surface,&nbsp;</li> <li>glass surface, metal surface, wood surface,&nbsp;</li> </ul> 
Most common word:  ['wall', 'home', 'art', 'sticker', 'decor', 'stickers', 'room', 'decal', 'removable', 'intl', 'painting', 'diy', 'decoration', 'canvas', '3d', 'vinyl', 'mural', 'wallpaper', 'living', 'print', 'decals', 'oil', 'poster', 'export', 'picture', 'waterproof', 'bedroom', 'kids', 'modern', 'pvc', 'black', 'mirror', 'decorative', 'blue', 'multicolor', 'frameless', 'flower', 'tree', 'frame', 'new', 'cartoon', 'paper', 'white', 'awesome', 'printed', 'butterfly', 'center', 'plastic', '60x90cm', 'diamond', 'cross', 'kitchen', 'beautiful', 'house', 'stitch', 'store', 'acrylic', 'christmas', 'pattern', 'jane', 'quote', 'map', 'gift', '5d', 'nursery', 'pictures', 'window', 'creative', 'baby', 'vintage']
full_category:  home & living_home décor_wall art
clarities_pred[i]:  1.0
actual:  1
i =

full_category:  health & beauty_hair care_shampoos & conditioners
clarities_pred[i]:  1.0
actual:  1
i =  300
title:  women watches lucky star quartz watch (black) - intl
descirpiton:  <ul> <li>well designed</li> <li>100% brand new&nbsp;</li> <li>&nbsp;high quality</li> </ul> 
Most common word:  ['watch', 'women', 'quartz', 'leather', 'fashion', 'wrist', 'strap', 'watches', 'steel', 'analog', 'stainless', 'black', 'gold', 'intl', 'ladies', 'dial', 'bracelet', 'white', 'band', 'wristwatch', 'silver', 'geneva', 'diamond', 'luxury', 'casual', 'new', 'rose', 'rhinestone', 'men', 'blue', 'brand', 'waterproof', 'silicone', 'casio', 'export', 'red', 'brown', 'lady', 'womens', 'dress', 'female', 'crystal', 'faux', 'unisex', 'pink', 'elegant', 'roman', 'free', 'classic', 'pu', 'digital', 'green', 'high', 'sports', 'quality', 'case', 'couple', 'flower', 'belt', 'design', 'resin', 'business', 'kors', 'michael', 'hot', 'display', 'tone', 'vintage', 'sport', 'retro', 'numerals', 'round', 'led', 'cl

full_category:  mobiles & tablets_accessories_phone cases
clarities_pred[i]:  1.0
actual:  1
i =  465
title:  4pcs tulip flower room window curtain door sheer voile panel drapes scarfs valances
descirpiton:  <ul> <li>reasonable price</li> <li>durable and practical</li> <li>top sales item</li> <li>lovely flower curtain</li> <li>beautiful</li> <li>practical</li> <li>be used as a background for a shop window display</li> <li>creat a pretty yet modern addition for your home decor</li> </ul> 
Most common word:  ['curtain', 'window', 'door', 'curtains', 'voile', 'x', 'room', 'panel', 'drape', 'blind', 'home', 'intl']
full_category:  home & living_home décor_curtains, blinds & shades
clarities_pred[i]:  1.0
actual:  1
i =  466
title:  cherry mobile c2i with free smart sim
descirpiton:  <ul> <li>sim: dual sim</li> <li>gsm: quad band</li> <li>display: 2.8" tft lcd</li> <li>camera: 1.3mp camera</li> <li>radio: fm radio</li> <li>player: multimedia player</li> <li>bluetooth: btv2.1</li> <li>expand

actual:  1
i =  651
title:  zenfone max case, asus zenfone max case, lucky clover pu leather flip magnet wallet stand card slots case cover for asus zenfone max (zc550kl) (purple) - intl
descirpiton:  <ul> <li>high quality leather,lovely gifts</li> <li>unique lucky clover design</li> <li>a free hand strap</li> <li>with support function and slots for cards and cash</li> </ul> 
Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'fram

Most common word:  ['eau', 'de', 'toilette', '100ml', 'spray', 'women', 'edt']
full_category:  health & beauty_fragrances_women - eau de toilette
clarities_pred[i]:  1.0
actual:  1
i =  796
title:  micro usb noodle data cable 1 meter (red)
descirpiton:  <div> <ul> <li>fast charging rate up to 2.1a</li> <li>fast transfer rate up to 480mbps</li> <li>length: 1 meter / 2 meter</li> <li>cable type: micro usb</li> <li>10 colours available - white, black, orange, red, purple, dark pink, light pink, green, blue, yellow</li> </ul> </div> 
Most common word:  ['usb', 'cable', 'case', 'ipad', 'samsung', 'black', 'iphone', 'cover', 'charger', 'data', 'galaxy', 'leather', 'phone', 'intl', 'white', 'apple', 'mini', 'charging', 'micro', 'stand', 'plus', 'screen', 'tablet', 'tab', 'sync', 'adapter', 'blue', 'touch', 'pu', 'universal', 'flip', 'battery', 'amp', 'air', 'smart', 'selfie', 'stick', '6s', 'mobile', 'android', 'replacement', 'pink', 'red', '1m', 'gold', 'c', 'note', 'holder', '5s', 'type', '

title:  digital lcd electronic pocket luggage scale weight 50kg/10g hanging strap
descirpiton:  <ul> <li>high quality</li> <li>lightweight and portable</li> <li>digital scale</li> </ul> 
Most common word:  ['scale', 'body', 'digital', 'weighing', 'fat', 'electronic']
full_category:  health & beauty_medical supplies_scale & body fat analyzers
clarities_pred[i]:  1.0
actual:  1
i =  921
title:  high quality pencil usb flash drive u disk pen stylus pen fashion ballpoint pen drive memory cards pen drive red 4gb
descirpiton:  features:<br> <ul> <li>touchable pen u disk, touchable pen u disk, mobile phone touchable screen u disk pen + usb2.0 u disk + ball pen function</li> <li>fast speed, usb 2.0 specification, fast speed of data transfering</li> <li>easy to use, no need to connect to power supply</li> <li>secure and reliable, use flash electronic storage media</li> </ul> 
Most common word:  ['usb', 'drive', 'flash', 'memory', 'stick', 'disk', 'pen', 'u', 'intl', 'thumb', '16gb', 'metal', 's

i =  1035
title:  waterproof dustproof zipper golf bag rain cover (white)
descirpiton:  <ul><li>color: black, clear. material: matte pvc. </li><li>length: 110cm /43.3 inch. maximum width: 63cm /24.8 inch. </li><li>zipper design for convenient use. </li><li>waterproof and dustproof design, perfect for avoiding your golf bag luggage from rain or dust. </li></ul>
Most common word:  ['light', 'led', 'bulb', 'white', 'lamp', 'e27', 'warm', 'smd', 'intl', 'ac', 'power', 'energy', 'saving', 'e14', '12v', 'strip', 'corn', '3w', '220v', 'export', 'adapter', '265v', 'home', 'leds', 'spotlight', 'base', 'lights', '5w', 'cool', 'dc', 'dimmable']
full_category:  home & living_lighting_lighting bulbs & components
clarities_pred[i]:  0.0
actual:  1
i =  1036
title:  phone case for iphone 5/5s/se windows logo on red computer cover for apple iphone se
descirpiton:  <ul> <li>high quality phone case for iphone 5/5s/se</li> <li>fashion designs</li> <li>high quality printed</li> <li>hard plastics</li> <li>

descirpiton:  <ul> <li>brand:meco</li> <li>capacity: 4gb</li> <li>color: white</li> <li>size: approx. 65x17x8mm</li> <li>fast r ＆ w speed</li> <li>compatible with usb 1.1/2.0/3.0.</li> <li>compatible with pc, notebook, mac.</li> </ul> 
Most common word:  ['usb', 'drive', 'flash', 'memory', 'stick', 'disk', 'pen', 'u', 'intl', 'thumb', '16gb', 'metal', 'storage', '8gb', 'pendrive', '32gb', 'black', '64gb', 'mini', 'key', 'silver', 'otg', '4gb', '128gb', 'iphone', 'waterproof', 'white', 'export', 'micro', 'cartoon', 'pc', 'sandisk', 'high', 'computer', 'red']
full_category:  computers & laptops_storage_flash drives
clarities_pred[i]:  1.0
actual:  1
i =  1210
title:  new usb bluetooth wireless audio receiver adapter dongle for car smartphone high quality - intl
descirpiton:  <ul> <li>bluetooth adapter</li> <li>usb</li> <li>white</li> </ul> 
Most common word:  ['tv', 'box', 'android', 'core', 'quad', '4k', 'wifi', 'smart', 'amlogic', 'player', '8gb', 'kodi', 'hd', 'plug', 's905']
full_cat

Most common word:  ['hair', 'shaver', 'trimmer', 'electric', 'clipper', 'razor', 'men', 'rechargeable', 'intl', 'professional', 'beard']
full_category:  health & beauty_men's care_shaving
clarities_pred[i]:  1.0
actual:  1
i =  1393
title:  elba 1.7l jug kettle ejkc-1721gbk
descirpiton:  <ul> <li>1.7l</li> <li>360 degree rotation</li> <li>clear glass jug kettle</li> <li>safety locking lid</li> </ul> 
Most common word:  ['kettle', 'electric', '7l']
full_category:  home appliances_small kitchen appliances_electric kettles & thermo pots
clarities_pred[i]:  1.0
actual:  1
i =  1394
title:  outdoor crossbody single shoulder small square shoulder bag pu leather (beige)
descirpiton:  <ul> <li>fashion button rotation</li> <li>scratch resistant.</li> <li>high-grade metal chain</li> </ul> 
Most common word:  ['bag', 'shoulder', 'leather', 'bags', 'fashion', 'black', 'handbag', 'tote', 'backpack', 'intl', 'pu', 'crossbody', 'canvas', 'blue', 'new', 'cross', 'messenger', 'casual', 'red', 'body', '

full_category:  fashion_men_clothing
clarities_pred[i]:  1.0
actual:  1
i =  1602
title:  s &amp; f mini neutral leather wallet red- intl
descirpiton:  <ul style="font-size: 15px; line-height: 27.027px;"> <li>fashion</li> <li>stylish</li> <li>brand new</li> </ul> 
Most common word:  ['wallet', 'purse', 'leather', 'card', 'holder', 'long', 'bag', 'intl', 'fashion', 'clutch', 'black', 'coin', 'zipper', 'pu']
full_category:  fashion_women_wallets & accessories
clarities_pred[i]:  1.0
actual:  1
i =  1603
title:  fatima hamsa hand good luck eye pendant key chain ring keychain keyring silver
descirpiton:  <ul> <li>100% brand new and high quality</li> <li>material: alloy</li> <li>it is a good gift for your lover,family,friend and coworkers</li> </ul> 
Most common word:  ['necklace', 'gold', 'silver', 'earrings', 'women', 'jewelry', 'plated', 'crystal', 'fashion', 'intl', 'ring', 'pendant', 'bracelet', 'chain', 'white', '18k', 'export', 'rhinestone', 'heart', 'rose', 'new', 'set', 'stud', 'bl

descirpiton:  <ul> <li>stylish dot matrix retro look and design&nbsp;</li> <li>optimal protection&nbsp;</li> <li>dot view see-through notification technology&nbsp;</li> </ul> 
Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'frame', 'tempered', 'skin', 'magnetic', 'fashion', 'redmi', 'lenovo', 'mooncase', 'dual', 'full', 'slot', 'kickstand', 'rose', 'oppo', 'transparent', 'one', 'aluminum', 'silver', 'honor', 's5', 'asus', 'mobi

clarities_pred[i]:  1.0
actual:  1
i =  1974
title:  jh tempered glass screen protector for iphone6 plus 025
descirpiton:  <ul> <li>real tempered glass for maximum screen protection</li> <li>prevent fingerprints</li> <li>touch sensitive</li> <li>9h hardness</li> <li>anti-scratch</li> <li>anti-shattered film</li> <li>ultra thin</li> <li>oleophobic coating</li> </ul> 
Most common word:  ['screen', 'protector', 'glass', 'tempered', 'clear', 'samsung', 'film', 'galaxy', '9h', 'iphone', 'hd', 'anti', 'premium', 'plus', 'note', 'full', 'apple', 'guard', 'xiaomi', '5d', 'cover', 'huawei', 'sony', 'lenovo', 'black', '6s', 'edge', '3d', 'free', 'ultra', 'xperia', '3mm', 'white', 'intl', 'asus', 'pro', 'nillkin', 'redmi', 'super', 'privacy', 'explosion', 'touch', 'curved', 'phone', 'zenfone', 'export', 'inch', 'blue', 'mi', 's7', 'high', 'protective']
full_category:  mobiles & tablets_accessories_screen protectors
clarities_pred[i]:  1.0
actual:  1
i =  1975
title:  women's peep toe square heel 

actual:  1
i =  2138
title:  buildphone 3d relif pu leather flip phone cover case for xiaomi mi max (multicolor) - intl
descirpiton:  <ul> <li>material: pu leather</li> <li>3d relif</li> <li>come with a temper glass</li> <li>perfect fit</li> <li>well protected</li> </ul> <p><br></p> 
Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'frame', 'tempered', 'skin', 'magnetic', 'fashion', 'redmi', 'lenovo', 'mooncase', 'dual', 'full', 

descirpiton:  <ul> <li>material: synthetic material</li> <li>easy to insert or remove</li> <li>with card solt</li> <li>with magnetic closure</li> <li>with hand strap</li> </ul> 
Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'frame', 'tempered', 'skin', 'magnetic', 'fashion', 'redmi', 'lenovo', 'mooncase', 'dual', 'full', 'slot', 'kickstand', 'rose', 'oppo', 'transparent', 'one', 'aluminum', 'silver', 'honor', 's5', 'asus', 'mo

actual:  1
i =  2495
title:  ooplm 15 years of the new saint to small fashion color appearance rose bright pu belt white-collar female watches wholesale
descirpiton:  <ul> <li>quality assurance qc before shipment</li> <li>reasonable factory price</li> <li>full refund if there is quality problem</li> </ul> 
Most common word:  ['watch', 'women', 'quartz', 'leather', 'fashion', 'wrist', 'strap', 'watches', 'steel', 'analog', 'stainless', 'black', 'gold', 'intl', 'ladies', 'dial', 'bracelet', 'white', 'band', 'wristwatch', 'silver', 'geneva', 'diamond', 'luxury', 'casual', 'new', 'rose', 'rhinestone', 'men', 'blue', 'brand', 'waterproof', 'silicone', 'casio', 'export', 'red', 'brown', 'lady', 'womens', 'dress', 'female', 'crystal', 'faux', 'unisex', 'pink', 'elegant', 'roman', 'free', 'classic', 'pu', 'digital', 'green', 'high', 'sports', 'quality', 'case', 'couple', 'flower', 'belt', 'design', 'resin', 'business', 'kors', 'michael', 'hot', 'display', 'tone', 'vintage', 'sport', 'retro', '

Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'frame', 'tempered', 'skin', 'magnetic', 'fashion', 'redmi', 'lenovo', 'mooncase', 'dual', 'full', 'slot', 'kickstand', 'rose', 'oppo', 'transparent', 'one', 'aluminum', 'silver', 'honor', 's5', 'asus', 'mobile', 'zenfone', 'layer', 's7', 'mi', 'cases', 'printing', 'painting', 'purple', 'anti', 'slots', 'heavy', 'duty', 'bling', 'mini', 'green', 'shock', 'gel', 'plate', 'brown', 'g

i =  2858
title:  summer sexy women's spaghetti strap nightgown sleepwear female robe lounge set bathrobe+nightdress(khaki ) - intl
descirpiton:  <div> <ul> <li>&nbsp; &nbsp; material:chiffon</li> <li>&nbsp; &nbsp;collar:deep v-neck</li> <li>&nbsp; &nbsp; decroation: lace,embroidery</li> <li>&nbsp; &nbsp; robe with dress together</li> <li>&nbsp; &nbsp; size:one size<br></li> <li>&nbsp; &nbsp; beautiful package</li> <li>&nbsp;&nbsp;&nbsp; wonderful gift</li> <li>&nbsp; &nbsp; 5 colors : wine red/khaki/brown/black/red</li> </ul> </div> 
Most common word:  ['sexy', 'bra', 'lace', 'lingerie', 'black', 'waist', 'underwear', 'set', 'sleepwear', 'intl', 'dress', 'body', 'shaper', 'seamless', 'red', 'panties', 'size']
full_category:  fashion_women_lingerie, sleep & lounge
clarities_pred[i]:  1.0
actual:  0
i =  2859
title:  asus laptop charger 19v 4.74a for asus a7 asus a8 asus f3 asus f5 asus f6 asus f8 asus f9 asus g1 asus g2 series
descirpiton:  <ul> <li>input: 100-240v, 50-60hz</li> <li>ou

actual:  1
i =  3007
title:  mooncase hard protective printing back plate case cover for samsung galaxy s3 i9300 no.5003270
descirpiton:  <ul> <li>excellent quality and fashion design</li> <li>cute pattern</li> <li>secure fit for your case</li> </ul> 
Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'frame', 'tempered', 'skin', 'magnetic', 'fashion', 'redmi', 'lenovo', 'mooncase', 'dual', 'full', 'slot', 'kickstand', 'rose', 'opp

title:  mooncase hard protective printing back plate case cover for apple iphone 5c no.3008314
descirpiton:  <ul><li>excellent quality and fashion design</li><li>cute pattern</li><li>secure fit for your case</li></ul>
Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'frame', 'tempered', 'skin', 'magnetic', 'fashion', 'redmi', 'lenovo', 'mooncase', 'dual', 'full', 'slot', 'kickstand', 'rose', 'oppo', 'transparent', 'one', 'aluminu

Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'frame', 'tempered', 'skin', 'magnetic', 'fashion', 'redmi', 'lenovo', 'mooncase', 'dual', 'full', 'slot', 'kickstand', 'rose', 'oppo', 'transparent', 'one', 'aluminum', 'silver', 'honor', 's5', 'asus', 'mobile', 'zenfone', 'layer', 's7', 'mi', 'cases', 'printing', 'painting', 'purple', 'anti', 'slots', 'heavy', 'duty', 'bling', 'mini', 'green', 'shock', 'gel', 'plate', 'brown', 'g

i =  3601
title:  1.8m braided hdmi cable + mini &amp; micro hdmi adapter v1.4 hd 3d for xbox hdtv - intl
descirpiton:  <ul> <li>hdmi head: skidproof gold-plated plug head.</li> <li>wire: silver coated copper wire inside to reduce interference.</li> <li>cable cover: 4layers shielding, high-precision standard polyethylene insulating material.</li> <li>fully hdcp compliant to provide highest level of signal quality.</li> <li>supports uncompressed audio/video signals for hdtv, hdtv receiving box, for playstation 3 and new version of xbox360.</li> </ul> 
Most common word:  ['hdmi', 'cable', 'tv', 'remote', 'control', 'adapter', 'male', 'black', 'hdtv', 'intl', '1080p', 'box', 'android', 'mini', 'export', 'female', 'micro']
full_category:  tv, audio / video, gaming & wearables_tv accessories_
clarities_pred[i]:  1.0
actual:  1
i =  3602
title:  jia hua at200 wifi sport camera diving wide angle lens (black)
descirpiton:  <ul> <li>50 meters waterproof</li> <li>1.5"" hd tft display</li> <li>wa

full_category:  fashion_women_bags
clarities_pred[i]:  1.0
actual:  1
i =  3781
title:  pc plastic mural painting case for oppo r9 pink
descirpiton:  <ul> <li>high quality: 100% brand new high quality case and very durable</li> <li>dirt-resistant: this stylish design make your phone more attractive and provides the maximum protection against scratches and scuffs</li> <li>easy to use: just snap it on and snap it off</li> <li>designs printed using quality high-resolution printing technology</li> <li>unique design:protects and covers the back of your phone</li> </ul> 
Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof

actual:  1
i =  3994
title:  metal wine grape leaf wine bottle holder - intl
descirpiton:  <ul> <li class="">color: bronze</li> <li class="">material: iron</li> <li class="">dimensions: 27 × 11 × 29cm / 10.63 "* 4.33" * 11.42 ";</li> <li class="">beautiful and practical;</li> <li class="">handmade plating process;</li> </ul> 
Most common word:  ['set', 'stainless', 'cup', 'steel', 'mug', 'wine', 'bottle', 'intl', 'glass', 'coffee', 'black', 'spoon', 'tea', 'free', 'white', 'red', 'beer', 'opener', 'silver']
full_category:  home & living_kitchen & dining_tableware
clarities_pred[i]:  1.0
actual:  1
i =  3995
title:  women baroque style embroidered mirror girl pattern black leather backpack travel daypack school bag
descirpiton:  <ul style="white-space:normal;"> <li>condition: 100% brand new, sealed in official branded retail packaging</li> <li>material: anti-friction, anti-aging, soft pu leather,</li> <li>lining: anti-wrinkled, anti-wear polyester fiber.</li> <li>hardware: anti-oxidant,

i =  4173
title:  matte hard case for 12" apple macbook retina a1534 pink (export)
descirpiton:  <ul> <li>high quality</li> <li>hard</li> <li>light weight</li> <li>apple logo seen through the case.</li> <li>15 colors available</li> </ul> 
Most common word:  ['macbook', 'case', 'apple', 'cover', 'hard', 'pro', 'inch', 'air', 'laptop', 'retina', 'protective', 'shell', 'plastic', 'mac', 'keyboard', 'protector', 'black']
full_category:  computers & laptops_computer accessories_mac accessories
clarities_pred[i]:  1.0
actual:  1
i =  4174
title:  classic manual shaver double edge blade razor with box
descirpiton:  <ul> <li>this razor has a butterfly opening</li> <li>you only need to twist the handle and the razor head opens</li> <li>easily load and unload razor blades,easy to use</li> <li>without handling tool rest</li> <li>select the best angle for shaving</li> <li>it fits any standard double edged razor blades such as: gillette, astra, dorco, feather...etc.</li> </ul> 
Most common word:  [

descirpiton:  <ul> <li>24.2mp dx-format cmos sensor</li> <li>expeed 4 image processor</li> <li>3.2" 1.037m-dot vari-angle touchscreen</li> <li>full hd 1080p video recording at 60 fps</li> <li>multi-cam 4800dx 39-point af sensor</li> <li>iso 100-25600 and 5 fps shooting</li> <li>snapbridge bluetooth and wi-fi with nfc</li> <li>time-lapse movie recording</li> <li>af-s dx 18-140mm f/3.5-5.6g ed vr lens</li> </ul> 
Most common word:  ['canon', 'kit', 'camera', '55mm', 'eos', 'lens', 'nikon', 'black', 'digital', 'vr']
full_category:  cameras_dslr_sets
clarities_pred[i]:  1.0
actual:  1
i =  4352
title:  jewelmine catherine jewelry set (gold)
descirpiton:  <ul> <li>style: pendant necklace, ring, and earrings</li> <li>material: brass</li> <li>finish: shinny</li> <li>stones: cubic zircon</li> <li>yellow gold plated shinny</li> <li>for women of all ages</li> <li>perfect use for any occasion</li> </ul> 
Most common word:  ['necklace', 'gold', 'silver', 'earrings', 'women', 'jewelry', 'plated', '

clarities_pred[i]:  1.0
actual:  1
i =  4564
title:  leptin advanced apple cider (twin pack)
descirpiton:  not for children<br /> natural ingredients<br /> burns fat<br /> antioxidant<br />buy 2 free 1<br />drink in cool<br />
Most common word:  ['formula', 'herbalife', 'tea']
full_category:  health & beauty_food supplements_weight management
clarities_pred[i]:  0.0
actual:  1
i =  4565
title:  huawei b315 b315s607 4g 150mbps direct sim card router 4 lan 32 wifi 1 tel port
descirpiton:  <ul> <li>can make call / receiving call and same time using internet (hotspot)</li> <li>sms support</li> <li>excellent work with singtel starhub and m1 ++ other oversea network&nbsp;(provided 4g/3g band is matched)</li> <li>strong wifi range ( ~ 250m)</li> <li>cat 4 150mbps high speed 4g lte</li> <li>advance it and security feature</li> <li>support huawei app ( can view statistic from android and ios phone)</li> <li>singapore local warranty</li> </ul> <div>&nbsp;</div> <div>&nbsp; <div>&nbsp;</div> </di

descirpiton:  <ul> <li>compatible for bowens mount studio flash, 80 * 80 cm softbox.</li> <li>high reflective material, help to balance and soften the light.</li> <li>foldable design, can be fold easily with its flexible spring.</li> <li>comes with a portable carry bag for convenient storage.</li> <li>with inner soft cloth, better to diffuse the light.</li> </ul> 
Most common word:  ['studio', 'photo', 'photography', 'background', 'light', 'backdrop', 'vinyl', 'intl', 'video', '5x7ft', 'camera', 'props', 'wall', 'portable', 'led']
full_category:  cameras_camera accessories_lighting & studio equipment
clarities_pred[i]:  1.0
actual:  1
i =  4742
title:  fashion men bags men messenger crossbody bag - vertical style - black - big size
descirpiton:  <ul> <li>vertical style and horizontal style to choose, this link is vertical style</li> <li>big size and small size to choose, this link is big size</li> <li>brown color and black color for you, this link is black color</li> </ul> 
Most common

descirpiton:  <div> <ul> <li>for fashion design</li> <li>has a unique temperament</li> <li>with the world trend.</li> <li>vintage sweet elegant</li> </ul> </div> 
Most common word:  ['dress', 'kids']
full_category:  fashion_girls_clothing
clarities_pred[i]:  1.0
actual:  1
i =  4923
title:  battery for hp compaq presario v3000 / v3100 / v3500 / v3600
descirpiton:  compaq: 436281-241 / 452057-001 / 62337-001 / hstnn-db42 / hstnn-lb42<br> hp: 411462-141<br> product type: replacement battery<br> battery type: li-ion<br> cells quantity: 6<br> voltage(v): 10.8v<br> capacity: 4400mah
Most common word:  ['battery', 'laptop', 'replacement', 'hp', 'pavilion', 'series', 'cells', 'toshiba', 'satellite', 'notebook', 'replacements', 'compaq', 'acer', 'oem', 'lenovo']
full_category:  computers & laptops_computer accessories_batteries
clarities_pred[i]:  1.0
actual:  1
i =  4924
title:  beyond healing force professional scalp shampoo 250ml
descirpiton:  <ul><li>shampoo</li><li>make hair healthy</li><

i =  5066
title:  swisstech jamaica case for iphone 6 plus (yellow green)
descirpiton:  <ul style="line-height: 18px;"> <li>designed for apple iphone 6 plus</li> <li>impact resistant and lightweight</li> <li>access to all ports, controls and sensor</li> </ul> 
Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'frame', 'tempered', 'skin', 'magnetic', 'fashion', 'redmi', 'lenovo', 'mooncase', 'dual', 'full', 'slot', 'kickstand', 'ro

Most common word:  ['eau', 'de', 'toilette', '100ml', 'spray', 'women', 'edt']
full_category:  health & beauty_fragrances_women - eau de toilette
clarities_pred[i]:  1.0
actual:  1
i =  5245
title:  tiny floral rectangle oven dish
descirpiton:  <ul> <li>great for lagsane / baked rice / baked pasta!</li> <li><strong>oven, microwave dishwasher safe</strong></li> <li>dimension: <strong>20cm x 13cm x 6cm depth</strong></li> <li>not for stove use</li> <li>material: stoneware</li> </ul> 
Most common word:  ['pan', 'set', 'pot', 'stainless', 'cookware', 'steel']
full_category:  home & living_kitchen & dining_cookware
clarities_pred[i]:  0.0
actual:  1
i =  5246
title:  digital lcd magnetic kitchen cooking timer black
descirpiton:  <ul> <li>size: 7.4*6.3*2.0cm</li> <li>timer range: 1minute - 99 minutes 59 seconds</li> <li>with magnet on rear for attaching to fridge or freezer</li> <li>powered by 1 x aaa battery (not included)</li> </ul> 
Most common word:  ['kitchen', 'stainless', 'steel', 'in

i =  5458
title:  cetaphil moisturizing cream 100g
descirpiton:  <ul> <li>100% original</li> <li>high quality</li> <li>sold by ray wellness pharmacy</li> </ul> 
Most common word:  ['cream', 'skin', '50ml']
full_category:  health & beauty_skin care_moisturizers and cream
clarities_pred[i]:  1.0
actual:  1
i =  5459
title:  5m 9.6w/m 300led smd 5050 led strip waterproof ip66 220v (white) - intl
descirpiton:  <ul> <li>5m 9.6w/m 300led smd 5050 led&nbsp;</li> <li>strip waterproof ip66 220v (white)</li> <li>100% new product</li> </ul> 
Most common word:  ['light', 'led', 'lamp', 'string', 'lights', 'night', 'fairy', 'party', 'christmas', 'white', 'wedding', 'color', 'intl', '5m', 'battery']
full_category:  home & living_lighting_specialty lights
clarities_pred[i]:  1.0
actual:  1
i =  5460
title:  asus ze550kl zenfone 2 laser 16gb (silver)
descirpiton:  <div> <ul> <li>android 5.0 (lollipop)</li> <li>5.5" 1280x720 hd ips display</li> <li>qualcomm snapdragon m8916 quad-core 1.2ghz</li> <li>13

i =  5659
title:  cool boy clear edge soft silicon painting back cover case for oppo neo 5 a31 (multicolor) - intl
descirpiton:  <ul> <li>compatibility for oppo neo 5 /neo 5s /a31</li> <li>unique design</li> <li>ultra thin</li> <li>transparente edge</li> </ul> 
Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'frame', 'tempered', 'skin', 'magnetic', 'fashion', 'redmi', 'lenovo', 'mooncase', 'dual', 'full', 'slot', 'kickstand', 'r

Most common word:  ['necklace', 'gold', 'silver', 'earrings', 'women', 'jewelry', 'plated', 'crystal', 'fashion', 'intl', 'ring', 'pendant', 'bracelet', 'chain', 'white', '18k', 'export', 'rhinestone', 'heart', 'rose', 'new', 'set', 'stud', 'blue', 'flower', 'black', 'diamond', 'zircon', 'charm', 'steel', 'wedding', 'alloy', 'love', 'gift', 'style', 'amp', 'free', 'sterling', 'drop', 'austria', 'pearl', 'beads', 'purple', 'bracelets', 'earring', 'ear', 'red', 'stainless', 'f', 'color', 'shape', 'round', 'rings', 'long', 'size', 'elegant', 'vintage', 'lady', 'green', 'bangle', 'pink', 'one', 'leaf', 'studs', 'design', 'dangle', 'high', 'yellow', 'quality', 'simple', 'couple', 'accessories', 'glass', 'pair', 'leather', 'ball', 'retro', 'brooch', 'cubic', 'zirconia', 'hoop', 'cto', 'men', 'charms', 'key', 'cz', 'hollow', 'necklaces', 'stone', 'wholesale', 'nickle', 'swarovski', 'real', 'girls', 'keychain', 'champagne', 'platinum', 'bluelans', 'diy', 'party', 'colorful', 'lucky', 'pendants

clarities_pred[i]:  1.0
actual:  1
i =  6009
title:  i-techie 701 4gb (black) with free 7" leather case (green) and screen protector
descirpiton:  <ul> <li>andriod 4.2 jelly bean</li> <li>7" tft lcd capacitive touchscreen display</li> <li>4gb internal storage (up to 32gb)</li> <li>512mb ram</li> <li>1.2ghz dual core a8 cpu</li> <li>2.0mp rear and front camera</li> <li>features wi-fi</li> <li>280mah battery</li> </ul> 
Most common word:  ['tablet', 'white', '16gb', 'black', 'tab', 'samsung', '8gb', 'wifi', 'free']
full_category:  mobiles & tablets_tablets_
clarities_pred[i]:  1.0
actual:  1
i =  6010
title:  mengs a7 a7r 1/4 inch mounting screw camera l-shaped quick releaseplate for sony a7 a7r
descirpiton:  <ul class=""> <li class="">material: aluminium alloy</li> <li class="">color: matt black</li> <li class="">length: 138mm</li> <li class="">width: 39/50mm</li> <li class="">height: 74mm</li> <li class="">self weight: 130g</li> <li class="">mounting screw diameter: 1/4</li> </ul> 
Mos

actual:  1
i =  6208
title:  stickers sumikko gurashi stationery goodie bag christmas
descirpiton:  <ul> <li>stickers sumikko gurashi stationery goodie bag christmas</li> <li>stationery</li> <li>fancy stationery</li> </ul> 
Most common word:  ['pencil', 'paper', 'case', 'bag', 'tape', 'holder', 'red', 'black']
full_category:  home & living_stationery_school & office accessories
clarities_pred[i]:  1.0
actual:  1
i =  6209
title:  aveda dry remedy moisturizing conditioner - for drenches dry, brittle hair (new packaging) 1000ml/33.8oz (export)
descirpiton:  <ul> <li>a moisture-boosting conditioner for dry brittle hair.</li> <li>patent-pending deep moisture complex goes deep into hair.</li> <li>delivers intense moisture to hydrate even the driest most brittle hair.</li> </ul> 
Most common word:  ['shampoo', 'hair', 'conditioner', '250ml']
full_category:  health & beauty_hair care_shampoos & conditioners
clarities_pred[i]:  1.0
actual:  1
i =  6210
title:  ujs mirror clip usb digital mp3 m

i =  6388
title:  curren 8148 stainless steel strap men wrist quartz watch black
descirpiton:  <ul style= "color: rgb(51, 51, 51); font-family: arial, 'helvetica neue', helvetica, sans-serif, zawgyi-one; font-size: 15px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; line-height: 18px; orphans: auto; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: 1; word-spacing: 0px; -webkit-text-stroke-width: 0px;"> <li>stylish quartz wrist watch for men with date display</li> <li>solid stainless steel front and back case</li> <li>normal water resistant: hand washing or in the rain</li> <li>tungsten steel alloy strap</li> </ul> 
Most common word:  ['watch', 'women', 'quartz', 'leather', 'fashion', 'wrist', 'strap', 'watches', 'steel', 'analog', 'stainless', 'black', 'gold', 'intl', 'ladies', 'dial', 'bracelet', 'white', 'band', 'wristwatch', 'silver', 'geneva', 'diamond', 'luxury', 'casual', 'new', 'rose', 'rhinestone', 

Most common word:  ['controller', 'black', 'game', 'ps4', 'gamepad', 'nintendo', 'wireless', 'joystick', 'case', 'bluetooth', 'xbox', 'intl', 'cover', 'playstation', 'skin', 'white', 'sony', 'console', 'silicone', 'pc', 'remote', 'usb', 'replacement', '3ds', 'slim', 'new']
full_category:  tv, audio / video, gaming & wearables_gaming_gaming accessories
clarities_pred[i]:  1.0
actual:  1
i =  6495
title:  supercart cute flower skin case cover hard pc protector for apple iphone 6 plus 5.5" (blue)(export)
descirpiton:  <div class="prdcontent"> <ul> <li>condition:new</li> <li>material: pc</li> <li>suitable for: iphone 6 plus 5.5"</li> <li>durable hard pc back to protect your device</li> <li>precisely engineered to fit your phone perfectly</li> <li>easy to clean</li> <li>can be installed and removed by hands directly</li> </ul> </div> 
Most common word:  ['usb', 'cable', 'case', 'ipad', 'samsung', 'black', 'iphone', 'cover', 'charger', 'data', 'galaxy', 'leather', 'phone', 'intl', 'white', '

i =  6644
title:  sexy nightgown lace spa womens robes sleepwear sexy costumes bathrobe ankle-length ladies robe female nightwear(white)
descirpiton:  <ul> <li>&nbsp; &nbsp; material:chiffon</li> <li>&nbsp; &nbsp; collar:v-neck</li> <li>&nbsp; &nbsp; decroation: lace,embroidery</li> <li>&nbsp; &nbsp; size:one size</li> <li>&nbsp; &nbsp; beautiful package</li> <li>&nbsp; &nbsp; wonderful gift</li> <li>&nbsp; &nbsp; 3 colors :white/black/red</li> </ul> 
Most common word:  ['sexy', 'bra', 'lace', 'lingerie', 'black', 'waist', 'underwear', 'set', 'sleepwear', 'intl', 'dress', 'body', 'shaper', 'seamless', 'red', 'panties', 'size']
full_category:  fashion_women_lingerie, sleep & lounge
clarities_pred[i]:  1.0
actual:  0
i =  6645
title:  holbrook sunglasses 2015 new vr46 moto gp for men women sun glasses(white/orange)
descirpiton:  <div> <ul> <li>high quality men holbrook sunglasses</li> <li>plastic</li> <li>summer, spring, autumn</li> <li>cool, uv400</li> <li>outdoor/ skating/ fishing</li>

i =  6815
title:  japan.esprit black stainless-steel case leather ladies nwt + warranty es106512001
descirpiton:  <ul> <li>esprit es106512001</li> <li>38 (with crown) x 36 (w) x 8 mm (thickness)</li> <li>wr: 30m</li> <li>nwt + esprit box + warranty</li> <li>stainless-steel case</li> <li>leather</li> <li>mineral glass</li> </ul> 
Most common word:  ['watch', 'women', 'quartz', 'leather', 'fashion', 'wrist', 'strap', 'watches', 'steel', 'analog', 'stainless', 'black', 'gold', 'intl', 'ladies', 'dial', 'bracelet', 'white', 'band', 'wristwatch', 'silver', 'geneva', 'diamond', 'luxury', 'casual', 'new', 'rose', 'rhinestone', 'men', 'blue', 'brand', 'waterproof', 'silicone', 'casio', 'export', 'red', 'brown', 'lady', 'womens', 'dress', 'female', 'crystal', 'faux', 'unisex', 'pink', 'elegant', 'roman', 'free', 'classic', 'pu', 'digital', 'green', 'high', 'sports', 'quality', 'case', 'couple', 'flower', 'belt', 'design', 'resin', 'business', 'kors', 'michael', 'hot', 'display', 'tone', 'vintag

i =  6997
title:  mini silicone leather phone cover for xiaomi note (white)
descirpiton:  <ul> <li>type: phone case</li> <li>compatible phone model: xiaomi 3, xiaomi 4, xiaomi redmi, xiaomi redmi note</li> <li>color: rose red, brown, white, black, blue</li> <li>material: leather</li> <li>style: business, leisure, simple, fashion</li> <li>feature: mini invisible sucker, phone bracket, double windows</li> </ul> 
Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'desi

Most common word:  ['case', 'cover', 'phone', 'samsung', 'iphone', 'galaxy', 'back', 'leather', 'tpu', 'flip', 'multicolor', 'intl', 'hard', 'black', 'pu', 'plus', 'soft', 'protective', 'apple', 'pc', 'wallet', 'pattern', '6s', 'stand', 'plastic', 'huawei', 'card', 'note', 'ultra', 'hybrid', 'blue', '5s', 'armor', 'se', 'sony', 'export', 'red', 'xiaomi', 'gold', 'xperia', 'thin', 'lg', 'protector', 'amp', 'slim', 'shockproof', 'silicone', 'pink', 'shell', 'white', 'clear', 'moonmini', 'inch', 'metal', '3d', 'color', 'luxury', 'bumper', 'buildphone', 'screen', 'edge', 'htc', 'design', 'glass', 'holder', 's6', 'frame', 'tempered', 'skin', 'magnetic', 'fashion', 'redmi', 'lenovo', 'mooncase', 'dual', 'full', 'slot', 'kickstand', 'rose', 'oppo', 'transparent', 'one', 'aluminum', 'silver', 'honor', 's5', 'asus', 'mobile', 'zenfone', 'layer', 's7', 'mi', 'cases', 'printing', 'painting', 'purple', 'anti', 'slots', 'heavy', 'duty', 'bling', 'mini', 'green', 'shock', 'gel', 'plate', 'brown', 'g

In [13]:
from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(test_df.clarity.as_matrix(), clarities_pred))

0.38055970036906306

In [14]:
max(categories_word_wrong_case_dict, key=categories_word_wrong_case_dict.get)

'fashion_women_clothing'