In [1]:
import pandas as pd
import numpy as np
import nltk
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
import re

encoding = "ISO-8859-1"
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package stopwords to /home/eugene/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/eugene/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/eugene/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [2]:
stemmer = SnowballStemmer('english')
stop_words = set(stopwords.words('english'))
stop_words.add('xby')

typos = {
    'ennox': 'lennox',
    'garde': 'garden',
    'unsulation': 'insulation',
    'carboy': 'carbon',
    'grqss': 'grass',
    'spaonges': 'sponges',
    'fen': 'fern',
    'free shipping': 'null'
}


def replaces(s):
    if isinstance(s, str):
        s = s.lower()
        if s in typos:
            return typos[s]
        
        s = re.sub(r"(\w)[\.:]?([A-Z])", r"\1 \2", s)
    
        s = s.replace(" x "," xby ")
        s = s.replace("*"," xby ")
        s = s.replace(" by "," xby")
        s = s.replace("x0"," xby 0")
        s = s.replace("x1"," xby 1")
        s = s.replace("x2"," xby 2")
        s = s.replace("x3"," xby 3")
        s = s.replace("x4"," xby 4")
        s = s.replace("x5"," xby 5")
        s = s.replace("x6"," xby 6")
        s = s.replace("x7"," xby 7")
        s = s.replace("x8"," xby 8")
        s = s.replace("x9"," xby 9")
        s = s.replace("0x","0 xby ")
        s = s.replace("1x","1 xby ")
        s = s.replace("2x","2 xby ")
        s = s.replace("3x","3 xby ")
        s = s.replace("4x","4 xby ")
        s = s.replace("5x","5 xby ")
        s = s.replace("6x","6 xby ")
        s = s.replace("7x","7 xby ")
        s = s.replace("8x","8 xby ")
        s = s.replace("9x","9 xby ")
        
        s = re.sub(r"([0-9])( *)\.( *)([0-9])", r"\1.\4", s)
        s = re.sub(r"([0-9]+)( *)(inches|inch|in|')\.?", r"\1 in. ", s)
        s = re.sub(r"([0-9]+)( *)(foot|feet|ft|'')\.?", r"\1 ft. ", s)
        s = re.sub(r"([0-9]+)( *)(pounds|pound|lbs|lb)\.?", r"\1 lb. ", s)
        s = re.sub(r"([0-9]+)( *)(square|sq) ?\.?(feet|foot|ft)\.?", r"\1sq.ft. ", s)
        s = re.sub(r"([0-9]+)( *)(cubic|cu) ?\.?(feet|foot|ft)\.?", r"\1cu.ft. ", s)
        s = re.sub(r"([0-9]+)( *)(gallons|gallon|gal)\.?", r"\1gal. ", s)
        s = re.sub(r"([0-9]+)( *)(ounces|ounce|oz)\.?", r"\1oz. ", s)
        s = re.sub(r"([0-9]+)( *)(centimeters|cm)\.?", r"\1cm. ", s)
        s = re.sub(r"([0-9]+)( *)(milimeters|mm)\.?", r"\1mm. ", s)
        s = s.replace("°"," degrees ")
        s = re.sub(r"([0-9]+)( *)(degrees|degree)\.?", r"\1deg. ", s)
        s = s.replace(" v "," volts ")
        s = re.sub(r"([0-9]+)( *)(volts|volt)\.?", r"\1volt. ", s)
        s = re.sub(r"([0-9]+)( *)(watts|watt)\.?", r"\1watt. ", s)
        s = re.sub(r"([0-9]+)( *)(amperes|ampere|amps|amp)\.?", r"\1amp. ", s)
        
        s = s.replace("toliet","toilet")
        s = s.replace("airconditioner","air conditioner")
        s = s.replace("vinal","vinyl")
        s = s.replace("vynal","vinyl")
        s = s.replace("skill","skil")
        s = s.replace("snowbl","snow bl")
        s = s.replace("plexigla","plexi gla")
        s = s.replace("rustoleum","rust-oleum")
        s = s.replace("whirpool","whirlpool")
        s = s.replace("whirlpoolga", "whirlpool ga")
        s = s.replace("whirlpoolstainless","whirlpool stainless")
        s = s.replace("whirlpoolstainless","whirlpool stainless")
        s = s.replace('&amp;', '')
        s = s.replace("moulding","molding")

        s = s.replace("  "," ")
        
        s = s.replace("/", " ")
        s = s.replace("-", " ")
        return " ".join([re.sub('[^A-Za-z0-9./]', ' ', word)
                         for word in s.split()
                         if word.isdigit() or len(word)>1])
    else:
        return "null"

@np.vectorize
def stem_sentence(s): 
    s = replaces(s)   
    return str(" ".join([stemmer.stem(word)
                         for word in s.split()
                         if word not in stop_words]))


In [3]:
encoding = "ISO-8859-1"

products = pd.read_csv('zip/product_descriptions.csv.gz', encoding=encoding, compression='gzip',
                       index_col=['product_uid'])
products['orig_descr'] = products['product_description'].astype(str)
products['descr'] = stem_sentence(products['orig_descr'].astype(str))

In [4]:
products = products.drop(columns=['product_description'])
products.to_csv('zip/descr3.csv.gz', compression='gzip')

In [5]:
train = pd.read_csv('zip/train.csv.gz', index_col=['id'], compression='gzip', encoding=encoding)
train.head()

Unnamed: 0_level_0,product_uid,product_title,search_term,relevance
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,100001,Simpson Strong-Tie 12-Gauge Angle,angle bracket,3.0
3,100001,Simpson Strong-Tie 12-Gauge Angle,l bracket,2.5
9,100002,BEHR Premium Textured DeckOver 1-gal. #SC-141 ...,deck over,3.0
16,100005,Delta Vero 1-Handle Shower Only Faucet Trim Ki...,rain shower head,2.33
17,100005,Delta Vero 1-Handle Shower Only Faucet Trim Ki...,shower only faucet,2.67


In [6]:
train['orig_search_term'] = train['search_term'].astype(str)
train['search_term'] = stem_sentence(train['orig_search_term'].astype(str) )
train.loc[train['search_term'] == '', 'search_term'] = 'null'
train['search_term'] = train['search_term'].astype(str)

train['orig_product_title'] = train['product_title'].astype(str)
train['product_title'] = stem_sentence(train['product_title'].astype(str) )
train['product_title'] = train['product_title'].astype(str)

train['orig_descr'] = train.product_uid.map(products['orig_descr'])
train['descr'] = train.product_uid.map(products['descr'])

train.head()

Unnamed: 0_level_0,product_uid,product_title,search_term,relevance,orig_search_term,orig_product_title,orig_descr,descr
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,100001,simpson strong tie 12 gaug angl,angl bracket,3.0,angle bracket,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they ...",angl make joint stronger also provid consist s...
3,100001,simpson strong tie 12 gaug angl,bracket,2.5,l bracket,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they ...",angl make joint stronger also provid consist s...
9,100002,behr premium textur deckov 1 gal. sc 141 tugbo...,deck,3.0,deck over,BEHR Premium Textured DeckOver 1-gal. #SC-141 ...,BEHR Premium Textured DECKOVER is an innovativ...,behr premium textur deckov innov solid color c...
16,100005,delta vero 1 handl shower faucet trim kit chro...,rain shower head,2.33,rain shower head,Delta Vero 1-Handle Shower Only Faucet Trim Ki...,Update your bathroom with the Delta Vero Singl...,updat bathroom delta vero singl handl shower f...
17,100005,delta vero 1 handl shower faucet trim kit chro...,shower faucet,2.67,shower only faucet,Delta Vero 1-Handle Shower Only Faucet Trim Ki...,Update your bathroom with the Delta Vero Singl...,updat bathroom delta vero singl handl shower f...


In [7]:
train.to_csv('zip/train8.csv.gz', compression='gzip')

In [8]:
from fuzzywuzzy import fuzz


@np.vectorize
def word_match_count(search_term, text):
    count = 0
    search_term = search_term.split()
    text = text.split()
    if len(search_term) == 0:
        return 0
    for word in set(search_term):
        count += 1 if text.count(word) > 0 else 0
    return count / len(search_term)


@np.vectorize
def word_match_std(search_term, text):
    indexes = []
    search_term = search_term.split()
    text = text.split()    
    n = len(text)
    if len(search_term) == 0:
        return 0
    for word in set(search_term):
        if word in text:
            indexes.append(text.index(word))
    if len(indexes) > 0:
        return np.var(indexes)
    else:
        return 0

    
@np.vectorize
def match_last_word(search_term, text):
    last_word = search_term.split()[-1]
    if text.find(last_word) > -1:
        return 1
    else:
        return 0


@np.vectorize    
def word_match_index(search_term, text):
    count = 0
    search_term = search_term.split()
    text = text.split()
    if len(search_term) == 0:
        return 0
    min_ind = len(text)
    for word in set(search_term):
        if word in text:
            min_ind = min(min_ind, text.index(word))
    return min_ind / len(text) 


@np.vectorize 
def match_word_n_pos(text, search_term):
    text = nltk.pos_tag(replaces(text).split())
    search_pos = nltk.pos_tag(replaces(search_term).split())
    text = ["%s/%s" % (word[0].lower(), word[1]) for word in text]
    search_pos = ["%s/%s" % (word[0].lower(), word[1]) for word in search_pos]
    
    if len(search_pos) == 0:
        return 0
    matches = 0
    for word in set(text):
        if word in search_pos:
            matches += 1
    return matches / len(search_pos)


@np.vectorize
def match_first_word(search_term, text):
    last_word = search_term.split()[0]
    if text.find(last_word) > -1:
        return 1
    else:
        return 0

    
@np.vectorize
def match_numbers(search_term, text):
    search_term_numbers = set(re.findall(r'\d+', search_term))
    len_ = len(search_term_numbers)
    if len_ == 0:
        return 0
    text_numbers = set(re.findall(r'\d+', text))
    match = len([1 for i in search_term_numbers if i in text_numbers])
    return match / len_


@np.vectorize
def fuzzy_match(search_term, field):
    if search_term == 'null':
        return 0
    return fuzz.token_set_ratio(search_term, field)


@np.vectorize
def fuzzy_match_partial(search_term, field):
    if search_term == 'null':
        return 0
    return fuzz.partial_token_set_ratio(search_term, field)

In [9]:
attrs = pd.read_csv('zip/attributes.csv.gz', compression='gzip', encoding=encoding)
brand = attrs[attrs['name'] == 'MFG Brand Name']
brand.index = brand['product_uid']
brand['value'] = stem_sentence(brand['value'].astype(str))
brand['value'] = brand['value'].astype(str)
brand.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0_level_0,product_uid,name,value
product_uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
100001.0,100001.0,MFG Brand Name,simpson strong tie
100002.0,100002.0,MFG Brand Name,behr premium textur deckov
100003.0,100003.0,MFG Brand Name,sterl
100004.0,100004.0,MFG Brand Name,grape solar
100005.0,100005.0,MFG Brand Name,delta


In [10]:
brand.drop(columns=['product_uid', 'name'])
brand.to_csv('zip/brands8.csv.gz', compression='gzip')

In [11]:
%%time

for col in ['product_title', 'descr']:
    train['match_' + col] = word_match_count(train['search_term'], train[col])
    
train['len_search_term_words'] = train['search_term'].apply(lambda x: len(x.split()))
train['len_search_term_letters'] = train['search_term'].str.len()

train['match_pos_title'] = match_word_n_pos(train['orig_product_title'], train['orig_search_term'])
train['match_pos_descr'] = match_word_n_pos(train['orig_descr'], train['orig_search_term'])

train['prod_title_ind'] = word_match_index(train['search_term'], train['product_title'])
train['descr_ind'] = word_match_index(train['search_term'], train['descr'])

train['words_std_title'] = word_match_std(train['search_term'], train['product_title'])
train['words_std_descr'] = word_match_std(train['search_term'], train['descr'])

train['len_title'] = train['product_title'].apply(lambda x: len(x.split()))
train['len_descr'] = train['descr'].apply(lambda x: len(x.split()))

train['last_word_title'] = match_last_word(train['search_term'], train['product_title'])  
train['last_word_descr'] = match_last_word(train['search_term'], train['descr']) 

train['match_orig_title'] = word_match_count(train['orig_search_term'], train['orig_product_title'])
train['match_orig_descr'] = word_match_count(train['orig_search_term'], train['orig_descr'])

train['fuzzy_title'] = fuzzy_match(train['search_term'], train['product_title'])
train['fuzzy_descr'] = fuzzy_match(train['search_term'], train['descr'])

train['fuzzy_orig_title'] = fuzzy_match(train['orig_search_term'], train['orig_product_title'])
train['fuzzy_orig_descr'] = fuzzy_match(train['orig_search_term'], train['orig_descr'])

train['part_fuzzy_title'] = fuzzy_match_partial(train['search_term'], train['product_title'])
train['part_fuzzy_descr'] = fuzzy_match_partial(train['search_term'], train['descr'])

train['part_fuzzy_orig_title'] = fuzzy_match_partial(train['orig_search_term'], train['orig_product_title'])
train['part_fuzzy_orig_descr'] = fuzzy_match_partial(train['orig_search_term'], train['orig_descr'])

train['first_word_title'] = match_first_word(train['search_term'], train['product_title'])  
train['first_word_descr'] = match_first_word(train['search_term'], train['descr']) 

train['match_numbers_title'] = match_numbers(train['search_term'], train['product_title'])  

train['brand'] = train.product_uid.map(brand['value']).astype(str)
train['brand'] = train['brand'].fillna('').astype(str)
train['fuzzy_brand'] = fuzzy_match_partial(train['brand'], train['search_term'])

CPU times: user 7min 54s, sys: 1.43 s, total: 7min 55s
Wall time: 7min 54s


In [12]:
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', None)

In [13]:
train.to_csv('zip/train8.csv.gz', compression='gzip')
train.head()

Unnamed: 0_level_0,product_uid,product_title,search_term,relevance,orig_search_term,orig_product_title,orig_descr,descr,match_product_title,match_descr,len_search_term_words,len_search_term_letters,match_pos_title,match_pos_descr,prod_title_ind,descr_ind,words_std_title,words_std_descr,len_title,len_descr,last_word_title,last_word_descr,match_orig_title,match_orig_descr,fuzzy_title,fuzzy_descr,fuzzy_orig_title,fuzzy_orig_descr,part_fuzzy_title,part_fuzzy_descr,part_fuzzy_orig_title,part_fuzzy_orig_descr,first_word_title,first_word_descr,match_numbers_title,brand,fuzzy_brand
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
2,100001,simpson strong tie 12 gaug angl,angl bracket,3.0,angle bracket,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw,0.5,0.5,2,12,0.5,0.0,0.833333,0.0,0.0,0.0,6,94,0,0,0.0,0.0,50,50,56,4,100,100,100,62,1,1,0,simpson strong tie,33
3,100001,simpson strong tie 12 gaug angl,bracket,2.5,l bracket,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw,0.0,0.0,1,7,0.0,0.0,1.0,1.0,0.0,0.0,6,94,0,0,0.0,0.0,11,3,19,3,14,43,33,44,0,0,0,simpson strong tie,29
9,100002,behr premium textur deckov 1 gal. sc 141 tugboat wood concret coat,deck,3.0,deck over,BEHR Premium Textured DeckOver 1-gal. #SC-141 Tugboat Wood and Concrete Coating,"BEHR Premium Textured DECKOVER is an innovative solid color coating. It will bring your old, weathered wood or concrete back to life. The advanced 100% acrylic resin formula creates a durable coating for your tired and worn out deck, rejuvenating to a whole new look. For the best results, be sure to properly prepare the surface using other applicable BEHR products displayed above.California residents: see&nbsp;Proposition 65 informationRevives wood and composite decks, railings, porches and boat docks, also great for concrete pool decks, patios and sidewalks100% acrylic solid color coatingResists cracking and peeling and conceals splinters and cracks up to 1/4 in.Provides a durable, mildew resistant finishCovers up to 75 sq. ft. in 2 coats per gallonCreates a textured, slip-resistant finishFor best results, prepare with the appropriate BEHR product for your wood or concrete surfaceActual paint colors may vary from on-screen and printer representationsColors available to be tinted in most storesOnline Price includes Paint Care fee in the following states: CA, CO, CT, ME, MN, OR, RI, VT",behr premium textur deckov innov solid color coating. bring old weather wood concret back life. advanc 100 acryl resin formula creat durabl coat tire worn deck rejuven whole new look. best result sure proper prepar surfac use applic behr product display above.california resid see nbsp proposit 65 in. formationrev wood composit deck rail porch boat dock also great concret pool deck patio sidewalks100 acryl solid color coatingresist crack peel conceal splinter crack 1 4 in. provid durabl mildew resist finishcov 75 sq. ft. 2 coat per galloncr textur slip resist finishfor best result prepar appropri behr product wood concret surfaceactu paint color may vari screen printer representationscolor avail tint storesonlin price includ paint care fee follow state ca co ct mn ri vt,0.0,1.0,1,4,0.0,0.5,1.0,0.203252,0.0,0.0,12,123,1,1,0.0,0.0,12,100,19,62,100,100,89,100,1,1,0,behr premium textur deckov,100
16,100005,delta vero 1 handl shower faucet trim kit chrome valv includ,rain shower head,2.33,rain shower head,Delta Vero 1-Handle Shower Only Faucet Trim Kit in Chrome (Valve Not Included),"Update your bathroom with the Delta Vero Single-Handle Shower Faucet Trim Kit in Chrome. It has a sleek, modern and minimalistic aesthetic. The MultiChoice universal valve keeps the water temperature within +/-3 degrees Fahrenheit to help prevent scalding.California residents: see&nbsp;Proposition 65 informationIncludes the trim kit only, the rough-in kit (R10000-UNBX) is sold separatelyIncludes the handleMaintains a balanced pressure of hot and cold water even when a valve is turned on or off elsewhere in the systemDue to WaterSense regulations in the state of New York, please confirm your shipping zip code is not restricted from use of items that do not meet WaterSense qualifications",updat bathroom delta vero singl handl shower faucet trim kit chrome. sleek modern minimalist aesthetic. multichoic univers valv keep water temperatur within 3deg. fahrenheit help prevent scalding.california resid see nbsp proposit 65 in. formationinclud trim kit rough kit r10000 unbx sold separatelyinclud handlemaintain balanc pressur hot cold water even valv turn elsewher systemdu watersens regul state new york pleas confirm ship zip code restrict use item meet watersens qualif,0.333333,0.333333,3,16,0.333333,0.0,0.363636,0.086957,0.0,0.0,11,69,0,0,0.0,0.0,55,55,55,55,100,100,100,100,0,0,0,delta,40
17,100005,delta vero 1 handl shower faucet trim kit chrome valv includ,shower faucet,2.67,shower only faucet,Delta Vero 1-Handle Shower Only Faucet Trim Kit in Chrome (Valve Not Included),"Update your bathroom with the Delta Vero Single-Handle Shower Faucet Trim Kit in Chrome. It has a sleek, modern and minimalistic aesthetic. The MultiChoice universal valve keeps the water temperature within +/-3 degrees Fahrenheit to help prevent scalding.California residents: see&nbsp;Proposition 65 informationIncludes the trim kit only, the rough-in kit (R10000-UNBX) is sold separatelyIncludes the handleMaintains a balanced pressure of hot and cold water even when a valve is turned on or off elsewhere in the systemDue to WaterSense regulations in the state of New York, please confirm your shipping zip code is not restricted from use of items that do not meet WaterSense qualifications",updat bathroom delta vero singl handl shower faucet trim kit chrome. sleek modern minimalist aesthetic. multichoic univers valv keep water temperatur within 3deg. fahrenheit help prevent scalding.california resid see nbsp proposit 65 in. formationinclud trim kit rough kit r10000 unbx sold separatelyinclud handlemaintain balanc pressur hot cold water even valv turn elsewher systemdu watersens regul state new york pleas confirm ship zip code restrict use item meet watersens qualif,1.0,1.0,2,13,0.666667,0.333333,0.363636,0.086957,0.25,0.25,11,69,1,1,0.0,0.0,100,100,100,100,100,100,100,100,1,1,0,delta,40


Same procedure for the test dataset

In [14]:
test = pd.read_csv('zip/test.csv.gz', index_col=['id'], compression='gzip', encoding=encoding)
test.head()

Unnamed: 0_level_0,product_uid,product_title,search_term
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,100001,Simpson Strong-Tie 12-Gauge Angle,90 degree bracket
4,100001,Simpson Strong-Tie 12-Gauge Angle,metal l brackets
5,100001,Simpson Strong-Tie 12-Gauge Angle,simpson sku able
6,100001,Simpson Strong-Tie 12-Gauge Angle,simpson strong ties
7,100001,Simpson Strong-Tie 12-Gauge Angle,simpson strong tie hcc668


In [15]:
test['orig_search_term'] = test['search_term'].astype(str)
test['search_term'] = stem_sentence(test['orig_search_term'].astype(str) )
test.loc[test['search_term'] == '', 'search_term'] = 'null'
test['search_term'] = test['search_term'].astype(str)

test['orig_product_title'] = test['product_title'].astype(str)
test['product_title'] = stem_sentence(test['product_title'].astype(str) )
test['product_title'] = test['product_title'].astype(str)

test['orig_descr'] = test.product_uid.map(products['orig_descr'])
test['descr'] = test.product_uid.map(products['descr'])

test.head()

Unnamed: 0_level_0,product_uid,product_title,search_term,orig_search_term,orig_product_title,orig_descr,descr
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,100001,simpson strong tie 12 gaug angl,90deg. bracket,90 degree bracket,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw
4,100001,simpson strong tie 12 gaug angl,metal bracket,metal l brackets,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw
5,100001,simpson strong tie 12 gaug angl,simpson sku abl,simpson sku able,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw
6,100001,simpson strong tie 12 gaug angl,simpson strong tie,simpson strong ties,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw
7,100001,simpson strong tie 12 gaug angl,simpson strong tie hcc668,simpson strong tie hcc668,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw


In [16]:
%%time

for col in ['product_title', 'descr']:
    test['match_' + col] = word_match_count(test['search_term'], test[col])
    
test['len_search_term_words'] = test['search_term'].apply(lambda x: len(x.split()))
test['len_search_term_letters'] = test['search_term'].str.len()

test['match_pos_title'] = match_word_n_pos(test['orig_product_title'], test['orig_search_term'])
test['match_pos_descr'] = match_word_n_pos(test['orig_descr'], test['orig_search_term'])

test['prod_title_ind'] = word_match_index(test['search_term'], test['product_title'])
test['descr_ind'] = word_match_index(test['search_term'], test['descr'])

test['words_std_title'] = word_match_std(test['search_term'], test['product_title'])
test['words_std_descr'] = word_match_std(test['search_term'], test['descr'])

test['len_title'] = test['product_title'].apply(lambda x: len(x.split()))
test['len_descr'] = test['descr'].apply(lambda x: len(x.split()))

test['last_word_title'] = match_last_word(test['search_term'], test['product_title'])  
test['last_word_descr'] = match_last_word(test['search_term'], test['descr']) 

test['match_orig_title'] = word_match_count(test['orig_search_term'], test['orig_product_title'])
test['match_orig_descr'] = word_match_count(test['orig_search_term'], test['orig_descr'])

test['fuzzy_title'] = fuzzy_match(test['search_term'], test['product_title'])
test['fuzzy_descr'] = fuzzy_match(test['search_term'], test['descr'])

test['fuzzy_orig_title'] = fuzzy_match(test['orig_search_term'], test['orig_product_title'])
test['fuzzy_orig_descr'] = fuzzy_match(test['orig_search_term'], test['orig_descr'])

test['part_fuzzy_title'] = fuzzy_match_partial(test['search_term'], test['product_title'])
test['part_fuzzy_descr'] = fuzzy_match_partial(test['search_term'], test['descr'])

test['part_fuzzy_orig_title'] = fuzzy_match_partial(test['orig_search_term'], test['orig_product_title'])
test['part_fuzzy_orig_descr'] = fuzzy_match_partial(test['orig_search_term'], test['orig_descr'])

test['first_word_title'] = match_first_word(test['search_term'], test['product_title'])  
test['first_word_descr'] = match_first_word(test['search_term'], test['descr']) 

test['match_numbers_title'] = match_numbers(test['search_term'], test['product_title'])  

test['brand'] = test.product_uid.map(brand['value']).astype(str)
test['brand'] = test['brand'].fillna('').astype(str)
test['fuzzy_brand'] = fuzzy_match_partial(test['brand'], test['search_term'])

CPU times: user 17min 40s, sys: 4.25 s, total: 17min 44s
Wall time: 17min 43s


In [17]:
test.to_csv('zip/test8.csv.gz', compression='gzip')
test.head()

Unnamed: 0_level_0,product_uid,product_title,search_term,orig_search_term,orig_product_title,orig_descr,descr,match_product_title,match_descr,len_search_term_words,len_search_term_letters,match_pos_title,match_pos_descr,prod_title_ind,descr_ind,words_std_title,words_std_descr,len_title,len_descr,last_word_title,last_word_descr,match_orig_title,match_orig_descr,fuzzy_title,fuzzy_descr,fuzzy_orig_title,fuzzy_orig_descr,part_fuzzy_title,part_fuzzy_descr,part_fuzzy_orig_title,part_fuzzy_orig_descr,first_word_title,first_word_descr,match_numbers_title,brand,fuzzy_brand
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
1,100001,simpson strong tie 12 gaug angl,90deg. bracket,90 degree bracket,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw,0.0,0.0,2,14,0.0,0.0,1.0,1.0,0,0,6,94,0,0,0.0,0.333333,18,6,28,21,23,38,35,100,0,0,0.0,simpson strong tie,23
4,100001,simpson strong tie 12 gaug angl,metal bracket,metal l brackets,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw,0.0,0.0,2,13,0.0,0.0,1.0,1.0,0,0,6,94,0,0,0.0,0.0,18,6,24,4,23,46,25,50,0,0,0.0,simpson strong tie,23
5,100001,simpson strong tie 12 gaug angl,simpson sku abl,simpson sku able,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw,0.333333,0.333333,3,15,0.333333,0.333333,0.0,0.095745,0,0,6,94,0,0,0.0,0.0,64,64,61,61,100,100,100,100,1,1,0.0,simpson strong tie,100
6,100001,simpson strong tie 12 gaug angl,simpson strong tie,simpson strong ties,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw,1.0,1.0,3,18,0.666667,0.666667,0.0,0.095745,0,0,6,94,1,1,0.0,0.0,100,100,85,85,100,100,100,100,1,1,0.0,simpson strong tie,100
7,100001,simpson strong tie 12 gaug angl,simpson strong tie hcc668,simpson strong tie hcc668,Simpson Strong-Tie 12-Gauge Angle,"Not only do angles make joints stronger, they also provide more consistent, straight corners. Simpson Strong-Tie offers a wide variety of angles in various sizes and thicknesses to handle light-duty jobs or projects where a structural connection is needed. Some can be bent (skewed) to match the project. For outdoor projects or those where moisture is present, use our ZMAX zinc-coated connectors, which provide extra resistance against corrosion (look for a ""Z"" at the end of the model number).Versatile connector for various 90 connections and home repair projectsStronger than angled nailing or screw fastening aloneHelp ensure joints are consistently straight and strongDimensions: 3 in. x 3 in. x 1-1/2 in.Made from 12-Gauge steelGalvanized for extra corrosion resistanceInstall with 10d common nails or #9 x 1-1/2 in. Strong-Drive SD screws",angl make joint stronger also provid consist straight corners. simpson strong tie offer wide varieti angl various size thick handl light duti job project structur connect needed. bent skew match project. outdoor project moistur present use zmax zinc coat connector provid extra resist corros look z end model number .versatil connector various 90 connect home repair projectsstrong angl nail screw fasten alonehelp ensur joint consist straight strongdimens 3 in. 3 in. 1 1 2 in. made 12 gaug steelgalvan extra corros resistanceinstal 10d common nail 9 1 1 2 in. strong drive sd screw,0.75,0.75,4,25,0.5,0.75,0.0,0.095745,0,0,6,94,0,0,0.0,0.0,84,84,84,84,100,100,100,100,1,1,0.0,simpson strong tie,100


Проверим score на данных

In [18]:
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, FeatureUnion

class cust_regression_vals(BaseEstimator, TransformerMixin):
    def fit(self, x, y=None):
        return self
    def transform(self, train):
        return train[tree_cols].values


class cust_txt_col(BaseEstimator, TransformerMixin):
    def __init__(self, col):
        self.col = col
    def fit(self, x, y=None):
        return self
    def transform(self, dataset):
        return dataset[self.col].apply(str)

def fitter(tsvd1=80, tsvd2=80, tsvd3=60, tsvd4=30):
    clf = Pipeline([
    ('union', FeatureUnion(
                transformer_list = [
                    ('cst',  cust_regression_vals()),  
                    ('txt1', Pipeline([('s1', cust_txt_col(col='search_term')),
                                       ('tfidf1', TfidfVectorizer(stop_words='english')),
                                       ('tsvd1', TruncatedSVD(n_components=tsvd1))])),
                    ('txt2', Pipeline([('s2', cust_txt_col(col='product_title')),
                                       ('tfidf2', TfidfVectorizer(stop_words='english')),
                                       ('tsvd2', TruncatedSVD(n_components=tsvd2))])),
                    ('txt3', Pipeline([('s3', cust_txt_col(col='descr')),
                                       ('tfidf3', TfidfVectorizer(stop_words='english')),
                                       ('tsvd3', TruncatedSVD(n_components=tsvd3))])),
                    ('txt4', Pipeline([('s4', cust_txt_col(col='brand')),
                                       ('tfidf4', TfidfVectorizer(stop_words='english')),
                                       ('tsvd4', TruncatedSVD(n_components=tsvd4))]))
                    ]
            ))])
    clf.fit(train[use_cols])
    return clf

def write_submission(model, test_data):
    y_pred = model.predict(test_data)
    y_pred[y_pred>3.] = 3.
    y_pred[y_pred<1.] = 1.
    results = pd.DataFrame({'id':test.index.values, 'relevance':y_pred})
    results.to_csv('lgbm5.csv', header=True, index=False)

In [19]:
tree_cols = ['match_product_title', 'match_descr',
             'len_search_term_words', 'len_search_term_letters',
             'match_pos_title', 'match_pos_descr',
             'prod_title_ind', 'descr_ind',
             'words_std_title', 'words_std_descr',
             'len_title', 'len_descr',
             'last_word_title', 'last_word_descr',
             'match_orig_title', 'match_orig_descr',
             'fuzzy_title', 'fuzzy_descr', 'fuzzy_orig_title', 'fuzzy_orig_descr',
             'part_fuzzy_title', 'part_fuzzy_descr', 'part_fuzzy_orig_title', 'part_fuzzy_orig_descr',
             'first_word_title', 'first_word_descr',
             'match_numbers_title',
             'fuzzy_brand']


use_cols = tree_cols + ['search_term', 'product_title', 'descr', 'brand']

In [20]:
y = train['relevance'].values

In [21]:
clf = fitter()
X_train = clf.transform(train[use_cols])
X_test = clf.transform(test[use_cols])

gbm = lgb.LGBMRegressor(learning_rate=0.03, colsample_bytree=0.7, num_leaves=80, n_estimators=1000)
gbm.fit(X_train, y)

tc_len = len(tree_cols)
print(dict(zip(tree_cols, gbm.feature_importances_[:tc_len]) ))

write_submission(gbm, X_test)

{'match_product_title': 296, 'match_descr': 140, 'len_search_term_words': 251, 'len_search_term_letters': 691, 'match_pos_title': 143, 'match_pos_descr': 175, 'prod_title_ind': 681, 'descr_ind': 486, 'words_std_title': 222, 'words_std_descr': 309, 'len_title': 423, 'len_descr': 375, 'last_word_title': 91, 'last_word_descr': 66, 'match_orig_title': 108, 'match_orig_descr': 145, 'fuzzy_title': 576, 'fuzzy_descr': 435, 'fuzzy_orig_title': 598, 'fuzzy_orig_descr': 393, 'part_fuzzy_title': 183, 'part_fuzzy_descr': 105, 'part_fuzzy_orig_title': 178, 'part_fuzzy_orig_descr': 85, 'first_word_title': 80, 'first_word_descr': 53, 'match_numbers_title': 119, 'fuzzy_brand': 308}


0.46838

In [22]:
train['pred_diff'] = train['relevance'] - gbm.predict(X_train)
train.sort_values(['pred_diff'], ascending=False)[:50]

Unnamed: 0_level_0,product_uid,product_title,search_term,relevance,orig_search_term,orig_product_title,orig_descr,descr,match_product_title,match_descr,len_search_term_words,len_search_term_letters,match_pos_title,match_pos_descr,prod_title_ind,descr_ind,words_std_title,words_std_descr,len_title,len_descr,last_word_title,last_word_descr,match_orig_title,match_orig_descr,fuzzy_title,fuzzy_descr,fuzzy_orig_title,fuzzy_orig_descr,part_fuzzy_title,part_fuzzy_descr,part_fuzzy_orig_title,part_fuzzy_orig_descr,first_word_title,first_word_descr,match_numbers_title,brand,fuzzy_brand,pred_diff
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
207064,194599,home legend distress lennox hickori 3 8 in. thick 2 1 8 in. wide 78 in. length hardwood carpet reduc mold,lennox,3.0,ennox,Home Legend Distressed Lennox Hickory 3/8 in. Thick x 2-1/8 in. Wide x 78 in. Length Hardwood Carpet Reducer Molding,"78 in. length, hardwood veneer carpet reducer is an overlap molding. It is used to provide a smooth and safe transition between floor coverings of different heights and constructions. It can also be used to as borders around fire place hearth and at sliding door threshold. The prefinished molding can be nailed or glued with its coordinating style of floor.California residents: see&nbsp;Proposition 65 informationWood Species: Hickory3/8 in. Thickness x 2-1/8 in. Width x 78 in. Length MoldingItem coordinates with 3/8 in. thick hardwood flooringConstruction: High Density Fiberboard (HDF) MoldingPrefinished Lennox Hickory, Dark ColorJanka wood hardness rating/resistance to denting: 1820Appropriate Grade for Installation: All GradesInstallation: GlueResidential useAll online orders for this item ship via parcel ground and may arrive in multiple boxes",78 in. length hardwood veneer carpet reduc overlap molding. use provid smooth safe transit floor cover differ height constructions. also use border around fire place hearth slide door threshold. prefinish mold nail glu coordin style floor.california resid see nbsp proposit 65 in. formationwood speci hickory3 8 in. thick 2 1 8 in. width 78 in. length moldingitem coordin 3 8 in. thick hardwood flooringconstruct high densiti fiberboard hdf moldingprefinish lennox hickori dark colorjanka wood hard rate resist dent 1820appropri grade instal gradesinstal glueresidenti useal onlin order item ship via parcel ground may arriv multipl box,1.0,1.0,1,6,0.0,0.0,0.142857,0.726316,0.0,0.0,21,95,1,1,0.0,0.0,100,100,9,1,100,100,100,40,1,1,0,home legend,50,1.109605
203914,192088,behr premium plus 600e 2 harbor mist zero voc interior paint,locit 2 plus 1,3.0,locite 2 plus 1,BEHR Premium Plus #600E-2 Harbor Mist Zero VOC Interior Paint,"BEHR Premium Plus Zero VOC, Self-Priming Interior Flat is 100% acrylic and provides a mildew resistant finish. This easy-clean flat provides excellent touch-up, ultimate durability and exceptional hide. The finish displays a non-reflective, matte appearance.California residents: see&nbsp;Proposition 65 informationIdeal for family rooms, living rooms, dining rooms, bedrooms and ceilings100% acrylic finish for easy cleaningTough and durable matte finishExceptional hide, 1 can covers up to 400 sq. ft. depending on color selected and surface porosityEasy soap and water clean-upExceptional latex formula provides a mildew resistant finishSelf priming finish that seals the surfaceActual paint colors may vary from on-screen and printer representationsOnline Price includes Paint Care fee in the following states: CA, CO, CT, ME, MN, OR, RI, VT",behr premium plus zero voc self prime interior flat 100 acryl provid mildew resist finish. easi clean flat provid excel touch ultim durabl except hide. finish display non reflect matt appearance.california resid see nbsp proposit 65 in. formationid famili room live room dine room bedroom ceilings100 acryl finish easi cleaningtough durabl matt finishexcept hide 1 cover 400 sq. ft. depend color select surfac porosityeasi soap water clean upexcept latex formula provid mildew resist finishself prime finish seal surfaceactu paint color may vari screen printer representationsonlin price includ paint care fee follow state ca co ct mn ri vt,0.5,0.5,4,14,0.5,0.5,0.181818,0.020408,1.0,676.0,11,98,0,1,0.0,0.25,60,60,57,57,100,100,100,100,0,0,0,behr premium plus,100,1.107707
185512,178376,behr premium plus ultra 1 gal. p460 5 fiji semi gloss enamel exterior paint,fija,3.0,fija,BEHR Premium Plus Ultra 1-gal. #P460-5 Fiji Semi-Gloss Enamel Exterior Paint,"For a perfect pop of color that will give your home some serious curb appeal, choose BEHR Premium Plus Ultra Semi-Gloss Enamel Exterior Paint. This radiant sheen will create a sleek appearance on your home's exterior accents and is ideal for trim, shutters and doors, including garage doors, as well as windows and outdoor furniture. It is formulated to withstand wear and will also resist moisture, fading and stains.California residents: see&nbsp;Proposition 65 informationSuitable for application on wood, vinyl, stucco, aluminum, metals, steel, doors, windows, trim, shutters, garage doors, outdoor furniture, railing and wrought iron100% acrylic base provides exceptional adhesion and durabilityExtra-protective shell guards against damage caused by UV rays and severe weather conditions for a long-lasting freshly painted look with minimal maintenanceEnhanced mildew resistant finishExceptional hide, 1 can covers up to 400 sq. ft. depending on color selected and surface porositySemi-gloss enamel sheen offers a radiant, sleek appearanceFormulated for easy clean-up with soap and waterActual paint colors may vary from on-screen and printer representationsOnline Price includes Paint Care fee in the following states: CA, CO, CT, ME, MN, OR, RI, VT",perfect pop color give home serious curb appeal choos behr premium plus ultra semi gloss enamel exterior paint. radiant sheen creat sleek appear home exterior accent ideal trim shutter door includ garag door well window outdoor furniture. formul withstand wear also resist moistur fade stains.california resid see nbsp proposit 65 in. formationsuit applic wood vinyl stucco aluminum metal steel door window trim shutter garag door outdoor furnitur rail wrought iron100 acryl base provid except adhes durabilityextra protect shell guard damag caus xbyuv ray sever weather condit long last fresh paint look minim maintenanceenhanc mildew resist finishexcept hide 1 cover 400 sq. ft. depend color select surfac porositysemi gloss enamel sheen offer radiant sleek appearanceformul easi clean soap wateractu paint color may vari screen printer representationsonlin price includ paint care fee follow state ca co ct mn ri vt,0.0,0.0,1,4,0.0,0.0,1.0,1.0,0.0,0.0,14,138,0,0,0.0,0.0,10,1,10,1,75,25,75,50,0,0,0,behr premium plus ultra,25,1.079868
193001,183692,lichtenberg white no. 918 millenni ryan heather textur sheer curtain panel 40 in. 95 in.,918,3.0,w g 918,"LICHTENBERG White No. 918 Millennial Ryan Heathered Texture Sheer Curtain Panel, 40 in. W x 95 in. L","No. 918 Millennial Ryan heathered texture semi-sheer curtain is a casual solid that adds freshness and a finishing touch to any decor setting. Enhances privacy while allowing light to gently filter through. Clean, simple one-pocket pole top design can be used with a standard or decorative curtain rod. Mix and match with other solids and prints for a look that is all your own.Sheer panel, gently filters lightNo header pole top panelMachine washableWide array of colors to choose from100% polyesterContains 1-curtain panel",no. 918 millenni ryan heather textur semi sheer curtain casual solid add fresh finish touch decor setting. enhanc privaci allow light gentl filter through. clean simpl one pocket pole top design use standard decor curtain rod. mix match solid print look own.sheer panel gentl filter lightno header pole top panelmachin washablewid array color choos from100 polyestercontain 1 curtain panel,1.0,1.0,1,3,1.0,1.0,0.2,0.016949,0.0,0.0,15,59,1,1,0.333333,0.333333,100,100,83,60,100,100,100,100,1,1,1,lichtenberg,0,1.014248
150259,156172,pittsburgh corn 24 in. 24 in. lightwis icescap pattern aluminum clad glass block window,242 24 6,3.0,242x24x6,Pittsburgh Corning 24 in. x 24 in. LightWise IceScapes Pattern Aluminum-Clad Glass Block Window,Use the LightWise Pittsburgh Corning Aluminum-Clad Glass Block Window either in new construction or as a replacement. The Aluminum-Clad window comes with a setback nail fin for use with stucco and other standard finishes that can be removed for renovation installations. The window comes with Aluminum-Cladding on the exterior with a natural pine interior for staining or painting.Made in AmericaWill not scratch or discolorThree choices of pattern10-Year warranty,use lightwis pittsburgh corn aluminum clad glass block window either new construct replacement. aluminum clad window come setback nail fin use stucco standard finish remov renov installations. window come aluminum clad exterior natur pine interior stain painting.mad americawil scratch discolorthre choic pattern10 year warranti,0.333333,0.0,3,8,0.4,0.0,0.142857,1.0,0.0,0.0,14,44,0,0,0.0,0.0,40,1,6,1,100,12,25,12,0,0,0,,0,1.003341
194147,184559,lenmar nickel metal hydrid 1200mah 3.6 volt cordless phone replac batteri,polish batteri metal,3.0,polisher battery metal,Lenmar Nickel-Metal Hydride 1200mAh/3.6-Volt Cordless Phone Replacement Battery,"Power your Panasonic, American Telecom, Tele-Phone, Key Phone, Audiovox, Uniden, Universal, and Zeta cordless phones with Lenmar's Nickel-Metal Hydride cordless phone replacement battery. This 3.6-Volt/1200mAh battery will provide reliable, long-lasting power. Compatible with Panasonic KX-A150, American Telecom 2110, Tele-Phone TEL-1000, Key Phone 920-WD, Audiovox AT-12A, Uniden EX-3300, Universal 2000, and Zeta 510.Fits: Panasonic KX-A150, American Telecom 2110, Tele-Phone TEL-1000, Key Phone 920-WD, Audiovox AT-12A, Uniden EX-3300, Universal 2000, Zeta 510Replaces: Dantona 3AA, Empire CPB-400, General Electric BT-17, Panasonic P-P341PA, Recoton T121, Uniden BA-3003.6-volt / 1200mAhNickel-metal hydride1 year warranty",power panason american telecom tele phone key phone audiovox uniden univers zeta cordless phone lenmar nickel metal hydrid cordless phone replac battery. 3.6 volt 1200mah batteri provid reliabl long last power. compat panason kx a150 american telecom 2110 tele phone tel 1000 key phone 920 wd audiovox 12a uniden ex 3300 univers 2000 zeta 510.fit panason kx a150 american telecom 2110 tele phone tel 1000 key phone 920 wd audiovox 12a uniden ex 3300 univers 2000 zeta 510replac dantona 3aa empir cpb 400 general electr bt 17 panason p341pa recoton t121 uniden ba 3003.6 volt 1200mahnickel metal hydride1 year warranti,0.666667,0.666667,3,20,0.666667,0.666667,0.181818,0.16,16.0,20.25,11,100,1,1,0.0,0.333333,79,79,74,74,100,100,100,100,0,0,0,lenmar,33,0.979019
211452,198172,splashback tile cleveland bendem mini brick 10 in. 11 in. 8mm. mix materi mosaic floor wall tile,cleveland ohio mini excav,3.0,cleveland ohio mini excavator,Splashback Tile Cleveland Bendemeer Mini Brick 10 in. x 11 in. x 8 mm Mixed Materials Mosaic Floor and Wall Tile,"An ideal tile for a kitchen backsplash or bathroom surfaces, the glass and stone combination of Splashback Tile Cleveland Bendemeer Mini Brick creates a beautifully multi-dimensionally effect. The mesh backing not only simplifies installation, it also allows the tiles to be separated which adds to their design flexibility. Sample size tiles available. See item 204278996 for sample size tile.1 square foot per piece, each piece sold individually. Piece weight equals 3 lbs.Grade 1, first-quality stone and glass tile for wall and top use12 in. width x 12 in. length x 0.33 in. thickGlazed smooth finish with some random shade and tone variationP.E.I. Rating III is suitable for medium-duty residential floors including kitchens, halls, corridors, balconies, terraces and areas used more often with normal footwear and small amounts of dirtImpervious flooring has water absorption of less than 0.5% for indoor or outdoor use and is frost proofC.O.F. greater than .50 is recommended for standard residential applications and is marginally skid resistant. Indoor useCompletely frost resistant for indoor or outdoor applications; use a latex-modified thinset for acceptable bond strengthResidential and commercial useLearn how to get a Lifetime Warranty by using Custom Building Products from The Home Depot. Visit www.homedepot.com/CBPDon't forget your coordinating trim pieces, grout, backerboard, thinset and installation toolsAll online orders for this item ship via ground and may arrive in multiple boxesIt is recommended you purchase a minimum of 10% overage to account for design cuts and patterns",ideal tile kitchen backsplash bathroom surfac glass stone combin splashback tile cleveland bendem mini brick creat beauti multi dimension effect. mesh back simplifi instal also allow tile separ add design flexibility. sampl size tile available. see item 204278996 sampl size tile.1sq.ft. per piec piec sold individually. piec weight equal 3 lb. grade 1 first qualiti stone glass tile wall top use12 in. width 12 in. length 0.33 in. thickglaz smooth finish random shade tone variationp.e.i. rate iii suitabl medium duti residenti floor includ kitchen hall corridor balconi terrac area use often normal footwear small amount dirtimpervi floor water absorpt less 0.5 indoor outdoor use frost proofc.o.f. greater .50 recommend standard residenti applic margin skid resistant. indoor usecomplet frost resist indoor outdoor applic use latex modifi thinset accept bond strengthresidenti commerci uselearn get lifetim warranti xbyus custom build product home depot. visit www.homedepot.com cbpdon forget coordin trim piec grout backerboard thinset instal toolsal onlin order item ship via ground may arriv multipl boxesit recommend purchas minimum 10 overag account design cut pattern,0.5,0.5,4,25,0.25,0.25,0.117647,0.064327,1.0,1.0,17,171,0,0,0.0,0.0,72,72,65,65,100,100,100,100,1,1,0,splashback tile,33,0.974302
218645,204191,fan essenti 1 ft. 1 1 2 ft. bois state univers 2 side garden flag 3 2 3 ft. metal flagpol,metal garden cts,3.0,metal garden cts,Fan Essentials 1 ft. x 1-1/2 ft. Boise State University 2-Sided Garden Flag with 3-2/3 ft. Metal Flagpole,This officially licensed Fan Essentials NCAA 18 in. x 12.5 in. sleeve style Garden Flag with Decorative Stand helps show your school pride. The decorative flag is protected against UV radiation for outdoor durability. Flag is designed to be viewed from either side for more versatility in placement.California residents: see&nbsp;Proposition 65 informationHeavy-duty polyester construction helps resist the wear and tear from frequent use2-sided design on flag provides viewing from either directionDouble stitching on sleeve to provide lasting strengthWide sleeve slips easily onto flagpoleRust-resistant metal helps the flagpole maintain its shine,offici licens fan essenti ncaa 18 in. 12.5 in. sleev style garden flag decor stand help show school pride. decor flag protect uv radiat outdoor durability. flag design view either side versatil placement.california resid see nbsp proposit 65 in. formationheavi duti polyest construct help resist wear tear frequent use2 side design flag provid view either directiondoubl stitch sleev provid last strengthwid sleev slip easili onto flagpolerust resist metal help flagpol maintain shine,0.666667,0.666667,3,16,0.666667,0.666667,0.619048,0.152778,9.0,784.0,21,72,0,0,0.0,0.333333,86,86,86,86,100,100,100,100,1,1,0,fan essenti,36,0.964897
72911,120678,ekena millwork 1 1 4 in. 80 in. 5 1 2 in. polyurethan standard crosshead mold,1 1 4 pvcsch 80,3.0,1 1/4 pvcsch 80,Ekena Millwork 1-1/4 in. x 80 in. x 5-1/2 in. Polyurethane Standard Crosshead Moulding,"When making plans for design improvements to the home or office, one of the easiest and most cost effective ways to increase visual appeal and value is to add Crossheads to your doorways and windows. Whether you're working with indoor or outdoor applications, Crossheads are an excellent design improvement to the outer casings of doorways and windows. Often used in tandem with other architectural trim items like Pilasters, Crossheads provide outstanding visual improvement without spending a lot of money. Our Crossheads are made of a highly dense foam product called Urethane. Urethane has the same basic feel of Pine wood without having to deal with the typical weathering issues of a wood product. They are water proof and unappealing to insects and will not expand or contract with fluctuating temperatures so they will not crack your paint. This makes our Crossheads ideal for both indoor and outdoor applications. Our Crossheads come in a variety of sizes and are easy for anyone to install. They can be trimmed with common woodworking tools and installed with simple construction adhesives and wood applicable hardware.Eye-catching style for your door or windowsLightweight and easy to installLow maintenance productAn answer for both indoor and outdoor applicationsOptional trim to add further depth and personalityAdd pilasters and pediments for an extraordinary look",make plan design improv home offic one easiest cost effect way increas visual appeal valu add crosshead doorway windows. whether work indoor outdoor applic crosshead excel design improv outer case doorway windows. often use tandem architectur trim item like pilast crosshead provid outstand visual improv without spend lot money. crosshead made high dens foam product call urethane. urethan basic feel pine wood without deal typic weather issu wood product. water proof unapp insect expand contract fluctuat temperatur crack paint. make crosshead ideal indoor outdoor applications. crosshead come varieti size easi anyon install. trim common woodwork tool instal simpl construct adhes wood applic hardware.ey catch style door windowslightweight easi installlow mainten productan answer indoor outdoor applicationsopt trim add depth personalityadd pilast pediment extraordinari look,0.6,0.0,5,15,0.6,0.0,0.125,1.0,2.666667,0.0,16,123,1,0,0.25,0.0,63,3,63,2,100,31,100,31,1,0,1,,0,0.960938
149723,155877,speedi grill 24 in. 8 in. base board return air vent grill fix blade white,2.4 white board,3.0,2.4 white board,"SPEEDI-GRILLE 24 in. x 8 in. Base Board Return Air Vent Grille with Fixed Blades, White","BBRAG (Base Board Return Grille) is out of a durable all steel construction with 1/2 in. fan shaped louvers and includes a volume damper. It has a white paintable powder coat finish. BBRAG also comes in other sizes as and styles which includes floor, return air and filter versions which are available on this web site. BBRAG stays in place and reduces frustration and headaches. You will be amazed at how easy installing BBRAG is and how much time/labor you will save. Grille trim screws must be installed for secure installation. (BBRAG can be installed in a non SPEEDI-BOOT installation).BBRAG creates a precise fit every time with SPEEDI-BOOT to improve air flow and your comfort levelBBRAG models (ceiling/sidewall) is made of durable all steel construction with stamped single-directional louversCome with paintable durable powder coat white finish that provides years of maintenance free lifeQuick and ease of installation reduces labor that save time and moneySPEEDI-GRILLE comes in many sizes and styles to choose from which are available on this web site",bbrag base board return grill durabl steel construct 1 2 in. fan shape louver includ volum damper. white paintabl powder coat finish. bbrag also come size style includ floor return air filter version avail web site. bbrag stay place reduc frustrat headaches. amaz easi instal bbrag much time labor save. grill trim screw must instal secur installation. bbrag instal non speedi boot instal .bbrag creat precis fit everi time speedi boot improv air flow comfort levelbbrag model ceil sidewal made durabl steel construct stamp singl direct louverscom paintabl durabl powder coat white finish provid year mainten free lifequick eas instal reduc labor save time moneyspeedi grill come mani size style choos avail web site,0.666667,0.666667,3,15,0.666667,0.666667,0.466667,0.017544,12.25,56.25,15,114,1,1,0.0,0.333333,85,93,85,93,100,100,100,100,0,0,0,,33,0.956141


In [23]:
train.sort_values(['pred_diff'], ascending=True)[:50]

Unnamed: 0_level_0,product_uid,product_title,search_term,relevance,orig_search_term,orig_product_title,orig_descr,descr,match_product_title,match_descr,len_search_term_words,len_search_term_letters,match_pos_title,match_pos_descr,prod_title_ind,descr_ind,words_std_title,words_std_descr,len_title,len_descr,last_word_title,last_word_descr,match_orig_title,match_orig_descr,fuzzy_title,fuzzy_descr,fuzzy_orig_title,fuzzy_orig_descr,part_fuzzy_title,part_fuzzy_descr,part_fuzzy_orig_title,part_fuzzy_orig_descr,first_word_title,first_word_descr,match_numbers_title,brand,fuzzy_brand,pred_diff
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
69679,119478,romano 4 ft. boxwood spiral topiari tree,topiari tree,1.0,topiary tree,Romano 4 ft. Boxwood Spiral Topiary Tree,"Enhance your home with a Romano Boxwood Spiral Topiary Tree. This wonderful full bodied Boxwood Spiral Topiary Tree keeps its colors when all the rest lose their color and leaves. You can always look forward to the rich green tones of the Romano Boxwood Spiral Topiary Tree in your home. If you are looking for a High quality artificial tree to make a bold statement in your home, this is it.Light and mobile designOver 700 leavesIndoor/outdoorIron frameUV resistant rubber leaves7 in. potEnhances any room in your house",enhanc home romano boxwood spiral topiari tree. wonder full bodi boxwood spiral topiari tree keep color rest lose color leaves. alway look forward rich green tone romano boxwood spiral topiari tree home. look high qualiti artifici tree make bold statement home it.light mobil designov 700 leavesindoor outdooriron frameuv resist rubber leaves7 in. potenh room hous,1.0,1.0,2,12,1.0,1.0,0.714286,0.090909,0.25,16.0,7,55,1,1,0.0,0.5,100,100,100,100,100,100,100,100,1,1,0,,33,-1.571493
136808,148897,earthquak 212cc tiller rear tine crt side shield,side shield,1.0,side shields,Earthquake 212cc Tiller Rear Tine CRT with Side Shields,"The Earthquake 6015V rear tine rototiller delivers the ultimate combination of power and size. Rear-mounted tines till stubborn dirt easily with a high-performance 212cc Viper engine. We have engineered a compact rear tine rototiller with the same power as its larger counterparts.Easy - single-handed control through sodEarthquake 6015V rear tineCompact - full-sized power in a compact frameDurable - cast iron, bronze gear drive transmission",earthquak 6015v rear tine rototil deliv ultim combin power size. rear mount tine till stubborn dirt easili high perform 212cc viper engine. engin compact rear tine rototil power larger counterparts.easi singl hand control sodearthquak 6015v rear tinecompact full size power compact framedur cast iron bronz gear drive transmiss,1.0,0.0,2,11,1.0,0.0,0.75,1.0,0.25,0.0,8,48,1,0,0.0,0.0,100,8,100,6,100,64,100,67,1,0,0,,0,-1.422191
120624,140844,rachael ray 10 qt. cover stockpot,rachael ray,1.0,rachael ray,Rachael Ray 10 qt. Covered Stockpot,"Whether you're boiling pasta, making a batch of chili or cooking grains, this large, 10 quart stockpot is just the right size. When it's not on the stovetop, its two short side handles help it take up less space in your cabinet. The hard-anodized construction is preferred for its even heat distribution throughout the pan and up the sides. The nonstick cooking surface means you don't need to worry about food sticking or about scrubbing dirty pans - cleanup is a breeze. In fact, Rachael's stockpot is dishwasher safe for added convenience.Hard-anodized cookware is exceptionally durable, plus it heats quickly and evenly, reducing ""hot spots"" that can burn foodsA clear coat exterior makes this cookware dishwasher safe for convenienceLong-lasting nonstick interior lets food slide off with ease and makes cleanup quick and easyFun rubberized handles provide a comfortable grip and are oven safe to 350 degrees FahrenheitTempered glass lid to watch your foods cook without having to remove the lid to keep heat and moisture locked inLifetime limited warranty",whether boil pasta make batch chili cook grain larg 10 quart stockpot right size. stovetop two short side handl help take less space cabinet. hard anod construct prefer even heat distribut throughout pan sides. nonstick cook surfac mean need worri food stick scrub dirti pan cleanup breeze. fact rachael stockpot dishwash safe ad convenience.hard anod cookwar except durabl plus heat quick even reduc hot spot burn foodsa clear coat exterior make cookwar dishwash safe conveniencelong last nonstick interior let food slide eas make cleanup quick easyfun rubber handl provid comfort grip oven safe 350deg. fahrenheittemp glass lid watch food cook without remov lid keep heat moistur lock inlifetim limit warranti,1.0,0.5,2,11,1.0,0.5,0.0,0.436364,0.25,0.0,6,110,1,0,0.0,0.0,100,78,100,78,100,100,100,100,1,1,0,,33,-1.416008
9508,101618,sure comfort 40gal. tall 3 year 34 000 btu natur gas water heater,hot water tank gas,1.0,hot water tank gas,"Sure Comfort 40 Gal. Tall 3 Year 34,000 BTU Natural Gas Water Heater","The Sure Comfort 40 Gal. Natural Gas Tall Water Heater comes with a 34,000 BTU/hour environmentally friendly low NOx burner that provides hot water for households with 2 to 4 people. The piezo ignition system ensures safe and easy startup. Its maintenance free combustion air intake system means there are no filters to clean and replace. A patented magnesium anode rod design protects the tanks from rust, providing longer tank life. Factory installed temperature and pressure relief valve and 3/4 in. water connections are included with the water heater for easy installation.California residents: due to requirements from air quality management districts in the state of California, please use the map below to confirm whether you live in an area that requires the purchase of ultra low NOx water heaters40 Gal. tank provides hot water for households with 2 to 4 peopleNatural gas water heater costs less to operate than electric modelsDelivers 67 Gal. of hot water in the first hour3 year limited warranty on tank and 1 year limited warranty on parts for your convenienceMaintenance free - no filter to clean or replacePiezo ignition system simplifies the startup processRated for installations up to 8,400 ft.Narrow tank profile fits in a variety of indoor spacesDesigned for easy replacement of your existing water heaterCall 1-855-400-2552 before noon for same-day service in most areas, or Visit our Water Heater Installation page for more information.Meets new 2015 NAECA StandardsCalifornia Residents - See if your county requires water heaters that meet Air Quality Management District Ultra low NOx (ULN) requirements:",sure comfort 40gal. natur gas tall water heater come 34 000 btu hour environment friend low nox burner provid hot water household 2 4 people. piezo ignit system ensur safe easi startup. mainten free combust air intak system mean filter clean replace. patent magnesium anod rod design protect tank rust provid longer tank life. factori instal temperatur pressur relief valv 3 4 in. water connect includ water heater easi installation.california resid due requir air qualiti manag district state california pleas use map confirm whether live area requir purchas ultra low nox water heaters40gal. tank provid hot water household 2 4 peoplenatur gas water heater cost less oper electr modelsdeliv 67gal. hot water first hour3 year limit warranti tank 1 year limit warranti part conveniencemainten free filter clean replacepiezo ignit system simplifi startup processr instal 8 400 ft. narrow tank profil fit varieti indoor spacesdesign easi replac exist water heatercal 1 855 400 2552 noon day servic area visit water heater instal page information.meet new 2015 naeca standardscalifornia resid see counti requir water heater meet air qualiti manag district ultra low nox uln requir,0.5,1.0,4,18,0.5,1.0,0.769231,0.021858,0.25,308.6875,13,183,1,1,0.0,1.0,67,100,67,100,100,100,100,100,0,1,0,sure comfort,33,-1.298128
17236,103041,ortho home defens max 1.33gal. perimet indoor insect killer wand,ant killer,1.0,ant killer,Ortho Home Defense Max 1.33 Gal. Perimeter and Indoor Insect Killer with Wand,"The Ortho Home Defense Max 1.33 Gal. Ready-to-Use Perimeter and Indoor Insect Killer is designed for interior and exterior use to kill ants, roaches, spiders and other pests and to help keep new ones from entering your home. This product features a sprayer for application of the fast-drying, non-staining formula.Kills interior bugs to help keep areas free of pestsHelps to prevent exterior bugs from entering your homeExtended reach ergonomic comfort wand with multiple spray settings offers one touch continuous sprayFormula is non-staining, odor free and quick drying1 application provides up to 12 months of interior protection against crickets, carpet beetles, earwigs, firebrats, moths, silverfish and certain spidersExtended reach wand with multiple spray settings",ortho home defens max 1.33gal. readi use perimet indoor insect killer design interior exterior use kill ant roach spider pest help keep new one enter home. product featur sprayer applic fast dri non stain formula.kil interior bug help keep area free pestshelp prevent exterior bug enter homeextend reach ergonom comfort wand multipl spray set offer one touch continu sprayformula non stain odor free quick drying1 applic provid 12 month interior protect cricket carpet beetl earwig firebrat moth silverfish certain spidersextend reach wand multipl spray set,0.5,1.0,2,10,0.5,0.5,0.8,0.117647,0.0,9.0,10,85,1,1,0.0,0.0,75,100,75,75,100,100,100,100,0,1,0,ortho,20,-1.272946
176724,172318,green matter 3 light mahogani bronz vaniti fixtur,bronz green,1.0,bronze green,Green Matters 3-Light Mahogany Bronze Vanity Fixture,"The cornerstone of this collection is quality, and this incandescent vanity is no exception. Combining a classic mahogany bronze finish and posh styling, you will find no better way to highlight the charm of your home. With the superb craftsmanship and affordable price this fixture is sure to tastefully indulge your extravagant side.Energy Star qualified to meet or exceed federal guidelines for energy efficiency for year-round energy and money savingsMetal construction with a mahogany bronze finishChampagne washed linen glass shadesUses (3) CFL bulbs, 13-Watt maximum (included)Easy installation instructions and template enclosed for convenient setupIdeal for providing illumination in bathroomsUL listedIncludes installation hardwareHome Depot Protection Plan:Click image to enlarge: br bra href=""https://idm.homedepot.com/assets/image/52/52993f59-930d-4515-bdfa-d6a69de7228b.jpg"" target=""_blank""img src=""https://idm.homedepot.com/assets/image/52/52993f59-930d-4515-bdfa-d6a69de7228b.jpg"" width =""416"" height = ""416""/abr",cornerston collect qualiti incandesc vaniti exception. combin classic mahogani bronz finish posh style find better way highlight charm home. superb craftsmanship afford price fixtur sure tast indulg extravag side.energi star qualifi meet exceed feder guidelin energi effici year round energi money savingsmet construct mahogani bronz finishchampagn wash linen glass shadesus 3 cfl bulb 13 watt maximum includ easi instal instruct templat enclos conveni setupid provid illumin bathroomsul listedinclud instal hardwarehom depot protect plan click imag enlarg br bra href https idm.homedepot.com asset imag 52 52993f59 930d 4515 bdfa d6a69de7228b.jpg target blank img src https idm.homedepot.com asset imag 52 52993f59 930d 4515 bdfa d6a69de7228b.jpg width 416 height 416 abr,1.0,0.5,2,11,0.5,0.5,0.0,0.083333,6.25,0.0,8,108,1,0,0.0,0.5,100,62,100,67,100,100,100,100,1,1,0,green matter,100,-1.265791
204530,192555,heath bird stop blue ceram wild bird feeder,bird stop,1.0,bird stops,Heath Bird Stop Blue Ceramic Wild Bird Feeder,"The Bird Stop Blue Ceramic Wild Bird Feeder features a blend of ceramic and metal construction. The circular openings are ideal for feeding sunflower seeds, shelled peanuts or mealworms. Holds up to 1 lb. of seed. Drainage holes in the tray help keep seed dry. The top removes for refilling.Sleek, attractive design pops in any gardenVersatile design holds mealworms, shelled peanuts or sunflower seedsCeramic and metal constructionFully disassembles for cleaning",bird stop blue ceram wild bird feeder featur blend ceram metal construction. circular open ideal feed sunflow seed shell peanut mealworms. hold 1 lb. seed. drainag hole tray help keep seed dry. top remov refilling.sleek attract design pop gardenversatil design hold mealworm shell peanut sunflow seedsceram metal construct disassembl clean,1.0,1.0,2,9,0.5,0.5,0.125,0.0,0.25,0.25,8,50,1,1,0.0,0.0,100,100,57,57,100,100,100,100,1,1,0,heath,20,-1.243026
156918,160006,werner 14 ft. fiberglass round rung straight ladder 375 lb. load capac type iaa duti rate,14 ft. ladder,1.0,14 ft ladder,Werner 14 ft. Fiberglass Round Rung Straight Ladder with 375 lb. Load Capacity Type IAA Duty Rating,"The 7114-1 one-section round rung 14 ft. Straight Ladder is made with non-conductive fiberglass rails. Easy to transport and maneuver, these ladders bear a duty rating of 375 lbs. making them durable and long lasting. The slip-resistant, TRACTION-TRED rungs are round, and the durable rail shields and shoe brackets help to protect the rails from damage. The plate and rung assemblies are riveted at four points to the side rails.ALFLO TWIST-PROOF performanceMar resistant end capsSlip resistant round rungsShoe with pad and spur plateCannot ship to home, available for store delivery only",7114 1 one section round rung 14 ft. straight ladder made non conduct fiberglass rails. easi transport maneuv ladder bear duti rate 375 lb. make durabl long lasting. slip resist traction tred rung round durabl rail shield shoe bracket help protect rail damage. plate rung assembl rivet four point side rails.alflo twist proof performancemar resist end capsslip resist round rungssho pad spur platecannot ship home avail store deliveri,1.0,1.0,3,13,1.0,1.0,0.0625,0.088235,6.888889,1.555556,16,68,1,1,0.333333,0.333333,100,100,100,100,100,100,100,100,1,1,1,,33,-1.214842
146217,153944,coleman roadtrip grill wheel carri case,coleman grill,1.33,coleman grill,Coleman RoadTrip Grill Wheeled Carrying Case,"With inner straps for a secure hold and inner pockets that accommodate condiments or other accessories, the Coleman RoadTrip Grill Wheeled Carrying Case makes it easy to transport your grill.Allows you to transport your grill between cooking venuesFor use with Coleman propane-powered roadtrip grillsHeavy duty, weather-resistant, 600-denier constructionSide handles and easy-glide wheels for portability; durable zipper and inner straps secure your grill and stand in the case while you travelInner pockets accommodate condiments or other accessories on the go",inner strap secur hold inner pocket accommod condiment accessori coleman roadtrip grill wheel carri case make easi transport grill.allow transport grill cook venuesfor use coleman propan power roadtrip grillsheavi duti weather resist 600 denier constructionsid handl easi glide wheel portabl durabl zipper inner strap secur grill stand case travelinn pocket accommod condiment accessori go,1.0,1.0,2,13,1.0,1.0,0.0,0.166667,1.0,1.0,6,54,1,1,0.0,0.5,100,100,100,100,100,100,100,100,1,1,0,,67,-1.203606
72456,120519,sticki pix remov reposition ultim wall sticker mini mural appliqu peac,sticker,1.33,sticker,Sticky Pix Removable and Repositionable Ultimate Wall Sticker Mini Mural Appliques Peace,"With Sticky Pix Home Decor you can think of life as a verb capturing the joy, the surprise and the fun of living through images that stand out, emotionally connect people and bring a smile. Here's a unique way to personalize your home, locker, dorm or office that shows off your personal style. For use on walls, doors, glass or any smooth, flat and clean interior surface.Ultimate mini mural wall stickerPremiere photo real imageryRemovableRepositionableWon't harm wallsEasy to applyDo it yourselfAppeals to all ages",sticki pix home decor think life verb captur joy surpris fun live imag stand emot connect peopl bring smile. uniqu way person home locker dorm offic show person style. use wall door glass smooth flat clean interior surface.ultim mini mural wall stickerpremier photo real imageryremovablerepositionablewon harm wallseasi applydo yourselfapp age,1.0,0.0,1,7,1.0,0.0,0.545455,1.0,0.0,0.0,11,50,1,1,0.0,0.0,100,4,100,3,100,57,100,57,1,1,0,,0,-1.19133


In [24]:
@np.vectorize
def word_match_std(search_term, text):
    indexes = []
    search_term = search_term.split()
    text = text.split()    
    n = len(text)
    if len(search_term) == 0:
        return 0
    for word in set(search_term):
        if word in text:
            indexes.append(text.index(word))
    if len(indexes) > 0:
        if len(search_term) == 1:
            return 1
        return np.var(indexes)
    else:
        return 0


train['words_std_title'] = word_match_std(train['search_term'], train['product_title'])
train['words_std_descr'] = word_match_std(train['search_term'], train['descr'])

test['words_std_title'] = word_match_std(test['search_term'], test['product_title'])
test['words_std_descr'] = word_match_std(test['search_term'], test['descr'])

X_train = clf.transform(train[use_cols])
X_test = clf.transform(test[use_cols])

gbm = lgb.LGBMRegressor(learning_rate=0.03, colsample_bytree=0.7, num_leaves=80, n_estimators=1000)
gbm.fit(X_train, y)

tc_len = len(tree_cols)
print(dict(zip(tree_cols, gbm.feature_importances_[:tc_len]) ))

write_submission(gbm, X_test)

{'match_product_title': 293, 'match_descr': 159, 'len_search_term_words': 271, 'len_search_term_letters': 714, 'match_pos_title': 159, 'match_pos_descr': 188, 'prod_title_ind': 680, 'descr_ind': 489, 'words_std_title': 231, 'words_std_descr': 280, 'len_title': 446, 'len_descr': 389, 'last_word_title': 98, 'last_word_descr': 81, 'match_orig_title': 115, 'match_orig_descr': 140, 'fuzzy_title': 591, 'fuzzy_descr': 406, 'fuzzy_orig_title': 531, 'fuzzy_orig_descr': 372, 'part_fuzzy_title': 167, 'part_fuzzy_descr': 87, 'part_fuzzy_orig_title': 208, 'part_fuzzy_orig_descr': 79, 'first_word_title': 102, 'first_word_descr': 57, 'match_numbers_title': 109, 'fuzzy_brand': 342}


0.46786

In [25]:
bullets_names = ['Bullet0' + str(i) for i in range(1, 10)]
bullets = attrs[attrs['name'].isin(bullets_names)]
bullets = bullets.groupby(['product_uid'])['value'].apply(lambda x: ' '.join([str(a) for a in x]))
bullets = bullets.apply(stem_sentence)

bullets

product_uid
100001.0    versatil connector various 90 degre connect home repair project stronger angl nail screw fasten alon help ensur joint consist straight strong dimens 3 in. 3 in. 1 1 2 in. made 12 gaug steel galvan extra corros resist instal 10d common nail 9 1 1 2 in. strong drive sd screw                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        
100002.0    reviv wood composit deck rail porch boat dock also great concret pool deck patio sidewalk 100 acryl solid color coat resis

In [27]:
train['bullets'] = train.product_uid.map(bullets).astype(str)
train['match_bullets'] = word_match_count(train['bullets'], train['search_term'])

test['bullets'] = test.product_uid.map(bullets).astype(str)
test['match_bullets'] = word_match_count(test['bullets'], test['search_term'])

In [28]:
tree_cols = ['match_product_title', 'match_descr',
             'len_search_term_words', 'len_search_term_letters',
             'match_pos_title', 'match_pos_descr',
             'prod_title_ind', 'descr_ind',
             'words_std_title', 'words_std_descr',
             'len_title', 'len_descr',
             'last_word_title', 'last_word_descr',
             'match_orig_title', 'match_orig_descr',
             'fuzzy_title', 'fuzzy_descr', 'fuzzy_orig_title', 'fuzzy_orig_descr',
             'part_fuzzy_title', 'part_fuzzy_descr', 'part_fuzzy_orig_title', 'part_fuzzy_orig_descr',
             'first_word_title', 'first_word_descr',
             'match_numbers_title',
             'fuzzy_brand',
             'match_bullets']


use_cols = tree_cols + ['search_term', 'product_title', 'descr', 'brand']

clf = fitter()
X_train = clf.transform(train[use_cols])
X_test = clf.transform(test[use_cols])

gbm = lgb.LGBMRegressor(learning_rate=0.03, colsample_bytree=0.7, num_leaves=80, n_estimators=1000)
gbm.fit(X_train, y)

tc_len = len(tree_cols)
print(dict(zip(tree_cols, gbm.feature_importances_[:tc_len]) ))

write_submission(gbm, X_test)

{'match_product_title': 297, 'match_descr': 137, 'len_search_term_words': 256, 'len_search_term_letters': 676, 'match_pos_title': 163, 'match_pos_descr': 177, 'prod_title_ind': 662, 'descr_ind': 430, 'words_std_title': 219, 'words_std_descr': 296, 'len_title': 415, 'len_descr': 345, 'last_word_title': 102, 'last_word_descr': 86, 'match_orig_title': 118, 'match_orig_descr': 160, 'fuzzy_title': 551, 'fuzzy_descr': 435, 'fuzzy_orig_title': 565, 'fuzzy_orig_descr': 387, 'part_fuzzy_title': 189, 'part_fuzzy_descr': 84, 'part_fuzzy_orig_title': 205, 'part_fuzzy_orig_descr': 82, 'first_word_title': 92, 'first_word_descr': 50, 'match_numbers_title': 115, 'fuzzy_brand': 317, 'match_bullets': 257}


0.46752

In [30]:
def fitter5(tsvd1=80, tsvd2=80, tsvd3=60, tsvd4=30, tsvd5=30):
    clf = Pipeline([
    ('union', FeatureUnion(
                transformer_list = [
                    ('cst',  cust_regression_vals()),  
                    ('txt1', Pipeline([('s1', cust_txt_col(col='search_term')),
                                       ('tfidf1', TfidfVectorizer(stop_words='english')),
                                       ('tsvd1', TruncatedSVD(n_components=tsvd1))])),
                    ('txt2', Pipeline([('s2', cust_txt_col(col='product_title')),
                                       ('tfidf2', TfidfVectorizer(stop_words='english')),
                                       ('tsvd2', TruncatedSVD(n_components=tsvd2))])),
                    ('txt3', Pipeline([('s3', cust_txt_col(col='descr')),
                                       ('tfidf3', TfidfVectorizer(stop_words='english')),
                                       ('tsvd3', TruncatedSVD(n_components=tsvd3))])),
                    ('txt4', Pipeline([('s4', cust_txt_col(col='brand')),
                                       ('tfidf4', TfidfVectorizer(stop_words='english')),
                                       ('tsvd4', TruncatedSVD(n_components=tsvd4))])),
                    ('txt5', Pipeline([('s5', cust_txt_col(col='bullets')),
                                       ('tfidf5', TfidfVectorizer(stop_words='english')),
                                       ('tsvd5', TruncatedSVD(n_components=tsvd5))]))
                    ]
            ))])
    clf.fit(train[use_cols])
    return clf

tree_cols = ['match_product_title', 'match_descr',
             'len_search_term_words', 'len_search_term_letters',
             'match_pos_title', 'match_pos_descr',
             'prod_title_ind', 'descr_ind',
             'words_std_title', 'words_std_descr',
             'len_title', 'len_descr',
             'last_word_title', 'last_word_descr',
             'match_orig_title', 'match_orig_descr',
             'fuzzy_title', 'fuzzy_descr', 'fuzzy_orig_title', 'fuzzy_orig_descr',
             'part_fuzzy_title', 'part_fuzzy_descr', 'part_fuzzy_orig_title', 'part_fuzzy_orig_descr',
             'first_word_title', 'first_word_descr',
             'match_numbers_title',
             'fuzzy_brand',
             'match_bullets']


use_cols = tree_cols + ['search_term', 'product_title', 'descr', 'brand', 'bullets']

clf = fitter()
X_train = clf.transform(train[use_cols])
X_test = clf.transform(test[use_cols])

gbm = lgb.LGBMRegressor(learning_rate=0.03, colsample_bytree=0.7, num_leaves=80, n_estimators=1000)
gbm.fit(X_train, y)

tc_len = len(tree_cols)
print(dict(zip(tree_cols, gbm.feature_importances_[:tc_len]) ))
print(gbm.feature_importances_[tc_len:])

write_submission(gbm, X_test)

{'match_product_title': 290, 'match_descr': 147, 'len_search_term_words': 264, 'len_search_term_letters': 676, 'match_pos_title': 182, 'match_pos_descr': 158, 'prod_title_ind': 656, 'descr_ind': 444, 'words_std_title': 194, 'words_std_descr': 294, 'len_title': 425, 'len_descr': 371, 'last_word_title': 89, 'last_word_descr': 84, 'match_orig_title': 130, 'match_orig_descr': 165, 'fuzzy_title': 545, 'fuzzy_descr': 426, 'fuzzy_orig_title': 561, 'fuzzy_orig_descr': 368, 'part_fuzzy_title': 179, 'part_fuzzy_descr': 89, 'part_fuzzy_orig_title': 200, 'part_fuzzy_orig_descr': 87, 'first_word_title': 75, 'first_word_descr': 41, 'match_numbers_title': 118, 'fuzzy_brand': 326, 'match_bullets': 244}
[495 336 350 306 343 276 274 243 257 307 306 236 259 324 321 261 290 259
 246 300 323 326 304 288 312 300 306 263 271 298 270 274 286 375 297 282
 251 292 320 313 248 318 342 299 297 309 267 308 322 341 319 305 288 341
 303 271 342 329 303 299 341 371 322 341 320 295 342 356 318 389 318 305
 336 335 311

0.46747

In [31]:
train['pred_diff'] = train['relevance'] - gbm.predict(X_train)
train.sort_values(['pred_diff'], ascending=False)[:50]

Unnamed: 0_level_0,product_uid,product_title,search_term,relevance,orig_search_term,orig_product_title,orig_descr,descr,match_product_title,match_descr,len_search_term_words,len_search_term_letters,match_pos_title,match_pos_descr,prod_title_ind,descr_ind,words_std_title,words_std_descr,len_title,len_descr,last_word_title,last_word_descr,match_orig_title,match_orig_descr,fuzzy_title,fuzzy_descr,fuzzy_orig_title,fuzzy_orig_descr,part_fuzzy_title,part_fuzzy_descr,part_fuzzy_orig_title,part_fuzzy_orig_descr,first_word_title,first_word_descr,match_numbers_title,brand,fuzzy_brand,pred_diff,bullets,match_bullets
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
203914,192088,behr premium plus 600e 2 harbor mist zero voc interior paint,locit 2 plus 1,3.0,locite 2 plus 1,BEHR Premium Plus #600E-2 Harbor Mist Zero VOC Interior Paint,"BEHR Premium Plus Zero VOC, Self-Priming Interior Flat is 100% acrylic and provides a mildew resistant finish. This easy-clean flat provides excellent touch-up, ultimate durability and exceptional hide. The finish displays a non-reflective, matte appearance.California residents: see&nbsp;Proposition 65 informationIdeal for family rooms, living rooms, dining rooms, bedrooms and ceilings100% acrylic finish for easy cleaningTough and durable matte finishExceptional hide, 1 can covers up to 400 sq. ft. depending on color selected and surface porosityEasy soap and water clean-upExceptional latex formula provides a mildew resistant finishSelf priming finish that seals the surfaceActual paint colors may vary from on-screen and printer representationsOnline Price includes Paint Care fee in the following states: CA, CO, CT, ME, MN, OR, RI, VT",behr premium plus zero voc self prime interior flat 100 acryl provid mildew resist finish. easi clean flat provid excel touch ultim durabl except hide. finish display non reflect matt appearance.california resid see nbsp proposit 65 in. formationid famili room live room dine room bedroom ceilings100 acryl finish easi cleaningtough durabl matt finishexcept hide 1 cover 400 sq. ft. depend color select surfac porosityeasi soap water clean upexcept latex formula provid mildew resist finishself prime finish seal surfaceactu paint color may vari screen printer representationsonlin price includ paint care fee follow state ca co ct mn ri vt,0.5,0.5,4,14,0.5,0.5,0.181818,0.020408,1.0,676.0,11,98,0,1,0.0,0.25,60,60,57,57,100,100,100,100,0,0,0,behr premium plus,100,1.13576,ideal famili room live room dine room bedroom ceil 100 acryl finish easi clean tough durabl matt finish except hide 1 cover 400 sq. ft. depend color select surfac poros easi soap water clean except latex formula provid mildew resist finish self prime finish seal surfac actual paint color may vari screen printer represent,0.018519
194147,184559,lenmar nickel metal hydrid 1200mah 3.6 volt cordless phone replac batteri,polish batteri metal,3.0,polisher battery metal,Lenmar Nickel-Metal Hydride 1200mAh/3.6-Volt Cordless Phone Replacement Battery,"Power your Panasonic, American Telecom, Tele-Phone, Key Phone, Audiovox, Uniden, Universal, and Zeta cordless phones with Lenmar's Nickel-Metal Hydride cordless phone replacement battery. This 3.6-Volt/1200mAh battery will provide reliable, long-lasting power. Compatible with Panasonic KX-A150, American Telecom 2110, Tele-Phone TEL-1000, Key Phone 920-WD, Audiovox AT-12A, Uniden EX-3300, Universal 2000, and Zeta 510.Fits: Panasonic KX-A150, American Telecom 2110, Tele-Phone TEL-1000, Key Phone 920-WD, Audiovox AT-12A, Uniden EX-3300, Universal 2000, Zeta 510Replaces: Dantona 3AA, Empire CPB-400, General Electric BT-17, Panasonic P-P341PA, Recoton T121, Uniden BA-3003.6-volt / 1200mAhNickel-metal hydride1 year warranty",power panason american telecom tele phone key phone audiovox uniden univers zeta cordless phone lenmar nickel metal hydrid cordless phone replac battery. 3.6 volt 1200mah batteri provid reliabl long last power. compat panason kx a150 american telecom 2110 tele phone tel 1000 key phone 920 wd audiovox 12a uniden ex 3300 univers 2000 zeta 510.fit panason kx a150 american telecom 2110 tele phone tel 1000 key phone 920 wd audiovox 12a uniden ex 3300 univers 2000 zeta 510replac dantona 3aa empir cpb 400 general electr bt 17 panason p341pa recoton t121 uniden ba 3003.6 volt 1200mahnickel metal hydride1 year warranti,0.666667,0.666667,3,20,0.666667,0.666667,0.181818,0.16,16.0,20.25,11,100,1,1,0.0,0.333333,79,79,74,74,100,100,100,100,0,0,0,lenmar,33,1.058255,fit panason kx a150 american telecom 2110 tele phone tel 1000 key phone 920 wd audiovox 12a uniden ex 3300 univers 2000 zeta 510 replac dantona 3aa empir cpb 400 general electr bt 17 panason p341pa recoton t121 uniden ba 300 3.6 volt 1200mah nickel metal hydrid 1 year warranti,0.02
207064,194599,home legend distress lennox hickori 3 8 in. thick 2 1 8 in. wide 78 in. length hardwood carpet reduc mold,lennox,3.0,ennox,Home Legend Distressed Lennox Hickory 3/8 in. Thick x 2-1/8 in. Wide x 78 in. Length Hardwood Carpet Reducer Molding,"78 in. length, hardwood veneer carpet reducer is an overlap molding. It is used to provide a smooth and safe transition between floor coverings of different heights and constructions. It can also be used to as borders around fire place hearth and at sliding door threshold. The prefinished molding can be nailed or glued with its coordinating style of floor.California residents: see&nbsp;Proposition 65 informationWood Species: Hickory3/8 in. Thickness x 2-1/8 in. Width x 78 in. Length MoldingItem coordinates with 3/8 in. thick hardwood flooringConstruction: High Density Fiberboard (HDF) MoldingPrefinished Lennox Hickory, Dark ColorJanka wood hardness rating/resistance to denting: 1820Appropriate Grade for Installation: All GradesInstallation: GlueResidential useAll online orders for this item ship via parcel ground and may arrive in multiple boxes",78 in. length hardwood veneer carpet reduc overlap molding. use provid smooth safe transit floor cover differ height constructions. also use border around fire place hearth slide door threshold. prefinish mold nail glu coordin style floor.california resid see nbsp proposit 65 in. formationwood speci hickory3 8 in. thick 2 1 8 in. width 78 in. length moldingitem coordin 3 8 in. thick hardwood flooringconstruct high densiti fiberboard hdf moldingprefinish lennox hickori dark colorjanka wood hard rate resist dent 1820appropri grade instal gradesinstal glueresidenti useal onlin order item ship via parcel ground may arriv multipl box,1.0,1.0,1,6,0.0,0.0,0.142857,0.726316,1.0,1.0,21,95,1,1,0.0,0.0,100,100,9,1,100,100,100,40,1,1,0,home legend,50,1.039499,wood speci hickori 3 8 in. thick 2 1 8 in. width 78 in. length mold item coordin 3 8 in. thick hardwood floor construct high densiti fiberboard hdf mold prefinish lennox hickori dark color janka wood hard rate resist dent 1820 appropri grade instal grade instal glue residenti use,0.02
193001,183692,lichtenberg white no. 918 millenni ryan heather textur sheer curtain panel 40 in. 95 in.,918,3.0,w g 918,"LICHTENBERG White No. 918 Millennial Ryan Heathered Texture Sheer Curtain Panel, 40 in. W x 95 in. L","No. 918 Millennial Ryan heathered texture semi-sheer curtain is a casual solid that adds freshness and a finishing touch to any decor setting. Enhances privacy while allowing light to gently filter through. Clean, simple one-pocket pole top design can be used with a standard or decorative curtain rod. Mix and match with other solids and prints for a look that is all your own.Sheer panel, gently filters lightNo header pole top panelMachine washableWide array of colors to choose from100% polyesterContains 1-curtain panel",no. 918 millenni ryan heather textur semi sheer curtain casual solid add fresh finish touch decor setting. enhanc privaci allow light gentl filter through. clean simpl one pocket pole top design use standard decor curtain rod. mix match solid print look own.sheer panel gentl filter lightno header pole top panelmachin washablewid array color choos from100 polyestercontain 1 curtain panel,1.0,1.0,1,3,1.0,1.0,0.2,0.016949,1.0,1.0,15,59,1,1,0.333333,0.333333,100,100,83,60,100,100,100,100,1,1,1,lichtenberg,0,1.015556,sheer panel gentl filter light header pole top panel machin washabl wide array color choos 100 polyest contain 1 curtain panel,0.0
181669,175688,cap tread cross wood 94 in. long 12 1 8 in. deep 1 11 16 in. height vinyl left return cover stair 1 in. thick,wood chip best cover,3.0,wood chips best to cover,Cap A Tread Cross Wood 94 in. Long x 12-1/8 in. Deep x 1-11/16 in. Height Vinyl Left Return to Cover Stairs 1 in. Thick,Cap A Tread is a durable vinyl with pre-attached stair nosing. For updating carpet or worn out stairs. Easy glue down installation. Cost effective way to add value to your home.California residents: see&nbsp;Proposition 65 informationCap A Tread adds character to your stairsDurable vinyl with pre-attached stair nosing20-year warrantyDon't forget your coordinating moldings and trimCoordinates with vinyl flooringThis product is proudly manufactured in USA by Zamma Corporation,cap tread durabl vinyl pre attach stair nosing. updat carpet worn stairs. easi glue installation. cost effect way add valu home.california resid see nbsp proposit 65 in. formationcap tread add charact stairsdur vinyl pre attach stair nosing20 year warrantydon forget coordin mold trimcoordin vinyl flooringthi product proud manufactur usa xbyzamma corpor,0.5,0.0,4,20,0.6,0.2,0.12,1.0,72.25,0.0,25,51,1,0,0.2,0.2,67,11,70,15,100,45,100,100,1,0,0,cap tread,44,1.003965,cap tread add charact stair durabl vinyl pre attach stair nose 20 year warranti forget coordin mold trim coordin vinyl floor product proud manufactur usa xbyzamma corpor,0.0
218645,204191,fan essenti 1 ft. 1 1 2 ft. bois state univers 2 side garden flag 3 2 3 ft. metal flagpol,metal garden cts,3.0,metal garden cts,Fan Essentials 1 ft. x 1-1/2 ft. Boise State University 2-Sided Garden Flag with 3-2/3 ft. Metal Flagpole,This officially licensed Fan Essentials NCAA 18 in. x 12.5 in. sleeve style Garden Flag with Decorative Stand helps show your school pride. The decorative flag is protected against UV radiation for outdoor durability. Flag is designed to be viewed from either side for more versatility in placement.California residents: see&nbsp;Proposition 65 informationHeavy-duty polyester construction helps resist the wear and tear from frequent use2-sided design on flag provides viewing from either directionDouble stitching on sleeve to provide lasting strengthWide sleeve slips easily onto flagpoleRust-resistant metal helps the flagpole maintain its shine,offici licens fan essenti ncaa 18 in. 12.5 in. sleev style garden flag decor stand help show school pride. decor flag protect uv radiat outdoor durability. flag design view either side versatil placement.california resid see nbsp proposit 65 in. formationheavi duti polyest construct help resist wear tear frequent use2 side design flag provid view either directiondoubl stitch sleev provid last strengthwid sleev slip easili onto flagpolerust resist metal help flagpol maintain shine,0.666667,0.666667,3,16,0.666667,0.666667,0.619048,0.152778,9.0,784.0,21,72,0,0,0.0,0.333333,86,86,86,86,100,100,100,100,1,1,0,fan essenti,36,1.000609,heavi duti polyest construct help resist wear tear frequent use 2 side design flag provid view either direct doubl stitch sleev provid last strength wide sleev slip easili onto flagpol rust resist metal help flagpol maintain shine,0.027027
113218,137331,halo 4 in. 2700k matt white recess retrofit baffl trim led modul ring 90 cri,mmodel,2.67,mmodel,Halo 4 in. 2700K Matte White Recessed Retrofit Baffle-Trim LED Module Ring 90 CRI,"Halo RL4 Series consists of retrofit LED Module-Trim for 4 in. aperture recessed downlights. The RL4 Series is designed for use with compatible 4 in. housings: Halo, All-Pro and others. RL4 LED Modules produce warm white light, and are dimmable. For further value the RL4 lens provides uniform illumination and wet location listing. The RL4 die-cast construction makes any housing AIR-TITE; for added HVAC savings and code compliance.Attractive Halo matte white baffle and trimRated for damp and wet location2700K warm white light, 600 lumens, 90 CRIProduces light equivalent to a 65-Watt bulb yet uses just 10.5-WattDimmable- standard 120-Volt electronic low-voltage dimmer is recommended for best performanceLasts 50,000 hours with 70% maintained lumensE26 Edison screw base adapter for retrofit into to existing housingsCalifornia title 24 compliantHome Depot Protection Plan:",halo rl4 seri consist retrofit led modul trim 4 in. apertur recess downlights. rl4 seri design use compat 4 in. hous halo pro others. rl4 led modul produc warm white light dimmable. valu rl4 len provid uniform illumin wet locat listing. rl4 die cast construct make hous air tite ad hvac save code compliance.attract halo matt white baffl trimrat damp wet location2700k warm white light 600 lumen 90 criproduc light equival 65 watt bulb yet use 10.5 wattdimm standard 120 volt electron low voltag dimmer recommend best performancelast 50 000 hour 70 maintain lumense26 edison screw base adapt retrofit exist housingscalifornia titl 24 complianthom depot protect plan,0.0,0.0,1,6,0.0,0.0,1.0,1.0,0.0,0.0,15,107,0,0,0.0,0.0,12,2,12,2,67,50,67,50,0,0,0,halo,25,0.991215,attract halo matt white baffl trim rate damp wet locat 2700k warm white light 600 lumen 90 cri produc light equival 65 watt bulb yet use 10.5 watt dimmabl standard 120 volt electron low voltag dimmer recommend best perform last 50 000 hour 70 maintain lumen e26 edison screw base adapt retrofit exist hous california titl 24 compliant,0.0
72911,120678,ekena millwork 1 1 4 in. 80 in. 5 1 2 in. polyurethan standard crosshead mold,1 1 4 pvcsch 80,3.0,1 1/4 pvcsch 80,Ekena Millwork 1-1/4 in. x 80 in. x 5-1/2 in. Polyurethane Standard Crosshead Moulding,"When making plans for design improvements to the home or office, one of the easiest and most cost effective ways to increase visual appeal and value is to add Crossheads to your doorways and windows. Whether you're working with indoor or outdoor applications, Crossheads are an excellent design improvement to the outer casings of doorways and windows. Often used in tandem with other architectural trim items like Pilasters, Crossheads provide outstanding visual improvement without spending a lot of money. Our Crossheads are made of a highly dense foam product called Urethane. Urethane has the same basic feel of Pine wood without having to deal with the typical weathering issues of a wood product. They are water proof and unappealing to insects and will not expand or contract with fluctuating temperatures so they will not crack your paint. This makes our Crossheads ideal for both indoor and outdoor applications. Our Crossheads come in a variety of sizes and are easy for anyone to install. They can be trimmed with common woodworking tools and installed with simple construction adhesives and wood applicable hardware.Eye-catching style for your door or windowsLightweight and easy to installLow maintenance productAn answer for both indoor and outdoor applicationsOptional trim to add further depth and personalityAdd pilasters and pediments for an extraordinary look",make plan design improv home offic one easiest cost effect way increas visual appeal valu add crosshead doorway windows. whether work indoor outdoor applic crosshead excel design improv outer case doorway windows. often use tandem architectur trim item like pilast crosshead provid outstand visual improv without spend lot money. crosshead made high dens foam product call urethane. urethan basic feel pine wood without deal typic weather issu wood product. water proof unapp insect expand contract fluctuat temperatur crack paint. make crosshead ideal indoor outdoor applications. crosshead come varieti size easi anyon install. trim common woodwork tool instal simpl construct adhes wood applic hardware.ey catch style door windowslightweight easi installlow mainten productan answer indoor outdoor applicationsopt trim add depth personalityadd pilast pediment extraordinari look,0.6,0.0,5,15,0.6,0.0,0.125,1.0,2.666667,0.0,16,123,1,0,0.25,0.0,63,3,63,2,100,31,100,31,1,0,1,,0,0.987566,,0.0
179141,173941,wiremold 500 700 seri 5 1 2 in. open base extens box,safe box slide open,3.0,safe box with slide open,Wiremold 500 and 700 Series 5-1/2 in. Open Base Extension Box,"Legrand/Wiremold small surface metal raceway systems are the ideal choice for a homeowner that wants to extend circuits or communication cabling in their home. Surface metal raceway allows you to extend circuits, add fixtures or outlets without disturbing the drywall, plaster or insulation in your home. All parts are paintable to blend with room decor. The leading choice of professionals is now available for your DIY project.90_ twist elbow kit enables a turn around an outside corner while changing the raceway channel's orientationAccommodates fixtures with 2-3/4 in. 3-1/2 in. and 4-1/16 in. mounting patternsCompatible with both Legrand/Wiremold 500 series (B-series) and 700 series (BW series) racewaysIvory finishPaintable and stainable using latex based productsUL listedNote: product may vary by store",legrand wiremold small surfac metal raceway system ideal choic homeown want extend circuit communic cabl home. surfac metal raceway allow extend circuit add fixtur outlet without disturb drywal plaster insul home. part paintabl blend room decor. lead choic profession avail diy project.90 twist elbow kit enabl turn around outsid corner chang raceway channel orientationaccommod fixtur 2 3 4 in. 3 1 2 in. 4 1 16 in. mount patternscompat legrand wiremold 500 seri b seri 700 seri bw seri racewaysivori finishpaint stainabl use latex base productsul listednot product may vari xbystor,0.5,0.0,4,19,0.4,0.2,0.666667,1.0,2.25,0.0,12,91,1,0,0.0,0.2,59,7,50,29,100,47,100,100,0,0,0,wiremold,38,0.987295,90 twist elbow kit enabl turn around outsid corner chang raceway channel orient accommod fixtur 2 3 4 in. 3 1 2 in. 4 1 16 in. mount pattern compat legrand wiremold 500 seri b seri 700 seri bw seri raceway ivori finish paintabl stainabl use latex base product ul list,0.0
150259,156172,pittsburgh corn 24 in. 24 in. lightwis icescap pattern aluminum clad glass block window,242 24 6,3.0,242x24x6,Pittsburgh Corning 24 in. x 24 in. LightWise IceScapes Pattern Aluminum-Clad Glass Block Window,Use the LightWise Pittsburgh Corning Aluminum-Clad Glass Block Window either in new construction or as a replacement. The Aluminum-Clad window comes with a setback nail fin for use with stucco and other standard finishes that can be removed for renovation installations. The window comes with Aluminum-Cladding on the exterior with a natural pine interior for staining or painting.Made in AmericaWill not scratch or discolorThree choices of pattern10-Year warranty,use lightwis pittsburgh corn aluminum clad glass block window either new construct replacement. aluminum clad window come setback nail fin use stucco standard finish remov renov installations. window come aluminum clad exterior natur pine interior stain painting.mad americawil scratch discolorthre choic pattern10 year warranti,0.333333,0.0,3,8,0.4,0.0,0.142857,1.0,0.0,0.0,14,44,0,0,0.0,0.0,40,1,6,1,100,12,25,12,0,0,0,,0,0.985833,,0.0


In [32]:
train.sort_values(['pred_diff'], ascending=True)[:50]

Unnamed: 0_level_0,product_uid,product_title,search_term,relevance,orig_search_term,orig_product_title,orig_descr,descr,match_product_title,match_descr,len_search_term_words,len_search_term_letters,match_pos_title,match_pos_descr,prod_title_ind,descr_ind,words_std_title,words_std_descr,len_title,len_descr,last_word_title,last_word_descr,match_orig_title,match_orig_descr,fuzzy_title,fuzzy_descr,fuzzy_orig_title,fuzzy_orig_descr,part_fuzzy_title,part_fuzzy_descr,part_fuzzy_orig_title,part_fuzzy_orig_descr,first_word_title,first_word_descr,match_numbers_title,brand,fuzzy_brand,pred_diff,bullets,match_bullets
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
69679,119478,romano 4 ft. boxwood spiral topiari tree,topiari tree,1.0,topiary tree,Romano 4 ft. Boxwood Spiral Topiary Tree,"Enhance your home with a Romano Boxwood Spiral Topiary Tree. This wonderful full bodied Boxwood Spiral Topiary Tree keeps its colors when all the rest lose their color and leaves. You can always look forward to the rich green tones of the Romano Boxwood Spiral Topiary Tree in your home. If you are looking for a High quality artificial tree to make a bold statement in your home, this is it.Light and mobile designOver 700 leavesIndoor/outdoorIron frameUV resistant rubber leaves7 in. potEnhances any room in your house",enhanc home romano boxwood spiral topiari tree. wonder full bodi boxwood spiral topiari tree keep color rest lose color leaves. alway look forward rich green tone romano boxwood spiral topiari tree home. look high qualiti artifici tree make bold statement home it.light mobil designov 700 leavesindoor outdooriron frameuv resist rubber leaves7 in. potenh room hous,1.0,1.0,2,12,1.0,1.0,0.714286,0.090909,0.25,16.0,7,55,1,1,0.0,0.5,100,100,100,100,100,100,100,100,1,1,0,,33,-1.60743,,0.0
136808,148897,earthquak 212cc tiller rear tine crt side shield,side shield,1.0,side shields,Earthquake 212cc Tiller Rear Tine CRT with Side Shields,"The Earthquake 6015V rear tine rototiller delivers the ultimate combination of power and size. Rear-mounted tines till stubborn dirt easily with a high-performance 212cc Viper engine. We have engineered a compact rear tine rototiller with the same power as its larger counterparts.Easy - single-handed control through sodEarthquake 6015V rear tineCompact - full-sized power in a compact frameDurable - cast iron, bronze gear drive transmission",earthquak 6015v rear tine rototil deliv ultim combin power size. rear mount tine till stubborn dirt easili high perform 212cc viper engine. engin compact rear tine rototil power larger counterparts.easi singl hand control sodearthquak 6015v rear tinecompact full size power compact framedur cast iron bronz gear drive transmiss,1.0,0.0,2,11,1.0,0.0,0.75,1.0,0.25,0.0,8,48,1,0,0.0,0.0,100,8,100,6,100,64,100,67,1,0,0,,0,-1.438943,,0.0
120624,140844,rachael ray 10 qt. cover stockpot,rachael ray,1.0,rachael ray,Rachael Ray 10 qt. Covered Stockpot,"Whether you're boiling pasta, making a batch of chili or cooking grains, this large, 10 quart stockpot is just the right size. When it's not on the stovetop, its two short side handles help it take up less space in your cabinet. The hard-anodized construction is preferred for its even heat distribution throughout the pan and up the sides. The nonstick cooking surface means you don't need to worry about food sticking or about scrubbing dirty pans - cleanup is a breeze. In fact, Rachael's stockpot is dishwasher safe for added convenience.Hard-anodized cookware is exceptionally durable, plus it heats quickly and evenly, reducing ""hot spots"" that can burn foodsA clear coat exterior makes this cookware dishwasher safe for convenienceLong-lasting nonstick interior lets food slide off with ease and makes cleanup quick and easyFun rubberized handles provide a comfortable grip and are oven safe to 350 degrees FahrenheitTempered glass lid to watch your foods cook without having to remove the lid to keep heat and moisture locked inLifetime limited warranty",whether boil pasta make batch chili cook grain larg 10 quart stockpot right size. stovetop two short side handl help take less space cabinet. hard anod construct prefer even heat distribut throughout pan sides. nonstick cook surfac mean need worri food stick scrub dirti pan cleanup breeze. fact rachael stockpot dishwash safe ad convenience.hard anod cookwar except durabl plus heat quick even reduc hot spot burn foodsa clear coat exterior make cookwar dishwash safe conveniencelong last nonstick interior let food slide eas make cleanup quick easyfun rubber handl provid comfort grip oven safe 350deg. fahrenheittemp glass lid watch food cook without remov lid keep heat moistur lock inlifetim limit warranti,1.0,0.5,2,11,1.0,0.5,0.0,0.436364,0.25,0.0,6,110,1,0,0.0,0.0,100,78,100,78,100,100,100,100,1,1,0,,33,-1.409658,,0.0
9508,101618,sure comfort 40gal. tall 3 year 34 000 btu natur gas water heater,hot water tank gas,1.0,hot water tank gas,"Sure Comfort 40 Gal. Tall 3 Year 34,000 BTU Natural Gas Water Heater","The Sure Comfort 40 Gal. Natural Gas Tall Water Heater comes with a 34,000 BTU/hour environmentally friendly low NOx burner that provides hot water for households with 2 to 4 people. The piezo ignition system ensures safe and easy startup. Its maintenance free combustion air intake system means there are no filters to clean and replace. A patented magnesium anode rod design protects the tanks from rust, providing longer tank life. Factory installed temperature and pressure relief valve and 3/4 in. water connections are included with the water heater for easy installation.California residents: due to requirements from air quality management districts in the state of California, please use the map below to confirm whether you live in an area that requires the purchase of ultra low NOx water heaters40 Gal. tank provides hot water for households with 2 to 4 peopleNatural gas water heater costs less to operate than electric modelsDelivers 67 Gal. of hot water in the first hour3 year limited warranty on tank and 1 year limited warranty on parts for your convenienceMaintenance free - no filter to clean or replacePiezo ignition system simplifies the startup processRated for installations up to 8,400 ft.Narrow tank profile fits in a variety of indoor spacesDesigned for easy replacement of your existing water heaterCall 1-855-400-2552 before noon for same-day service in most areas, or Visit our Water Heater Installation page for more information.Meets new 2015 NAECA StandardsCalifornia Residents - See if your county requires water heaters that meet Air Quality Management District Ultra low NOx (ULN) requirements:",sure comfort 40gal. natur gas tall water heater come 34 000 btu hour environment friend low nox burner provid hot water household 2 4 people. piezo ignit system ensur safe easi startup. mainten free combust air intak system mean filter clean replace. patent magnesium anod rod design protect tank rust provid longer tank life. factori instal temperatur pressur relief valv 3 4 in. water connect includ water heater easi installation.california resid due requir air qualiti manag district state california pleas use map confirm whether live area requir purchas ultra low nox water heaters40gal. tank provid hot water household 2 4 peoplenatur gas water heater cost less oper electr modelsdeliv 67gal. hot water first hour3 year limit warranti tank 1 year limit warranti part conveniencemainten free filter clean replacepiezo ignit system simplifi startup processr instal 8 400 ft. narrow tank profil fit varieti indoor spacesdesign easi replac exist water heatercal 1 855 400 2552 noon day servic area visit water heater instal page information.meet new 2015 naeca standardscalifornia resid see counti requir water heater meet air qualiti manag district ultra low nox uln requir,0.5,1.0,4,18,0.5,1.0,0.769231,0.021858,0.25,308.6875,13,183,1,1,0.0,1.0,67,100,67,100,100,100,100,100,0,1,0,sure comfort,33,-1.372669,california resid due requir air qualiti manag district state california pleas use map confirm whether live area requir purchas ultra low nox water heater 40gal. tank provid hot water household 2 4 peopl natur gas water heater cost less oper electr model deliv 67gal. hot water first hour 3 year limit warranti tank 1 year limit warranti part conveni mainten free filter clean replac piezo ignit system simplifi startup process rate instal 8 400 ft. narrow tank profil fit varieti indoor space,0.04878
156918,160006,werner 14 ft. fiberglass round rung straight ladder 375 lb. load capac type iaa duti rate,14 ft. ladder,1.0,14 ft ladder,Werner 14 ft. Fiberglass Round Rung Straight Ladder with 375 lb. Load Capacity Type IAA Duty Rating,"The 7114-1 one-section round rung 14 ft. Straight Ladder is made with non-conductive fiberglass rails. Easy to transport and maneuver, these ladders bear a duty rating of 375 lbs. making them durable and long lasting. The slip-resistant, TRACTION-TRED rungs are round, and the durable rail shields and shoe brackets help to protect the rails from damage. The plate and rung assemblies are riveted at four points to the side rails.ALFLO TWIST-PROOF performanceMar resistant end capsSlip resistant round rungsShoe with pad and spur plateCannot ship to home, available for store delivery only",7114 1 one section round rung 14 ft. straight ladder made non conduct fiberglass rails. easi transport maneuv ladder bear duti rate 375 lb. make durabl long lasting. slip resist traction tred rung round durabl rail shield shoe bracket help protect rail damage. plate rung assembl rivet four point side rails.alflo twist proof performancemar resist end capsslip resist round rungssho pad spur platecannot ship home avail store deliveri,1.0,1.0,3,13,1.0,1.0,0.0625,0.088235,6.888889,1.555556,16,68,1,1,0.333333,0.333333,100,100,100,100,100,100,100,100,1,1,1,,33,-1.367846,,0.0
17236,103041,ortho home defens max 1.33gal. perimet indoor insect killer wand,ant killer,1.0,ant killer,Ortho Home Defense Max 1.33 Gal. Perimeter and Indoor Insect Killer with Wand,"The Ortho Home Defense Max 1.33 Gal. Ready-to-Use Perimeter and Indoor Insect Killer is designed for interior and exterior use to kill ants, roaches, spiders and other pests and to help keep new ones from entering your home. This product features a sprayer for application of the fast-drying, non-staining formula.Kills interior bugs to help keep areas free of pestsHelps to prevent exterior bugs from entering your homeExtended reach ergonomic comfort wand with multiple spray settings offers one touch continuous sprayFormula is non-staining, odor free and quick drying1 application provides up to 12 months of interior protection against crickets, carpet beetles, earwigs, firebrats, moths, silverfish and certain spidersExtended reach wand with multiple spray settings",ortho home defens max 1.33gal. readi use perimet indoor insect killer design interior exterior use kill ant roach spider pest help keep new one enter home. product featur sprayer applic fast dri non stain formula.kil interior bug help keep area free pestshelp prevent exterior bug enter homeextend reach ergonom comfort wand multipl spray set offer one touch continu sprayformula non stain odor free quick drying1 applic provid 12 month interior protect cricket carpet beetl earwig firebrat moth silverfish certain spidersextend reach wand multipl spray set,0.5,1.0,2,10,0.5,0.5,0.8,0.117647,0.0,9.0,10,85,1,1,0.0,0.0,75,100,75,75,100,100,100,100,0,1,0,ortho,20,-1.275038,kill interior bug help keep area free pest help prevent exterior bug enter home extend reach ergonom comfort wand multipl spray set offer one touch continu spray formula non stain odor free quick dri 1 applic provid 12 month interior protect cricket carpet beetl earwig firebrat moth silverfish certain spider extend reach wand multipl spray set,0.0
204530,192555,heath bird stop blue ceram wild bird feeder,bird stop,1.0,bird stops,Heath Bird Stop Blue Ceramic Wild Bird Feeder,"The Bird Stop Blue Ceramic Wild Bird Feeder features a blend of ceramic and metal construction. The circular openings are ideal for feeding sunflower seeds, shelled peanuts or mealworms. Holds up to 1 lb. of seed. Drainage holes in the tray help keep seed dry. The top removes for refilling.Sleek, attractive design pops in any gardenVersatile design holds mealworms, shelled peanuts or sunflower seedsCeramic and metal constructionFully disassembles for cleaning",bird stop blue ceram wild bird feeder featur blend ceram metal construction. circular open ideal feed sunflow seed shell peanut mealworms. hold 1 lb. seed. drainag hole tray help keep seed dry. top remov refilling.sleek attract design pop gardenversatil design hold mealworm shell peanut sunflow seedsceram metal construct disassembl clean,1.0,1.0,2,9,0.5,0.5,0.125,0.0,0.25,0.25,8,50,1,1,0.0,0.0,100,100,57,57,100,100,100,100,1,1,0,heath,20,-1.252272,sleek attract design pop garden versatil design hold mealworm shell peanut sunflow seed ceram metal construct fulli disassembl clean,0.0
96532,129999,brasscraft 3 8 in. o.d. 36 in. copper faucet riser chrome,copper faucet,1.0,copper faucet,BrassCraft 3/8 in. O.D. x 36 in. Copper Faucet Riser in Chrome,"BrassCraft 3/8 in. O.D. tube x 36 in. length chrome-plated copper riser for faucet installations features one-piece formed metal nosepiece with insert. Cut-to-Length for flexible fit with a clean, professional appearance. For use in potable water distribution systems in accessible locations only.3/8 in. O.D. x 36 in. length copper riser for faucet installations1-piece formed metal nosepiece with insertChrome platedCut-to-length for flexible fit and clean, professional appearanceFor use in potable water distribution systems in accessible locations onlyTemperature rating: 40 to 180 FPressure rating: 125 psi maximumNo-lead certifiedIAPMO and CSA listed to ASME A112.18.1/CSA B125.1Designed, machined and assembled in the USA",brasscraft 3 8 in. o.d. tube 36 in. length chrome plate copper riser faucet instal featur one piec form metal nosepiec insert. cut length flexibl fit clean profession appearance. use potabl water distribut system access locat only.3 8 in. o.d. 36 in. length copper riser faucet installations1 piec form metal nosepiec insertchrom platedcut length flexibl fit clean profession appearancefor use potabl water distribut system access locat onlytemperatur rate 40 180 fpressur rate 125 psi maximumno lead certifiediapmo csa list asm a112.18.1 csa b125.1design machin assembl usa,1.0,1.0,2,13,1.0,0.5,0.636364,0.127907,0.25,1.0,11,86,1,1,0.0,1.0,100,100,100,100,100,100,100,100,1,1,0,,33,-1.222854,,0.0
176724,172318,green matter 3 light mahogani bronz vaniti fixtur,bronz green,1.0,bronze green,Green Matters 3-Light Mahogany Bronze Vanity Fixture,"The cornerstone of this collection is quality, and this incandescent vanity is no exception. Combining a classic mahogany bronze finish and posh styling, you will find no better way to highlight the charm of your home. With the superb craftsmanship and affordable price this fixture is sure to tastefully indulge your extravagant side.Energy Star qualified to meet or exceed federal guidelines for energy efficiency for year-round energy and money savingsMetal construction with a mahogany bronze finishChampagne washed linen glass shadesUses (3) CFL bulbs, 13-Watt maximum (included)Easy installation instructions and template enclosed for convenient setupIdeal for providing illumination in bathroomsUL listedIncludes installation hardwareHome Depot Protection Plan:Click image to enlarge: br bra href=""https://idm.homedepot.com/assets/image/52/52993f59-930d-4515-bdfa-d6a69de7228b.jpg"" target=""_blank""img src=""https://idm.homedepot.com/assets/image/52/52993f59-930d-4515-bdfa-d6a69de7228b.jpg"" width =""416"" height = ""416""/abr",cornerston collect qualiti incandesc vaniti exception. combin classic mahogani bronz finish posh style find better way highlight charm home. superb craftsmanship afford price fixtur sure tast indulg extravag side.energi star qualifi meet exceed feder guidelin energi effici year round energi money savingsmet construct mahogani bronz finishchampagn wash linen glass shadesus 3 cfl bulb 13 watt maximum includ easi instal instruct templat enclos conveni setupid provid illumin bathroomsul listedinclud instal hardwarehom depot protect plan click imag enlarg br bra href https idm.homedepot.com asset imag 52 52993f59 930d 4515 bdfa d6a69de7228b.jpg target blank img src https idm.homedepot.com asset imag 52 52993f59 930d 4515 bdfa d6a69de7228b.jpg width 416 height 416 abr,1.0,0.5,2,11,0.5,0.5,0.0,0.083333,6.25,0.0,8,108,1,0,0.0,0.5,100,62,100,67,100,100,100,100,1,1,0,green matter,100,-1.19889,energi star qualifi meet exceed feder guidelin energi effici year round energi money save metal construct mahogani bronz finish champagn wash linen glass shade use 3 cfl bulb 13 watt maximum includ easi instal instruct templat enclos conveni setup ideal provid illumin bathroom ul list includ instal hardwar,0.020833
75580,121656,liquid wrench 32 fl. oz. hydraul jack oil,hydraul jack renat,1.0,hydraulic jack renat,Liquid Wrench 32 fl. oz. Hydraulic Jack Oil,"For use in all hydraulic jacks, snow plows and refillable shock absorbers. Intermixes completely with all other standard hydraulic jack fluids. This is 20 weight oil, ISO rating of 40 and 43 CST at 40C.California residents: see&nbsp;Proposition 65 informationFormulated with extreme pressure additivesFor use in all hydraulic jacksAlso good for snow plowsIntermixes with standard hydraulic jack fluids",use hydraul jack snow plow refil shock absorbers. intermix complet standard hydraul jack fluids. 20 weight oil iso rate 40 43 cst 40c.california resid see nbsp proposit 65 in. formationformul extrem pressur additivesfor use hydraul jacksalso good snow plowsintermix standard hydraul jack fluid,0.666667,0.666667,3,18,0.666667,0.666667,0.625,0.023256,0.25,0.25,8,43,0,0,0.0,0.666667,80,80,82,82,100,100,100,100,1,1,0,liquid wrench,38,-1.188314,formul extrem pressur addit use hydraul jack also good snow plow intermix standard hydraul jack fluid,0.125


In [34]:
train['look_part'] = train['search_term'].str.contains('(part|case|cover|tool|kit)')
train['is_part'] = train['product_title'].str.contains('(part|case|cover|tool|kit)')

test['look_part'] = test['search_term'].str.contains('(part|case|cover|tool|kit)')
test['is_part'] = test['product_title'].str.contains('(part|case|cover|tool|kit)')

tree_cols = ['match_product_title', 'match_descr',
             'len_search_term_words', 'len_search_term_letters',
             'match_pos_title', 'match_pos_descr',
             'prod_title_ind', 'descr_ind',
             'words_std_title', 'words_std_descr',
             'len_title', 'len_descr',
             'last_word_title', 'last_word_descr',
             'match_orig_title', 'match_orig_descr',
             'fuzzy_title', 'fuzzy_descr', 'fuzzy_orig_title', 'fuzzy_orig_descr',
             'part_fuzzy_title', 'part_fuzzy_descr', 'part_fuzzy_orig_title', 'part_fuzzy_orig_descr',
             'first_word_title', 'first_word_descr',
             'match_numbers_title',
             'fuzzy_brand',
             'match_bullets',
             'look_part', 'is_part']

use_cols = tree_cols + ['search_term', 'product_title', 'descr', 'brand', 'bullets']

clf = fitter5(tsvd5=20)
X_train = clf.transform(train[use_cols])
X_test = clf.transform(test[use_cols])

gbm = lgb.LGBMRegressor(learning_rate=0.03, colsample_bytree=0.5, num_leaves=80, n_estimators=1000)
gbm.fit(X_train, y)

tc_len = len(tree_cols)
print(dict(zip(tree_cols, gbm.feature_importances_[:tc_len]) ))
print(gbm.feature_importances_[tc_len:])

write_submission(gbm, X_test)

  """Entry point for launching an IPython kernel.
  
  after removing the cwd from sys.path.
  """


{'match_product_title': 282, 'match_descr': 130, 'len_search_term_words': 215, 'len_search_term_letters': 623, 'match_pos_title': 142, 'match_pos_descr': 171, 'prod_title_ind': 574, 'descr_ind': 377, 'words_std_title': 209, 'words_std_descr': 272, 'len_title': 365, 'len_descr': 346, 'last_word_title': 74, 'last_word_descr': 64, 'match_orig_title': 96, 'match_orig_descr': 159, 'fuzzy_title': 521, 'fuzzy_descr': 414, 'fuzzy_orig_title': 527, 'fuzzy_orig_descr': 364, 'part_fuzzy_title': 188, 'part_fuzzy_descr': 81, 'part_fuzzy_orig_title': 203, 'part_fuzzy_orig_descr': 69, 'first_word_title': 76, 'first_word_descr': 35, 'match_numbers_title': 110, 'fuzzy_brand': 293, 'match_bullets': 202, 'look_part': 11, 'is_part': 44}
[375 347 272 266 325 273 253 305 248 311 305 294 278 332 290 273 266 288
 273 352 262 309 238 284 306 272 283 278 251 302 255 302 279 339 277 283
 282 331 278 281 283 256 266 291 266 310 274 270 304 275 324 271 273 295
 353 296 300 319 322 330 241 375 305 334 291 345 326 2

0.46728

In [35]:
gbm = lgb.LGBMRegressor(learning_rate=0.02, colsample_bytree=0.7, num_leaves=100, n_estimators=1000)
gbm.fit(X_train, y)

tc_len = len(tree_cols)
print(dict(zip(tree_cols, gbm.feature_importances_[:tc_len]) ))
print(gbm.feature_importances_[tc_len:])

write_submission(gbm, X_test)

{'match_product_title': 390, 'match_descr': 184, 'len_search_term_words': 356, 'len_search_term_letters': 915, 'match_pos_title': 190, 'match_pos_descr': 205, 'prod_title_ind': 859, 'descr_ind': 559, 'words_std_title': 283, 'words_std_descr': 375, 'len_title': 520, 'len_descr': 423, 'last_word_title': 118, 'last_word_descr': 102, 'match_orig_title': 171, 'match_orig_descr': 170, 'fuzzy_title': 681, 'fuzzy_descr': 505, 'fuzzy_orig_title': 728, 'fuzzy_orig_descr': 428, 'part_fuzzy_title': 244, 'part_fuzzy_descr': 124, 'part_fuzzy_orig_title': 225, 'part_fuzzy_orig_descr': 90, 'first_word_title': 117, 'first_word_descr': 59, 'match_numbers_title': 160, 'fuzzy_brand': 417, 'match_bullets': 268, 'look_part': 5, 'is_part': 65}
[520 466 380 338 417 300 358 349 292 362 350 312 355 456 419 306 321 349
 370 384 354 360 333 355 310 370 363 346 303 412 330 342 363 414 332 369
 349 412 404 346 314 337 414 339 345 411 336 351 340 316 400 364 337 372
 431 328 352 427 444 364 364 450 351 351 387 405 4

0.46654

In [45]:
tree_cols = ['match_product_title', 'match_descr',
             'len_search_term_words', 'len_search_term_letters',
             'match_pos_title', 'match_pos_descr',
             'prod_title_ind', 'descr_ind',
             'words_std_title', 'words_std_descr',
             'len_title', 'len_descr',
             'last_word_title', 'last_word_descr',
             'match_orig_title', 'match_orig_descr',
             'fuzzy_title', 'fuzzy_descr', 'fuzzy_orig_title', 'fuzzy_orig_descr',
             'part_fuzzy_title', 'part_fuzzy_descr', 'part_fuzzy_orig_title', 'part_fuzzy_orig_descr',
             'first_word_title', 'first_word_descr',
             'match_numbers_title',
             'fuzzy_brand',
             'match_bullets',
             'look_part', 'is_part']

use_cols = tree_cols + ['search_term', 'product_title', 'descr', 'brand', 'bullets']

clf = fitter5(tsvd4=20, tsvd5=20)
X_train = clf.transform(train[use_cols])
X_test = clf.transform(test[use_cols])

gbm = lgb.LGBMRegressor(learning_rate=0.02, colsample_bytree=0.7, num_leaves=100, n_estimators=1000)
gbm.fit(X_train, y)

tc_len = len(tree_cols)
print(dict(zip(tree_cols, gbm.feature_importances_[:tc_len]) ))
print(gbm.feature_importances_[tc_len:])

write_submission(gbm, X_test)

{'match_product_title': 387, 'match_descr': 217, 'len_search_term_words': 375, 'len_search_term_letters': 926, 'match_pos_title': 171, 'match_pos_descr': 200, 'prod_title_ind': 856, 'descr_ind': 544, 'words_std_title': 275, 'words_std_descr': 362, 'len_title': 548, 'len_descr': 359, 'last_word_title': 122, 'last_word_descr': 103, 'match_orig_title': 165, 'match_orig_descr': 161, 'fuzzy_title': 686, 'fuzzy_descr': 449, 'fuzzy_orig_title': 751, 'fuzzy_orig_descr': 427, 'part_fuzzy_title': 239, 'part_fuzzy_descr': 102, 'part_fuzzy_orig_title': 245, 'part_fuzzy_orig_descr': 69, 'first_word_title': 131, 'first_word_descr': 61, 'match_numbers_title': 169, 'fuzzy_brand': 430, 'match_bullets': 252, 'look_part': 11, 'is_part': 72}
[543 426 357 343 421 356 380 353 272 331 356 304 362 453 381 325 365 373
 308 423 366 351 276 370 392 359 351 299 336 372 341 331 350 462 351 338
 386 331 341 358 409 372 373 364 360 363 339 351 381 363 379 343 317 379
 389 415 339 406 391 377 340 468 345 412 361 386 

In [44]:
train.to_csv('zip/train8.csv.gz', compression='gzip')
test.to_csv('zip/test8.csv.gz', compression='gzip')