#### pip install spacy openpyxl
#### python -m spacy download en_core_web_lg


In [20]:
import pandas as pd

# Load your CSV files
categories_df = pd.read_excel("data/business_category_taxonomy.xlsx", engine='openpyxl')
domains_df = pd.read_excel("data/Naics3_taxonomy.xlsx", engine='openpyxl')
business_df = pd.read_csv("data/tournament_hints_data.csv", nrows=100000)

In [21]:
business_df

Unnamed: 0,commercial_name,business_tags,short_description,description,main_business_category
0,White Horse,Tile Manufacturing | European Aesthetics Ceram...,White Horse is highly regarded as a tile trail...,White Horse Ceramic Singapore is a leading man...,Tile Store
1,Wealth Solution Partners,Super and SMSF Services | Financial Planning a...,"WSP, Wealth Solution Partners, Financial Plann...",Wealth Solution Partners Pty Ltd is an indepen...,Investment Consultants & Financial Advisors
2,PMG,Fire and Water Cleanup Services | Mold Remedia...,PMG General Solutions Inc. is an environmental...,PMG General Solutions Inc. is an environmental...,Damage Restoration & Mold Remediation
3,TMP Capital PLLC,Licensed in AL & FL | 203K Loans | 15-year Fix...,TMP Capital PLLC Consulting Company Franklin M...,"TMP Capital PLLC Consulting Company, also know...",Mortgage Brokers
4,Genertek Power,Industrial and Commercial Energy Storage | Ass...,Genertek Power Ltd a UK electricity systems & ...,Genertek Power Limited is a privately-owned UK...,Renewable energy companies
...,...,...,...,...,...
99995,Karma Hardware,Wholesale | Brass Hook Marble Tower Bolt | Coa...,"Karma Hardware - Manufacturer of Door Hinges, ...",Karma Hardware is a manufacturer based in Koth...,Hardware Stores
99996,Rainbow Paper,Bulky Newsprint | Parchment Paper & Parchment ...,Rainbow Paper offers a huge selection of art &...,Rainbow Paper is an Australian-owned brand tha...,Paper & Cardboard Products
99997,Blue Support Services,24/7 Facility Management | Building Maintenanc...,Blue Support Services provides a flexible and ...,Blue Support Services is a company that offers...,Security Guards & Patrol Services
99998,Digital Citizen,Bespoke Resourcing Solutions | Cross-platform ...,Digital Citizen is a NYC-based digital product...,Digital Citizen is a New York City-based digit...,Digital & Marketing Agencies


In [22]:
import spacy
import string
from sklearn.metrics.pairwise import cosine_similarity
import re

# Load the medium spaCy model
nlp = spacy.load("en_core_web_lg")


def remove_mentions_and_hashtags(text):
    text = re.sub(r"@\w+", "", text)
    text = re.sub(r"#\w+", "", text)
    return text


def remove_numbers(text):
    text = re.sub(r"\d+", "", text)
    return text


def remove_punctuation(text):

    translator = str.maketrans("", "", string.punctuation)

    # Remove punctuation using the translation table
    text_without_punct = text.translate(translator)

    return text_without_punct


def remove_stopwords(text):
    filtered_sentence = []
    doc = nlp(text)
    for token in doc:
        if token.is_stop == False:
            filtered_sentence.append(token.text)
    return " ".join(filtered_sentence)


def lemmatize(text):
    doc = nlp(text)
    text = " ".join([token.lemma_ for token in doc])
    return text


def clean_text(text, to_lemmatize: bool = True):
    # Standardize text
    # text = standardize_accented_chars(text)

    # Remove URLs
    text = re.sub(r"http\S+", "", text)

    # Remove mentions and hashtags
    text = remove_mentions_and_hashtags(text)

    # Lowercase
    text = text.lower()

    # Remove punctuation
    text = remove_punctuation(text)

    # Remove numbers
    text = remove_numbers(text)

    # Remove all the special characters
    text = re.sub(r"\W", " ", text)

    # Remove stopwords
    text = remove_stopwords(text)

    # Substituting multiple spaces with single space
    text = re.sub(r"\s+", " ", text, flags=re.I)

    if to_lemmatize:
        text = lemmatize(text)

    return text


categories_df["clean_description"] = categories_df["description"].apply(
    lambda x: clean_text(x)
)
domains_df["clean_description"] = domains_df["description"].apply(
    lambda x: clean_text(x)
)

categories_df["description_vector"] = categories_df["clean_description"].apply(nlp)
domains_df["description_vector"] = domains_df["clean_description"].apply(nlp)

In [23]:
categories_df[:1]

Unnamed: 0,label,description,clean_description,description_vector
0,ATVs Dealers & Services,Businesses categorized under ATVs Dealers & Se...,business categorize atvs dealer service specia...,"(business, categorize, atvs, dealer, service, ..."


In [24]:
def find_best_match(cat_desc_vec, label_vectors):
    # Calculate similarities and return the index of the highest similarity
    similarities = [
        cat_desc_vec.similarity(label_desc) for label_desc in label_vectors
    ]
    return similarities.index(max(similarities))


# Convert domain vectors to a list for efficiency
label_vectors = domains_df["description_vector"].tolist()

# Find the best match for each category
categories_df["best_match_label_index"] = categories_df["description_vector"].apply(
    lambda x: find_best_match(x, label_vectors)
)

# Map the index to the actual domain name
categories_df["best_match_label"] = categories_df["best_match_label_index"].apply(
    lambda x: domains_df.iloc[x]["naics_label"]
)

In [25]:
categories_df

Unnamed: 0,label,description,clean_description,description_vector,best_match_label_index,best_match_label
0,ATVs Dealers & Services,Businesses categorized under ATVs Dealers & Se...,business categorize atvs dealer service specia...,"(business, categorize, atvs, dealer, service, ...",68,Rental and Leasing Services
1,Abortion Clinics,Abortion Clinics provide medical services rela...,abortion clinic provide medical service relate...,"(abortion, clinic, provide, medical, service, ...",76,Hospitals
2,Accounting & Bookkeeping Services,Accounting & Bookkeeping Services encompass a ...,accounting bookkeeping service encompass range...,"(accounting, bookkeeping, service, encompass, ...",72,Administrative and Support Services
3,Acupuncture clinic,An Acupuncture clinic is a healthcare facility...,acupuncture clinic healthcare facility special...,"(acupuncture, clinic, healthcare, facility, sp...",75,Ambulatory Health Care Services
4,Adhesives & Sealants,Adhesives & Sealants encompass a diverse range...,adhesive sealant encompass diverse range produ...,"(adhesive, sealant, encompass, diverse, range,...",32,Miscellaneous Manufacturing
...,...,...,...,...,...,...
598,Work Clothing & Protection Equipment,Work Clothing & Protection Equipment pertains ...,work clothing protection equipment pertain spe...,"(work, clothing, protection, equipment, pertai...",84,Repair and Maintenance
599,Writers & Copywriters,Writers & Copywriters are skilled professional...,writer copywriter skilled professional special...,"(writer, copywriter, skilled, professional, sp...",57,Publishing Industries
600,YMCA Camps,YMCA Camps offer structured programs and activ...,ymca camp offer structured program activity de...,"(ymca, camp, offer, structured, program, activ...",85,Personal and Laundry Services
601,Yoga Studios,Yoga Studios offer a tranquil space for indivi...,yoga studio offer tranquil space individual en...,"(yoga, studio, offer, tranquil, space, individ...",79,"Performing Arts, Spectator Sports, and Related..."


In [26]:
business_df[:5]

Unnamed: 0,commercial_name,business_tags,short_description,description,main_business_category
0,White Horse,Tile Manufacturing | European Aesthetics Ceram...,White Horse is highly regarded as a tile trail...,White Horse Ceramic Singapore is a leading man...,Tile Store
1,Wealth Solution Partners,Super and SMSF Services | Financial Planning a...,"WSP, Wealth Solution Partners, Financial Plann...",Wealth Solution Partners Pty Ltd is an indepen...,Investment Consultants & Financial Advisors
2,PMG,Fire and Water Cleanup Services | Mold Remedia...,PMG General Solutions Inc. is an environmental...,PMG General Solutions Inc. is an environmental...,Damage Restoration & Mold Remediation
3,TMP Capital PLLC,Licensed in AL & FL | 203K Loans | 15-year Fix...,TMP Capital PLLC Consulting Company Franklin M...,"TMP Capital PLLC Consulting Company, also know...",Mortgage Brokers
4,Genertek Power,Industrial and Commercial Energy Storage | Ass...,Genertek Power Ltd a UK electricity systems & ...,Genertek Power Limited is a privately-owned UK...,Renewable energy companies


In [27]:
final_data = pd.merge(business_df, categories_df[['label','best_match_label_index','best_match_label']], left_on="main_business_category",right_on="label")
final_data.drop(columns=['label'], inplace=True)

In [28]:
final_data

Unnamed: 0,commercial_name,business_tags,short_description,description,main_business_category,best_match_label_index,best_match_label
0,White Horse,Tile Manufacturing | European Aesthetics Ceram...,White Horse is highly regarded as a tile trail...,White Horse Ceramic Singapore is a leading man...,Tile Store,31,Furniture and Related Product Manufacturing
1,Wealth Solution Partners,Super and SMSF Services | Financial Planning a...,"WSP, Wealth Solution Partners, Financial Plann...",Wealth Solution Partners Pty Ltd is an indepen...,Investment Consultants & Financial Advisors,64,"Securities, Commodity Contracts, and Other Fin..."
2,PMG,Fire and Water Cleanup Services | Mold Remedia...,PMG General Solutions Inc. is an environmental...,PMG General Solutions Inc. is an environmental...,Damage Restoration & Mold Remediation,84,Repair and Maintenance
3,TMP Capital PLLC,Licensed in AL & FL | 203K Loans | 15-year Fix...,TMP Capital PLLC Consulting Company Franklin M...,"TMP Capital PLLC Consulting Company, also know...",Mortgage Brokers,64,"Securities, Commodity Contracts, and Other Fin..."
4,Genertek Power,Industrial and Commercial Energy Storage | Ass...,Genertek Power Ltd a UK electricity systems & ...,Genertek Power Limited is a privately-owned UK...,Renewable energy companies,8,Utilities
...,...,...,...,...,...,...,...
99995,Karma Hardware,Wholesale | Brass Hook Marble Tower Bolt | Coa...,"Karma Hardware - Manufacturer of Door Hinges, ...",Karma Hardware is a manufacturer based in Koth...,Hardware Stores,37,Building Material and Garden Equipment and Sup...
99996,Rainbow Paper,Bulky Newsprint | Parchment Paper & Parchment ...,Rainbow Paper offers a huge selection of art &...,Rainbow Paper is an Australian-owned brand tha...,Paper & Cardboard Products,20,Printing and Related Support Activities
99997,Blue Support Services,24/7 Facility Management | Building Maintenanc...,Blue Support Services provides a flexible and ...,Blue Support Services is a company that offers...,Security Guards & Patrol Services,64,"Securities, Commodity Contracts, and Other Fin..."
99998,Digital Citizen,Bespoke Resourcing Solutions | Cross-platform ...,Digital Citizen is a NYC-based digital product...,Digital Citizen is a New York City-based digit...,Digital & Marketing Agencies,72,Administrative and Support Services


In [29]:
final_data.to_csv('labelled_data.csv', index=False)


In [30]:
final_data[final_data['main_business_category'] == 'Beauty Salons']


Unnamed: 0,commercial_name,business_tags,short_description,description,main_business_category,best_match_label_index,best_match_label
243,CARIDAD VIDRO,On-location Services for Private Events | Brid...,"Welcome to CARIDAD VIDRO SHOP, wedding favors ...",Caridad VIDRO is a professional hair and makeu...,Beauty Salons,41,Health and Personal Care Retailers
680,Azalea Beauty Consultancy India,Sourcing and Purchasing Services | Layout Plan...,Initial and ongoing training and support for S...,Azalea Beauty Consultancy India is a company t...,Beauty Salons,41,Health and Personal Care Retailers
1469,BEAUTIFUL REMEDY,Waxing Services | Permanent Cosmetics | Facial...,BEAUTIFUL REMEDY is a beauty salon based in Ka...,Beautiful Remedy LLC is a beauty company that ...,Beauty Salons,41,Health and Personal Care Retailers
1674,Sogno The Salon Company,Personal Care Services | Hair Coloring Service...,Sogno The Salon Company is a beauty salon base...,Sogno The Salon Company is a beauty salon loca...,Beauty Salons,41,Health and Personal Care Retailers
2244,Eman Beauty Lab,Beauty Treatments | Beauty Services | Retail |...,"Welcome to Eman Beauty Lab, our Nail Salon in ...",Eman Beauty Lab is a nail salon located in Asc...,Beauty Salons,41,Health and Personal Care Retailers
...,...,...,...,...,...,...,...
98062,Polish Me Pink,Beauty & Care | Beauty Services,"Introducing Polish Me Pink, your ultimate solu...",Polish Me Pink is a company that offers a cura...,Beauty Salons,41,Health and Personal Care Retailers
98882,Laura J. Crawford,Electrolysis Services | Brazilian Waxing | Hai...,"Laura J. Crawford - Hair Removal, Brazilian Wa...",Laura J. Crawford is a hair removal studio loc...,Beauty Salons,41,Health and Personal Care Retailers
99500,The Vanity Co.,Personal Care Services | Microblading Services...,The Vanity Co. is hair salon that been a stapl...,The Vanity Co. Salon & Esthetics is a hair sal...,Beauty Salons,41,Health and Personal Care Retailers
99618,Yellow Door Studio,Acupuncture | Massage Services | Reiki Service...,"Yellow Door Studio is a fully licensed, artful...","Yellow Door Studio is a fully licensed, artful...",Beauty Salons,41,Health and Personal Care Retailers


In [31]:
description = "NU U 2 Hair & Beauty is a professional hair and beauty salon located in Milton Regis High Street, Sittingbourne. They offer a variety of hair and body treatments, including hair cutting and styling, as well as other beauty treatments such as waxing, eye treatments, facials, massage, body treatments and permanent make-up."

In [32]:
final_data["clean_description"] = final_data["description"].apply(
    lambda x: clean_text(x)
)

final_data["description_vector"] = final_data["clean_description"].apply(nlp)

In [36]:
final_data

Unnamed: 0,commercial_name,business_tags,short_description,description,main_business_category,best_match_label_index,best_match_label,clean_description,description_vector
0,White Horse,Tile Manufacturing | European Aesthetics Ceram...,White Horse is highly regarded as a tile trail...,White Horse Ceramic Singapore is a leading man...,Tile Store,31,Furniture and Related Product Manufacturing,white horse ceramic singapore lead manufacture...,"(white, horse, ceramic, singapore, lead, manuf..."
1,Wealth Solution Partners,Super and SMSF Services | Financial Planning a...,"WSP, Wealth Solution Partners, Financial Plann...",Wealth Solution Partners Pty Ltd is an indepen...,Investment Consultants & Financial Advisors,64,"Securities, Commodity Contracts, and Other Fin...",wealth solution partner pty ltd independent fi...,"(wealth, solution, partner, pty, ltd, independ..."
2,PMG,Fire and Water Cleanup Services | Mold Remedia...,PMG General Solutions Inc. is an environmental...,PMG General Solutions Inc. is an environmental...,Damage Restoration & Mold Remediation,84,Repair and Maintenance,pmg general solutions inc environmental remedi...,"(pmg, general, solutions, inc, environmental, ..."
3,TMP Capital PLLC,Licensed in AL & FL | 203K Loans | 15-year Fix...,TMP Capital PLLC Consulting Company Franklin M...,"TMP Capital PLLC Consulting Company, also know...",Mortgage Brokers,64,"Securities, Commodity Contracts, and Other Fin...",tmp capital pllc consult company know franklin...,"(tmp, capital, pllc, consult, company, know, f..."
4,Genertek Power,Industrial and Commercial Energy Storage | Ass...,Genertek Power Ltd a UK electricity systems & ...,Genertek Power Limited is a privately-owned UK...,Renewable energy companies,8,Utilities,genertek power limit privatelyowned uk investm...,"(genertek, power, limit, privatelyowned, uk, i..."
...,...,...,...,...,...,...,...,...,...
99995,Karma Hardware,Wholesale | Brass Hook Marble Tower Bolt | Coa...,"Karma Hardware - Manufacturer of Door Hinges, ...",Karma Hardware is a manufacturer based in Koth...,Hardware Stores,37,Building Material and Garden Equipment and Sup...,karma hardware manufacturer base kothariya raj...,"(karma, hardware, manufacturer, base, kothariy..."
99996,Rainbow Paper,Bulky Newsprint | Parchment Paper & Parchment ...,Rainbow Paper offers a huge selection of art &...,Rainbow Paper is an Australian-owned brand tha...,Paper & Cardboard Products,20,Printing and Related Support Activities,rainbow paper australianowned brand specialize...,"(rainbow, paper, australianowned, brand, speci..."
99997,Blue Support Services,24/7 Facility Management | Building Maintenanc...,Blue Support Services provides a flexible and ...,Blue Support Services is a company that offers...,Security Guards & Patrol Services,64,"Securities, Commodity Contracts, and Other Fin...",blue support service company offer flexible in...,"(blue, support, service, company, offer, flexi..."
99998,Digital Citizen,Bespoke Resourcing Solutions | Cross-platform ...,Digital Citizen is a NYC-based digital product...,Digital Citizen is a New York City-based digit...,Digital & Marketing Agencies,72,Administrative and Support Services,digital citizen new york citybase digital prod...,"(digital, citizen, new, york, citybase, digita..."


In [34]:
row_index = 5  # Replace with the desired index
specific_row = final_data.loc[row_index]
print(specific_row)


commercial_name                                             HQ Machine Tech
business_tags             Nickel Alloy Machining | ISO 9001 Certificatio...
short_description         HQ Machine Tech LLC is closely affiliated with...
description               HQ Machine Tech, LLC is a company that special...
main_business_category                         Machinery parts manufacturer
best_match_label_index                                                   27
best_match_label                                    Machinery Manufacturing
clean_description         hq machine tech llc company specialize provide...
description_vector        (hq, machine, tech, llc, company, specialize, ...
Name: 5, dtype: object


In [37]:
final_data.loc[find_best_match(nlp(description),final_data["description_vector"].tolist())]

commercial_name                                                       Mejes
business_tags             Beautification Products | Eye Masks | Portable...
short_description         In 1997, Lina Mejes founded the Mejes high-tec...
description               Mejes Beauté Sciences R & D is a French compan...
main_business_category                                     Cosmetics Stores
best_match_label_index                                                   34
best_match_label                     Merchant Wholesalers, Nondurable Goods
clean_description         mejes beauté sciences r d french company found...
description_vector        (mejes, beauté, sciences, r, d, french, compan...
Name: 27283, dtype: object

In [38]:
final_data.to_csv('./data/clean_description.csv', index=False)

In [47]:
description = "Nirmiti Advertising is an authorized agency that specializes in audio and video production for television and radio commercials, event management, and media planning. They are also authorized for various TV channels and radio stations and work in multiple languages such as Marathi, Hindi, English, Gujarati, and Kannada. Their services include event coverage, film production, TV serials, and hoardings. The company is known for their expertise in audio, video, and film production and event management."

final_data.loc[find_best_match(nlp(description),final_data["description_vector"].tolist())]

commercial_name                                          Peekaboo Animation
business_tags             New Media Production | TV Rights Management | ...
short_description         Peekaboo Animation produces and distributes ch...
description               Peekaboo Animation is a studio founded in 2015...
main_business_category                        Radio & Television Production
best_match_label_index                                                   58
best_match_label                         Broadcasting and Content Providers
clean_description         peekaboo animation studio found specialize dev...
description_vector        (peekaboo, animation, studio, found, specializ...
Name: 31705, dtype: object

In [55]:
# get just an item from the final_data list on column 'description_vector'

important_words = [token.text for token in final_data['description_vector'][0] if token.pos_ in ['NOUN', 'ADJ', 'VERB']]
important_words

['white',
 'lead',
 'manufacturer',
 'highquality',
 'ceramic',
 'offer',
 'wide',
 'range',
 'ceramic',
 'product',
 'residential',
 'commercial',
 'space',
 'establish',
 'company',
 'know',
 'commitment',
 'quality',
 'innovation',
 'artistry',
 'skilled',
 'artisan',
 'craft',
 'piece',
 'ceramic',
 'create',
 'beautiful',
 'functional',
 'product',
 'stand',
 'test',
 'time',
 'global',
 'presence',
 'country',
 'white',
 'horse',
 'ceramic',
 'synonymous',
 'style',
 'durability',
 'unmatched',
 'craftsmanship',
 'offer',
 'variety',
 'tile',
 'include',
 'ceramic',
 'tile',
 'stone',
 'tile',
 'company',
 'commit',
 'sustainability',
 'offer',
 'green',
 'label',
 'certify',
 'tile',
 'meet',
 'environmental',
 'performance',
 'health',
 'parameter']

In [54]:
len(final_data['description_vector'][0])

73

In [50]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

def extract_keywords_tfidf(documents, max_features=20):
    vectorizer = TfidfVectorizer(stop_words="english", max_features=max_features)
    tfidf_matrix = vectorizer.fit_transform(documents)
    feature_names = vectorizer.get_feature_names_out()
    # Create a dictionary to store keywords by document
    keywords = {i: [] for i in range(tfidf_matrix.shape[0])}
    for i, j in zip(*tfidf_matrix.nonzero()):
        keywords[i].append(feature_names[j])
    return keywords


# Apply keyword extraction
# documents = final_data["description"].tolist()
# keywords = extract_keywords_tfidf(documents)


# Function to filter keywords by POS tags and recompute vectors
def filter_keywords_recompute_vector(keywords):
    filtered_vectors = []
    for doc_keywords in keywords.values():
        doc = nlp(" ".join(doc_keywords))
        filtered_text = " ".join(
            [token.text for token in doc if token.pos_ in ["NOUN", "ADJ", "VERB"]]
        )
        filtered_vector = nlp(filtered_text).vector
        filtered_vectors.append(filtered_vector)
    return np.array(filtered_vectors)


# Recompute vectors with filtered keywords
filtered_vectors = filter_keywords_recompute_vector(final_data["description_vector"].to_list())

# Assign the new vectors to the dataframe
final_data["keywords"] = list(filtered_vectors)

AttributeError: 'list' object has no attribute 'values'

In [1]:
import pandas as pd

# Load the datasets
test_df = pd.read_csv("data/data.csv")
dataset = pd.read_csv("data/tournament_hints_data.csv")

In [3]:
from fuzzywuzzy import process


# Function to find best match for each item in round_1
def find_best_matches(row, choices):
    # This returns the best match, its score, and the index
    best_match = process.extractOne(row, choices)
    return best_match


# Apply the function to each element in the round_1 column
test_df["best_match"] = test_df["round_1"].apply(
    find_best_matches, choices=dataset["commercial_name"].tolist()
)

# Split the tuple of best match into separate columns if needed
test_df[["match_name", "match_score", "match_index"]] = pd.DataFrame(
    test_df["best_match"].tolist(), index=False
)

ValueError: Columns must be same length as key

In [4]:
test_df

Unnamed: 0.1,Unnamed: 0,company_id,round_1,answer,round_2,round_3,round_4,round_5,best_match
0,0,84,Advisory Care,621 Ambulatory Health Care Services,Hygiene Services | One-on-one Attention and Ca...,Advisory Care is a home health care services p...,Advisory Care Pty Ltd is a company that provid...,Home Health Care,"(ADVISORCARE, 92)"
1,1,115,PPK Mining Equipment,213 Support Activities for Mining,High & Low Seam Drills | Ventilation Solutions...,PPK Mining Equipment is the solution partner o...,PPK Mining Equipment is a company that special...,Mining & Gas Exploration,"(Mining Equipment, 95)"
2,2,50,First Tracks Lawn Care,561 Administrative and Support Services,Weekly Mowing | Driveway Plowing Services | Mu...,"First Tracks Lawn Care, LLC prides itself on p...","First Tracks Lawn Care, LLC is a lawn care com...",Landscaping & Lawn Services,"(TRA, 90)"
3,3,92,New Golden Furnishers,337 Furniture and Related Product Manufacturing,Staff Task Computer Chairs Supplier | Mesh Bac...,New Golden Furnishers Co - Manufacturer of Dou...,New Golden Furnishers Co is a furniture manufa...,Office Furniture,"(Furni, 90)"
4,4,88,Lilac City Septic,562 Waste Management and Remediation Services,Clean-Out Installation | Line Replacement Serv...,Lilac City Septic services residential & comme...,Lilac City Septic Services Co is a family-owne...,Garbage Collection & Waste Disposal,"(TIC, 90)"
...,...,...,...,...,...,...,...,...,...
101,101,28,Purple Pixl,811 Repair and Maintenance,Battery Replacement | Viruses Removal | Front ...,Purple Pixl offers fast & effective repair ser...,Purple Pixl is a technology and electronics re...,Phone Repair & Services,"(PUR, 90)"
102,102,116,Pasargad Jonub,237 Heavy and Civil Engineering Construction,Infrastructure Project Management | Design and...,Having a brilliant background and well-trained...,Pasargad Jonub is a private joint stock compan...,Civil Engineering Services,"(ASAR, 90)"
103,103,6,Davis Business Machines,"423 Merchant Wholesalers, Durable Goods",Managed IT Services | Document Scanners | Fax ...,"Since May 13, 1963, Davis Business Machines ha...",Davis Business Machines is a company that has ...,Office Supplies,"(ESS, 90)"
104,104,24,High Desert Tool,326 Plastics and Rubber Products Manufacturing,Aerospace Molds Supplier | Automotive Molds Di...,High Desert Tool specializes in large mold man...,High Desert Tool is a company that specializes...,Injection Molding Services,"(High Desert Tools, 97)"


In [6]:
test_df[["match_name", "match_score", "match_index"]] = pd.DataFrame(
    test_df["best_match"].tolist(), index=test_df.index
)

ValueError: Columns must be same length as key

In [7]:
test_df["best_match"].to_list()

[('ADVISORCARE', 92),
 ('Mining Equipment', 95),
 ('TRA', 90),
 ('Furni', 90),
 ('TIC', 90),
 ('TNT Industries', 93),
 ('ROC', 90),
 ('Bake Me Away', 92),
 ('The Law Firm Of Carlton F. Bennett', 86),
 ('Design', 90),
 ('AGI', 90),
 ('ABL', 90),
 ('BLI', 90),
 ('Reco', 90),
 ('Cosmetic Surgery', 95),
 ('Avi', 90),
 ('JAB training and consultancy', 90),
 ('IDS', 90),
 ('TIC', 90),
 ('Accurate', 90),
 ('Sti', 90),
 ('ICAL', 90),
 ('Tree Service', 90),
 ('HEÖ', 90),
 ('Pediatrics', 90),
 ('OTS', 90),
 ('JS INTERNATIONAL', 97),
 ('Roof', 90),
 ('Roof', 90),
 ('SIC', 90),
 ('Extreme Auto Body', 95),
 ('Inera', 90),
 ('ATS', 90),
 ('Studio D', 100),
 ('Springfield', 90),
 ('3 Brothers', 90),
 ('CYCLE', 90),
 ("Kim's", 90),
 ('TRA', 90),
 ('RTR', 90),
 ('CTS', 90),
 ('ECT', 90),
 ('NER', 90),
 ('Trifta', 91),
 ('PING', 90),
 ('Park-n-Store', 92),
 ('ISTI', 90),
 ('ARE', 90),
 ('BIG DIPPER', 100),
 ('Fanxi Textile', 90),
 ('IAM', 90),
 ('IGAN', 90),
 ('LAT', 90),
 ('Tax Service', 90),
 ('LASE',