# Woocommerce Category Converter
This script converts the categories in a WooCommerce product export CSV file from an external website's categories to our own categories.

I use pandas to read in the dataset, and a dictionary (imported from an external file) maps each external category to the matching category on our site.

In [None]:
# import relevant packages
import pandas as pd
import re

#import category mapping dictionary from external file
import categories

# `display` is imported from the public IPython.display module; importing it
# from IPython.core.display has been deprecated since IPython 7.14
from IPython.display import display, HTML
# inject a CSS rule so elements with class "container" use the full width
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# import datasets: both files are read from the current working directory
# kkmp is presumably our own store's product export (the mapping target)
kkmp = pd.read_csv("wc-product-export-15-2-2021-1613399354650.csv")
# ess is the external site's product export whose categories get converted
ess = pd.read_csv("wc-product-export-26-1-2021-1611660714205.csv")

In [None]:
# look at kkmp columns (the notebook displays the column index of the kkmp export)
kkmp.columns

In [None]:
# look at external site's columns, for comparison with the kkmp columns
ess.columns

In [None]:
# look at kkmp unique categories (distinct values of the Categories column)
kkmp.Categories.unique()

In [None]:
# look at the external site's (ess) unique categories
ess.Categories.unique()

In [None]:
# this function returns the categories which are not yet in the mapping dictionary
# this provides an easy way to verify if the mapping dictionary is created correctly
def category_checker(dataset, dictionary):
    """Return the individual categories of ``dataset`` missing from ``dictionary``.

    Each value of the ``Categories`` column is split into individual
    categories, surrounding whitespace is stripped, and every category that
    is not a key of ``dictionary`` is collected.

    Args:
        dataset: DataFrame with a ``Categories`` column of comma-separated
            category strings.
        dictionary: mapping whose keys are the categories already mapped.

    Returns:
        A set of the category strings that have no key in ``dictionary``.
    """
    unmapped = set()
    # dropna: rows without any category are read as NaN and cannot be split
    for entry in dataset.Categories.dropna().unique():
        # split on commas only when they are not escaped with a backslash,
        # so a category that itself contains "\," stays in one piece
        for piece in re.split(r'(?<!\\),', entry):
            category = piece.strip()
            # skip empty fragments (e.g. from a trailing comma)
            if category and category not in dictionary:
                unmapped.add(category)
    return unmapped

In [None]:
# check the external site's categories against the ess_to_kkmp mapping dictionary
category_checker(ess, categories.ess_to_kkmp)

In [None]:
# orders the mapping dictionary from the longest key to the shortest key
# the regex find and replace walks the keys in order, so long category names
# must come first or a shorter key would replace part of them too early
def sort_categories(dictionary):
    """Return a copy of ``dictionary`` with its keys ordered longest first.

    Keys of equal length keep their original relative order.
    """
    longest_first = sorted(dictionary, key=len, reverse=True)
    return {name: dictionary[name] for name in longest_first}

In [None]:
# replaces values in the categories with the mapping dictionary values
# NOTE: This modifies the passed dataset (its Categories column is reassigned)
def replace_categories(sorted_dict, dataset):
    """Rewrite the ``Categories`` column of ``dataset`` using ``sorted_dict``.

    Args:
        sorted_dict: mapping of search pattern -> replacement text. The keys
            are treated as regular expressions (``regex=True``), so any
            occurrence inside a cell is replaced, not only whole-cell matches.
        dataset: DataFrame with a ``Categories`` column; it is updated in place.

    Returns:
        None.
    """
    # assign the result back to the column instead of calling
    # replace(..., inplace=True) on the column accessor: that chained in-place
    # form may act on a temporary copy and leave the DataFrame unchanged
    dataset["Categories"] = dataset["Categories"].replace(sorted_dict, regex=True)
    
# sort the mapping (longest keys first) and apply it to the external site's data
replace_categories(sort_categories(categories.ess_to_kkmp), ess)

In [None]:
# check categories after find and replace
# any external category name still listed here was not converted
ess.Categories.unique()

In [None]:
# export to csv
# index=False keeps the pandas row index out of the file; without it an extra
# unnamed first column is written that was not part of the original export
ess.to_csv("converted_csv.csv", index=False)

In [102]:
# create sample file (first 10 rows) to test import
# index=False keeps the pandas row index out of the file; without it an extra
# unnamed first column is written that was not part of the original export
ess.head(10).to_csv("converted_test_csv.csv", index=False)