# Convert a given Google shopping feed to QSC
**This project is still under development, so some features might be missing.**<br>
## First we need to define some helper functions:

In [1]:
import csv
import json
from typing import Optional


class GShoppingUtils:
    """
    Helpers for converting Google shopping feed products to the QSC format.

    An instance holds three lookup tables built from the files passed to the constructor:
    id_to_path: Google category id -> full category path
    path_to_id: full category path -> Google category id
    qsc_key_mapping: Google product key -> qsc key ("attribute", "category" or "property")
    """

    # Separator between the levels of a Google category path
    _PATH_SEPARATOR = " > "

    def __init__(self, taxonomy_file: str, key_mapping_file: str):
        """
        Load the taxonomy and the key mapping
        :param taxonomy_file: Google Product Taxonomy txt file
        :param key_mapping_file: csv file in the format dataType;googleKey;qscKey
        """
        # Parse the taxonomy file only once and derive the reverse lookup from the result
        self.id_to_path = self.category_id_to_full_path(taxonomy_file)
        self.path_to_id = {path: category_id for category_id, path in self.id_to_path.items()}
        self.qsc_key_mapping = self.get_google_to_qsc_mapping(key_mapping_file)

    @staticmethod
    def validate_date(date_string: str) -> bool:
        """
        Validate date string
        NOTE: this is still a placeholder. It reports every input as invalid,
        so the "date" datatype is currently never assigned to an attribute.
        :param date_string: datetime string to validate
        :return: true if valid, false if not valid
        """
        # TODO implement validate_date.
        return False

    @classmethod
    def full_path_to_category_id(cls, filename: str) -> dict:
        """
        Converts Google category full paths to their id
        :param filename: Google Product Taxonomy txt file
        :return: Dictionary mapping full paths to their id
        """

        # Swap key and value
        return {v: k for k, v in cls.category_id_to_full_path(filename=filename).items()}

    @staticmethod
    def category_id_to_full_path(filename: str) -> dict:
        """
        Converts Google category ids to their full path
        Every line after the first is expected to look like "<id> - <full path>".
        Lines without the " - " separator (e.g. blank lines) are ignored.
        :param filename: Google Product Taxonomy txt file
        :return: Dictionary mapping ids to their full path
        """

        # Resulting dict mapping ids to their full path
        ids_to_full_path = {}

        # Open file containing Google categories
        with open(file=filename, mode='r', encoding='utf_8') as infile:
            # Skip first line (it is not a category entry)
            infile.readline()

            # Read file line by line
            for line in infile:
                # Split at the FIRST separator only, so a path that itself contains " - " stays intact
                category_id, separator, full_path = line.rstrip('\n').partition(' - ')

                # Ignore lines that do not contain an id/path pair
                if not separator:
                    continue

                ids_to_full_path[category_id] = full_path

        return ids_to_full_path

    @staticmethod
    def get_google_to_qsc_mapping(filename: str) -> dict:
        """
        Get mapping from Google product keys to qsc keys.
        The following qscKeys are a valid option:
        attribute: Google key should be used as an attribute
        category: Google key should be used as a category
        property: Google key should be used as a normal property
        The first row is treated as the header and skipped. Rows with fewer than three
        columns or with an empty qscKey are ignored.
        :param filename: csv file containing the mapping in the following format: dataType;googleKey;qscKey
        :return: dict mapping Google keys to qsc keys
        """
        mapping = {}

        # newline='' is what the csv module expects for files handed to csv.reader
        with open(file=filename, mode='r', encoding='utf_8', newline='') as infile:
            csv_reader = csv.reader(infile, delimiter=';')

            # Skip the header row
            next(csv_reader, None)

            for row in csv_reader:
                # Blank or incomplete rows carry no mapping
                if len(row) < 3:
                    continue

                google_key, qsc_key = row[1], row[2]
                if qsc_key != '':
                    mapping[google_key] = qsc_key

        return mapping

    @staticmethod
    def is_float(element: str) -> bool:
        """
        Check whether a string can be parsed as a float
        :param element: string to check; anything that is not a string yields false
        :return: true if float(element) succeeds, false otherwise
        """
        if not isinstance(element, str):
            return False
        try:
            float(element)
            return True
        except ValueError:
            return False

    def _detect_data_type(self, value) -> str:
        """
        Determine the QSC datatype name for a single attribute value
        :param value: first value of an attribute
        :return: "boolean", "long", "double", "date", "string" or "" if the type is unknown
        """
        # bool must be tested before int because bool is a subclass of int
        if isinstance(value, bool):
            return "boolean"
        if isinstance(value, int):
            return "long"
        # Real floats as well as strings that parse as a float count as double
        if isinstance(value, float) or self.is_float(value):
            return "double"
        if isinstance(value, str):
            return "date" if self.validate_date(value) else "string"

        # e.g. nested objects: no datatype is reported
        return ""

    def build_qsc_category(self, google_category: str) -> Optional[dict]:
        """
        Converts a given Google category id OR Google category path to the QSC format
        A message is printed and None is returned if the category cannot be resolved.
        :param google_category: ID or full path of the category to convert
        :return: Category in QSC format as json or None
        """

        # Input invalid
        if google_category is None:
            print("Invalid input")
            return None

        # Ids may arrive as numbers (e.g. from parsed json); the lookups are keyed by strings
        category = str(google_category)

        # Determine if we are using the category id or category path
        if category.isnumeric():
            # Using id: get working path over id
            google_path = self.id_to_path.get(category)
            if google_path is None:
                print("ID not found")
                return None
        else:
            # Using path: check if path is valid
            if category not in self.path_to_id:
                print("Path not found")
                return None
            google_path = category

        # Split path into categories
        tokens = google_path.split(self._PATH_SEPARATOR)

        # One entry per level: the entry for level n describes the sub path made of the first n + 1 tokens
        levels = []
        for depth, token in enumerate(tokens):
            sub_path = self._PATH_SEPARATOR.join(tokens[:depth + 1])
            levels.append({'id': self.path_to_id.get(sub_path), 'name': token, 'level': depth})

        return {'category': levels}

    def get_qsc_categories(self, g_product: dict) -> Optional[dict]:
        """
        Generate QSC categories from Google product
        Only the key "googleProductCategory" is evaluated, and only if it is mapped to "category".
        :param g_product: Google product to get categories from
        :return: categories in QSC format or None
        """
        key = "googleProductCategory"
        value = g_product.get(key)

        if value is None or self.qsc_key_mapping.get(key) != "category":
            return None

        # An unknown id/path yields None and must not end up in the result list
        category = self.build_qsc_category(value)
        if category is None:
            return None

        return {"categories": [category]}

    def get_qsc_attributes(self, g_product: dict) -> Optional[dict]:
        """
        Generate QSC attributes from Google product
        The datatype of an attribute is derived from its first value.
        :param g_product: Google product to get attributes from
        :return: attributes in QSC format or None
        """

        # List containing attributes
        attributes = []

        for key, value in g_product.items():
            if value is None or self.qsc_key_mapping.get(key) != "attribute":
                continue

            # TODO treat nested objects better. Currently they are just passed on unchanged as a single value.
            values = value if isinstance(value, list) else [value]

            # Make sure list is not empty
            if not values:
                continue

            attribute = {"name": key, "values": values}

            # The datatype is only added if it could be determined
            data_type = self._detect_data_type(values[0])
            if data_type:
                attribute["datatype"] = data_type

            attributes.append(attribute)

        if attributes:
            return {"attributes": attributes}
        return None

    def get_qsc_product(self, g_product: dict) -> Optional[dict]:
        """
        Convert Google product to QSC product
        :param g_product: Google product
        :return: Google product in QSC format, or None if the product has no key mapped to "property"
        """

        # Copy every key that is mapped to a normal property
        product = {
            key: value
            for key, value in g_product.items()
            if value is not None and self.qsc_key_mapping.get(key) == "property"
        }

        if not product:
            return None

        categories = self.get_qsc_categories(g_product)
        attributes = self.get_qsc_attributes(g_product)

        if categories is not None:
            product.update(categories)

        if attributes is not None:
            product.update(attributes)

        # Add header
        product["header"] = {"id": g_product.get("id"), "action": "update"}

        return product


## Use Google shopping utils
To use the utils class we need to get the most recent Google taxonomy file.<br>
After that we can create an instance of GShoppingUtils, passing the key mapping and the taxonomy file.<br>
You can change the key mapping by editing the 'google_to_qsc_mapping.csv' file.<br>
Valid qscKeys are:
- **attribute:** Google key should be used as an attribute
- **category:** Google key should be used as a category
- **property:** Google key should be used as a normal property

In [2]:
import requests
import os

# Create resource folder if not existent (no error if it already exists)
os.makedirs('resources', exist_ok=True)

# Files needed by this notebook: local target path -> download URL
downloads = {
    # The most recent Google taxonomy file
    'resources/taxonomy-with-ids.de-DE.txt':
        'https://www.google.com/basepages/producttype/taxonomy-with-ids.de-DE.txt',
    # Key mapping file from git repository
    'resources/google_to_qsc_mapping.csv':
        'https://raw.githubusercontent.com/quasiris/qsc-quickstart/main/resources/google_to_qsc_mapping.csv',
    # Dummy product from git repository
    'resources/google_dummy_product.json':
        'https://raw.githubusercontent.com/quasiris/qsc-quickstart/main/resources/google_dummy_product.json',
}

for path, url in downloads.items():
    # Without a timeout a stalled connection would block the notebook forever
    r = requests.get(url, allow_redirects=True, timeout=30)

    # Fail loudly instead of storing an HTTP error page as if it were the resource
    r.raise_for_status()

    # Only touch the local file once the download has succeeded
    with open(path, 'wb') as outfile:
        outfile.write(r.content)

# Create instance of GShoppingUtils
gutils = GShoppingUtils('resources/taxonomy-with-ids.de-DE.txt', 'resources/google_to_qsc_mapping.csv')

### Convert a given Google shopping feed product to QSC format

In [3]:
# Open dummy product; read it as UTF-8 explicitly so the result does not depend on the platform default encoding
with open('resources/google_dummy_product.json', encoding='utf-8') as infile:
    # Read json
    prod = json.load(infile)

# Convert to QSC format (the file is already closed at this point)
product = gutils.get_qsc_product(prod)

# Print converted product; ensure_ascii=False keeps non-ASCII characters readable
print(json.dumps(product, ensure_ascii=False, indent=4))

{
    "id": "A2B4",
    "title": "Some product",
    "description": "Some product description",
    "link": "http://www.example.com/asp/sp.asp?cat=12&id=1030",
    "imageLink": "http://www.example.com/image1.jpg",
    "additionalImageLinks": [
        "http://www.example.com/image1.jpg",
        "http://www.example.com/image2.jpg",
        "http://www.example.com/image3.jpg"
    ],
    "brand": "Some brand",
    "gtin": "3234567890126",
    "mpn": "GO12345OOGLE",
    "price": {
        "value": "300",
        "currency": "EUR"
    },
    "categories": [
        {
            "category": [
                {
                    "id": "222",
                    "name": "Elektronik",
                    "level": 0
                },
                {
                    "id": "345",
                    "name": "Drucken, Kopieren, Scannen & Faxen",
                    "level": 1
                },
                {
                    "id": "6865",
                    "name": "3D-Drucker",
