## This notebook focuses on converting all Russian product, category and shop names to English using Google Translate

In [None]:
# Notebook metadata: authorship, licensing, version and maintenance status.
__author__ = "konwar.m"
__copyright__ = "Copyright 2022, AI R&D"
__credits__ = ["konwar.m"]
__license__ = "Individual Ownership"
__version__ = "1.0.1"
__maintainer__ = "konwar.m"
__email__ = "rickykonwar@gmail.com"
__status__ = "Development"

In [None]:
# Install the pinned googletrans release candidate (4.0.0rc1) that provides the Translator imported below
!pip install googletrans==4.0.0rc1

In [None]:
# Importing Libraries
import os
import tqdm
import pandas as pd
from googletrans import Translator

In [None]:
# Optional: uncomment to switch to the parent directory before the relative paths below are resolved
# os.chdir('..')
# Display the current working directory; the relative 'datasets' and '../input' paths resolve against it
os.getcwd()

In [None]:
# Example on how to run google translate api
# Translates a sample phrase into Japanese (dest='ja') and prints the original text,
# the source language reported on the result, the translated text and the target language.
translator = Translator()
translation = translator.translate('안녕하세요.', dest='ja')
print(f"{translation.origin} ({translation.src}) --> {translation.text} ({translation.dest})")

In [None]:
# Load the three input tables whose name columns are translated further down:
# item categories (item_category_name), items (item_name) and shops (shop_name)
item_category_data = pd.read_csv(r'../input/competitive-data-science-predict-future-sales/item_categories.csv')
item_data = pd.read_csv('../input/competitive-data-science-predict-future-sales/items.csv')
shop_data = pd.read_csv('../input/competitive-data-science-predict-future-sales/shops.csv')

In [None]:
# Translation caches (original name -> translated name), one each for
# item categories, items and shops
item_category_dict, item_dict, shop_dict = {}, {}, {}

In [None]:
def translate_text(original_text, destination_lang='en'):
    """Translate ``original_text`` into ``destination_lang`` using googletrans.

    Args:
        original_text: Text to translate.
        destination_lang: Target language code, forwarded to
            ``Translator.translate`` as ``dest``. Defaults to ``'en'``.

    Returns:
        The translated text. When the input is not a non-blank string, or
        when the translation attempt raises, the input is returned unchanged
        so that callers never store ``None`` as a "translation".
    """
    # Missing values and blank strings have nothing to translate; skip the
    # translator call entirely.
    if not isinstance(original_text, str) or not original_text.strip():
        return original_text
    try:
        translator = Translator()
        translation = translator.translate(original_text, dest=destination_lang)
        return translation.text
    except Exception as ex:
        # Best-effort translation: report the failure and fall back to the
        # untranslated text instead of implicitly returning None.
        print(f'Caught Exception while translating text: {original_text} with exception as {ex}')
        return original_text

In [None]:
def extract_translated_text(input_dict, original_text):
    """Look up the translation stored for ``original_text`` in ``input_dict``.

    Returns the mapped value when ``original_text`` is a key of
    ``input_dict``; otherwise returns ``original_text`` itself.
    """
    return input_dict.get(original_text, original_text)

In [None]:
# Translate item category names
# Reuse the previously saved translations when the file is already on disk
translated_categories_path = os.path.join('datasets', 'translated_item_categories.csv')
if os.path.exists(translated_categories_path):
    item_category_data = pd.read_csv(translated_categories_path)
else:
    unique_category_names = list(item_category_data.item_category_name.unique())
    for category_name in tqdm.tqdm(unique_category_names, desc='Translating Item Categories to English'):
        # Names already present in the cache are not sent to the translator again
        if category_name in item_category_dict:
            continue
        item_category_dict[category_name] = translate_text(category_name)
    item_category_data['translated_item_category_name'] = item_category_data['item_category_name'].apply(
        lambda name: extract_translated_text(input_dict=item_category_dict, original_text=name)
    )
item_category_data.head()

In [None]:
# Translate shop names
# Reuse the previously saved translations when the file is already on disk
translated_shops_path = os.path.join('datasets', 'translated_shops.csv')
if os.path.exists(translated_shops_path):
    shop_data = pd.read_csv(translated_shops_path)
else:
    unique_shop_names = list(shop_data.shop_name.unique())
    for shop_name in tqdm.tqdm(unique_shop_names, desc='Translating Shop Names to English'):
        # Names already present in the cache are not sent to the translator again
        if shop_name in shop_dict:
            continue
        shop_dict[shop_name] = translate_text(shop_name)
    shop_data['translated_shop_name'] = shop_data['shop_name'].apply(
        lambda name: extract_translated_text(input_dict=shop_dict, original_text=name)
    )
shop_data.head()

In [None]:
# Translate item names
# Reuse the previously saved translations when the file is already on disk
translated_items_path = os.path.join('datasets', 'translated_items.csv')
if os.path.exists(translated_items_path):
    item_data = pd.read_csv(translated_items_path)
else:
    unique_item_names = list(item_data.item_name.unique())
    for item_name in tqdm.tqdm(unique_item_names, desc='Translating Item Names to English'):
        # Names already present in the cache are not sent to the translator again
        if item_name in item_dict:
            continue
        item_dict[item_name] = translate_text(item_name)
    item_data['translated_item_name'] = item_data['item_name'].apply(
        lambda name: extract_translated_text(input_dict=item_dict, original_text=name)
    )
item_data.head()

In [None]:
# Save translated files
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before the first write.
os.makedirs('datasets', exist_ok=True)
for frame, file_name, exists_message in (
    (item_category_data, 'translated_item_categories.csv', 'Translated Item Category File Exist'),
    (item_data, 'translated_items.csv', 'Translated Item File Exist'),
    (shop_data, 'translated_shops.csv', 'Translated Shop File Exist'),
):
    output_path = os.path.join('datasets', file_name)
    if os.path.exists(output_path):
        # An existing translation file is never overwritten
        print(exists_message)
    else:
        frame.to_csv(output_path, index=False)