In [None]:
from googletrans import Translator
from google.transliteration import transliterate_word
import json
import re
import os, time

In [None]:
# Output path: the JSON file that is reset to an empty list below and that
# translate_artist_json re-reads and rewrites as it appends each artist.
filename = 'artists-corpus/sinhala_artists_data_1.json'

In [None]:
# Start from a clean slate: overwrite the output file with an empty JSON list.
with open(filename, 'w') as out_file:
    json.dump([], out_file)

In [None]:
def translate(text):
    """Translate ``text`` to Sinhala using the module-level ``translator``.

    Falsy input (``None``, ``""``) yields an empty string without calling
    the translator; otherwise the ``.text`` attribute of the result of
    ``translator.translate(text, dest='si')`` is returned.
    """
    if not text:
        return ""

    result = translator.translate(text, dest='si')
    return result.text

In [None]:
def transliterate(text):
    """Transliterate ``text`` into Sinhala script and return the top suggestion.

    Args:
        text: English-script string to transliterate.

    Returns:
        The first suggestion from ``transliterate_word(text, lang_code='si')``.
        Falsy input returns ``""`` without calling the service. If the
        service returns no suggestions, ``text`` is returned unchanged so a
        single failed lookup does not abort a whole batch with IndexError.
    """
    if not text:
        return ""

    suggestions = transliterate_word(text, lang_code='si')
    if not suggestions:
        # No candidates came back; keep the original spelling rather than
        # indexing into an empty list.
        return text
    return suggestions[0]

In [None]:
def translate_filmography(filmography):
    """Produce Sinhala entries for each film in ``filmography``.

    Args:
        filmography: iterable of dicts with ``film_name_en`` and
            ``role_name_en`` keys.

    Returns:
        A list of dicts, one per film, with ``film_name_si`` (transliterated)
        and ``role_name_si`` (translated). An empty or missing-valued source
        field produces ``""`` and is never sent to the remote services,
        matching how translate_awards treats its fields.
    """
    print ("started translating filmography...")
    response = []
    for film in filmography:
        film_name_en = film['film_name_en']
        role_name_en = film['role_name_en']

        # Guard both lookups: an empty film name would otherwise be passed
        # straight to transliterate().
        film_si = {
            'film_name_si': transliterate(film_name_en) if film_name_en else "",
            'role_name_si': translate(role_name_en) if role_name_en else "",
        }

        print(film_si)
        response.append(film_si)

    print ("translating filmography completed!")
    return response

In [None]:
def translate_biography(bio):
    """Translate a biography one segment at a time.

    The text is split by parse_biography(); segments of two characters or
    fewer are skipped. The translated segments are concatenated directly,
    with no separator inserted between them.
    """
    print ("started translating bio...")
    segments = parse_biography(bio)
    translated = "".join(
        translate(segment) for segment in segments if len(segment) > 2
    )
    print ("translating bio completed!")
    return translated

In [None]:
def parse_biography(bio):
    """Split a biography into segments.

    Leading and trailing whitespace is trimmed first, then the text is split
    on the exact three-character sequence CR, LF, TAB.
    """
    delimiter = "\r\n\t"
    trimmed = bio.strip()
    return trimmed.split(delimiter)

In [None]:
def translate_awards(awards):
    """Produce Sinhala entries for each award in ``awards``.

    Every award dict must carry ``award_name_en``, ``award_ceremony_name_en``
    and ``film_name_en``. The award and ceremony names are translated and the
    film name is transliterated; any falsy source field maps to ``""``.
    Returns the list of resulting dicts in input order.
    """
    print ("started translating awards...")
    response = []

    for award in awards:
        name_en = award['award_name_en']
        ceremony_en = award['award_ceremony_name_en']
        film_en = award['film_name_en']

        # Dict literal values are evaluated top to bottom, so the remote
        # calls happen in the order: name, ceremony, film.
        award_si = {
            'award_name_si': translate(name_en) if name_en else "",
            'award_ceremony_name_si': translate(ceremony_en) if ceremony_en else "",
            'film_name_si': transliterate(film_en) if film_en else "",
        }

        print(award_si)
        response.append(award_si)

    print ("translating awards completed!")
    return response
    

In [None]:
def translate_artist_json(artist_json, start, end):
    """Translate a slice of artists and append each result to ``filename``.

    For every index in ``range(start, end)`` the artist's English fields are
    translated or transliterated to Sinhala and combined with the original
    English fields into one record. After each artist, the JSON list stored
    in the module-level ``filename`` is loaded, extended with the new record
    and written back, so records finished before a failure are already on
    disk.

    Name handling:
      * ``known_as_si`` keeps the value from the source data when present and
        is transliterated from ``known_as_en`` only when the source is "".
      * When ``real_name_en`` is empty or equal to ``known_as_en``, the real
        name mirrors the stage name in both languages. The resolved
        ``known_as_si`` is reused, so ``real_name_si`` is never left empty
        just because the source lacked a Sinhala stage name.

    Args:
        artist_json: indexable collection of artist dicts.
        start: first index to process (inclusive).
        end: index at which to stop (exclusive).

    Raises:
        IndexError: if the range reaches past the end of ``artist_json``.
        KeyError: if an artist dict lacks one of the expected keys.
    """
    print("translating from " + str(start) + " to " + str(end-1) + " ... ", )

    for idx in range(start, end):
        artist = artist_json[idx]

        print("started translating: " + artist['known_as_en'] + " | index: " + str(idx) )

        known_as_en = artist['known_as_en']
        real_name_en = artist['real_name_en']

        # Only call the transliteration service when the source data has no
        # Sinhala stage name of its own.
        if artist['known_as_si'] == "":
            known_as_si = transliterate(known_as_en)
        else:
            known_as_si = artist['known_as_si']

        # An absent real name, or one identical to the stage name, shares the
        # stage name's resolved Sinhala form instead of a second lookup.
        if real_name_en == '' or real_name_en == known_as_en:
            real_name_en_out = known_as_en
            real_name_si = known_as_si
        else:
            real_name_en_out = real_name_en
            real_name_si = transliterate(real_name_en)

        birth_si = translate(artist['birth_en'])
        death_si = translate(artist['death_en'])
        biography_si = translate_biography(artist['biography_en'])
        national_awards_si = translate_awards(artist['national_awards_en'])
        filmography_si = translate_filmography(artist['filmography_en'])

        # final json format (key order is part of the output layout)
        artist_data = {
            "known_as_en": known_as_en,
            "known_as_si": known_as_si,
            "real_name_en": real_name_en_out,
            "real_name_si": real_name_si,
            "birth_en": artist['birth_en'],
            "birth_si": birth_si,
            "death_en": artist['death_en'],
            "death_si": death_si,
            "biography_en": artist['biography_en'],
            "biography_si": biography_si,
            "national_awards_en": artist['national_awards_en'],
            "national_awards_si": national_awards_si,
            "filmography_en": artist['filmography_en'],
            "filmography_si": filmography_si,
        }

        # Persist after every artist: load the list written so far, append,
        # and write it back.
        with open (filename,'r') as f:
            data = json.load(f)

        data.append(artist_data)

        with open(filename, "w") as f:
            json.dump(data, f)

        print("completed translating : " + artist_data['known_as_si'] )


In [None]:
# Shared googletrans client used by translate().
translator = Translator()

# Read the English source corpus. The context manager closes the handle
# promptly, and the explicit UTF-8 encoding keeps any non-ASCII text in the
# file from depending on the platform's default codec.
with open("artists-corpus/artists_data.json", 'r', encoding='utf-8') as source_file:
    content = source_file.read()
artist_json = json.loads(content)

print("translating " + str(len(artist_json)) + " records... ", )


In [None]:
# Batch runs: translate_artist_json iterates range(start, end), so each call
# below processes indices start..end-1 and appends them to the output file.
translate_artist_json(artist_json, 0, 5)

In [None]:
print("translating completed!!! ")

In [None]:
translate_artist_json(artist_json, 5, 50)

In [None]:
print("translating completed!!! ")

In [None]:
translate_artist_json(artist_json, 50, 60)

In [None]:
print("translating completed!!! ")

In [None]:
# Start at 60: the previous batch ran range(50, 60) and stopped at index 59,
# so starting at 61 left record 60 out of the output entirely.
# NOTE(review): if record 60 was skipped on purpose (e.g. it failed to
# translate), say so here instead of leaving a silent gap.
translate_artist_json(artist_json, 60, 70)

In [None]:
print("translating completed!!! ")

In [None]:
# Remaining batches; `end` is exclusive, so the last call stops after index 365.
translate_artist_json(artist_json, 70, 100)

In [None]:
print("translating completed!!! ")

In [None]:
translate_artist_json(artist_json, 100, 150)

In [None]:
print("translating completed!!! ")

In [None]:
translate_artist_json(artist_json, 150, 250)

In [None]:
print("translating completed!!! ")

In [None]:
translate_artist_json(artist_json, 250, 350)

In [None]:
print("translating completed!!! ")

In [None]:
translate_artist_json(artist_json, 350, 366)

In [None]:
print("translating completed!!! ")