## Lyrics Translation and Processing
This notebook detects the language of each song's lyrics, translates the lyrics into English, and saves the results to Excel.

In [1]:
# Import necessary libraries 
import pandas as pd 
import numpy as np 
import requests
import os, requests, uuid, json

import detectlanguage

In [39]:
# Export the path of the Google JSON key file through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable
import os

GOOGLE_CREDENTIALS_FILE = "infra-earth-356614-f55f16a35b86.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_CREDENTIALS_FILE

In [2]:
# Load the top songs workbook into a DataFrame and report its dimensions
songs_file = '../data/top_decade_songs.xlsx'
df = pd.read_excel(songs_file)
df.shape

(70, 24)

We have the top 10 songs by popularity for each decade in India.

In [3]:
# Preview the first five rows, including the Hindi lyrics column
df.head()

Unnamed: 0,track_id,name,album,artist,release_date,length,popularity,key,acousticness,danceability,...,speechiness,tempo,valence,time_signature,mode,playlist_id,year,decade,hindi_lyrics,english_lyrics
0,59HjNZgoziKgAwGOhrKRPJ,Tere Mere Sapne Ab Ek Rang Hain,Guide,S. D. Burman,12/6/1965,0.000261,47,3,0.978,0.246,...,0.0369,174.8,0.552,3,1,37i9dQZF1DXa1eCiO3E6Rr,1965,1960,तेरे मेरे सपने\nअब्ब एक रंग हैं\nजहां भी ले जा...,Tere mere sapne\nAbb ek rang hain\nJaha bhee l...
1,1P278K5LuPJOatR1wBUywC,Aane Se Uske Aaye Bahar,Jeene Ki Raah,Laxmikant Pyarelal,1/1/1969,0.000248,51,6,0.886,0.37,...,0.036,95.448,0.668,4,0,3dKv6gpADy34FI6rcP7DAT,1969,1960,आने से उस के आये बहार\nजाने से उस के जाए बहार\...,Aane se us ke aaye bahar\nJaane se us ke jaaye...
2,7ukboFFuDuxKWRdxahmth7,Beqarar Karke Hamen Yun Na Jaiye,Bees Saal Baad,Hemant Kumar,1/1/1962,0.00019,51,10,0.971,0.653,...,0.0483,113.465,0.757,4,0,3dKv6gpADy34FI6rcP7DAT,1962,1960,बेक़रार करके हमें यूँ न जाइये\nआपको हमारी कसम ल...,Beqaraar karake hame yun na jaaiye\nAapako ham...
3,6xCnMMPkIIhn3QyGJgd5xd,Ae Mere Zohra Jabeen,Waqt,Ravi,7/28/1965,0.000235,52,6,0.984,0.441,...,0.0742,79.582,0.514,3,0,3dKv6gpADy34FI6rcP7DAT,1965,1960,ऐ मेरी जोहरा जबीं\nतुझे मालुम नहीं\nतू अभी तक ...,Ai meri zoharaa zabi\nTujhe maalum nahi\nTu ab...
4,1AlatlIkROgLvG6pgBBnAz,Roop Tera Mastana,Aradhana,S. D. Burman,9/27/1969,0.000225,55,8,0.666,0.449,...,0.0951,83.362,0.543,4,0,3dKv6gpADy34FI6rcP7DAT,1969,1960,रूप तेरा मस्ताना\nरूप तेरा मस्ताना\nभूल कोई हम...,"Rup teraa mastaanaa, pyaar meraa divaanaa\nRup..."


## Translation to English Language

The first step is to detect the language of each song's lyrics, so that Hindi and Punjabi songs can be told apart.

In [45]:
# Language detection: start from an empty-string `language` column
df = df.assign(language='')

def detect_language(text):
    """Detect the language of ``text`` with the detectlanguage API.

    Args:
        text: The text whose language should be detected.

    Returns:
        The ``'language'`` field of the first detection, or ``None`` when the
        API call raises or returns no detections.
    """
    # The key can be supplied through the DETECTLANGUAGE_API_KEY environment
    # variable; the literal fallback keeps existing runs working.
    # NOTE(review): the fallback is a credential committed to the notebook --
    # rotate it and provide the key through the environment instead.
    detectlanguage.configuration.api_key = os.environ.get(
        "DETECTLANGUAGE_API_KEY", "b4901088a59b36e7dc7b853cc2c70247"
    )

    try:
        result = detectlanguage.detect(text)
    except Exception:
        # `Exception` (not a bare except) so that interrupting the kernel
        # still stops a long-running `.apply` over the whole column.
        print(f"Got an error for line: {text}")
        return None

    # An empty detection list would otherwise raise IndexError below.
    if not result:
        return None

    return result[0]['language']

# Run language detection on every song's lyrics and store the detected language
lyrics_column = df['hindi_lyrics']
df['language'] = lyrics_column.map(detect_language)


In [46]:
# Discard every row that has a missing value in any column
df = df.dropna()

In [53]:
# Persist the songs together with their language column (row index omitted)
language_output = '../data/top_decade_songs_language.xlsx'
df.to_excel(language_output, index=False)

#### Translation API 

In [46]:
# Load the workbook that contains the language column and report its dimensions
df = pd.read_excel(io='../data/top_decade_songs_language.xlsx')
df.shape

(69, 25)

In [47]:
def get_lyric_translations_using_bing(text, input_language):
    """Translate ``text`` to English with the Microsoft Translator REST API (v3.0).

    Args:
        text: The text to translate.
        input_language: Language code of ``text``; sent as the ``from``
            query parameter.

    Returns:
        The text of the first translation of the first result, or ``None``
        when the service answers with an error payload.

    Raises:
        Whatever ``requests.post`` / ``.json()`` raise (connection failure,
        timeout, non-JSON body); these are not handled here.
    """
    # Environment variables take precedence; the literal fallbacks keep
    # existing runs working.
    # NOTE(review): the fallback key is a credential committed to the
    # notebook -- rotate it and provide it through the environment instead.
    subscription_key = os.environ.get('TRANSLATOR_SUBSCRIPTION_KEY', 'c9a68a42d950428db2e84ef8d3431a24')
    region = os.environ.get('TRANSLATOR_REGION', 'eastus')
    endpoint = 'https://api.cognitive.microsofttranslator.com/'

    # Strip the trailing slash so the URL is ".../translate", not "...//translate".
    constructed_url = endpoint.rstrip('/') + '/translate'
    # Let requests build and URL-encode the query string.
    params = {'api-version': '3.0', 'from': input_language, 'to': 'en'}

    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Ocp-Apim-Subscription-Region': region,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }

    # You can pass more than one object in body.
    body = [{'text': text}]

    # A timeout keeps one stalled request from hanging the whole run.
    request = requests.post(constructed_url, params=params, headers=headers, json=body, timeout=30)
    response = request.json()

    # Inspect the parsed payload rather than its serialised text: a substring
    # search for "error" would also reject translations that merely contain
    # that word (e.g. "terror").
    if isinstance(response, dict) and 'error' in response:
        print("Encountered errors")
        return None

    return response[0]['translations'][0]['text']

In [48]:
def get_lyric_translations(text, input_language):
    """Translate multi-line lyrics one line at a time.

    The text is split on ``'\\n'``, every non-blank line is translated with
    ``get_lyric_translations_using_bing`` and the results are joined back
    with ``'\\n'``.

    Args:
        text: The lyrics, with lines separated by ``'\\n'``.
        input_language: Language code passed through to the translator.

    Returns:
        The translated lyrics as a single newline-joined string.

    Raises:
        TypeError: If the translator returns ``None`` for a line. The type
            matches what ``str.join`` raised for a ``None`` entry, but the
            error now fires on the first failed line and names it.
    """
    translations = []
    for line_number, sentence in enumerate(text.split('\n'), start=1):

        # Blank lines have nothing to translate; keep them in place without
        # spending an API call on them.
        if not sentence.strip():
            translations.append(sentence)
            continue

        translation = get_lyric_translations_using_bing(text=sentence, input_language=input_language)

        # Stop at the first failed line instead of requesting the remaining
        # lines and then failing inside str.join.
        if translation is None:
            raise TypeError(f"No translation returned for line {line_number}: {sentence!r}")

        translations.append(translation)

    # Join the translations into a single string
    return '\n'.join(translations)

In [51]:
df['english_translated_lyrics'] = ''
# Translate each song's lyrics and store the result on its row.
# A song whose translation raises is marked 'No Translations Found'.
for index, row in df.iterrows():
    print(f"Translating song {index}")
    try:
        df.loc[index, 'english_translated_lyrics'] = get_lyric_translations(text=row['hindi_lyrics'], input_language=row['language'])
    except Exception as error:
        # `Exception` (not a bare except) so that interrupting the kernel
        # actually stops the loop; the cause is printed instead of hidden.
        print(f"Encountered errors for song {index}: {error!r}")
        df.loc[index, 'english_translated_lyrics'] = 'No Translations Found'

Translating song 0
Translating song 1
Translating song 1
Translating song 1
Translating song 2
Translating song 1
Translating song 3
Translating song 1
Translating song 4
Translating song 1
Translating song 5
Translating song 1
Translating song 6
Translating song 1
Translating song 7
Translating song 1
Translating song 8
Translating song 1
Translating song 9
Translating song 1
Translating song 10
Translating song 1
Translating song 11
Translating song 1
Translating song 12
Translating song 1
Translating song 13
Translating song 1
Translating song 14
Translating song 1
Translating song 15
Translating song 1
Translating song 16
Translating song 1
Translating song 17
Translating song 1
Translating song 18
Translating song 1
Translating song 19
Translating song 1
Translating song 20
Translating song 1
Translating song 21
Translating song 1
Translating song 22
Translating song 1
Translating song 23
Translating song 1
Translating song 24
Translating song 1
Translating song 25
Translating son

In [53]:
# Write the songs with their translated lyrics to Excel, omitting the row index
df.to_excel(excel_writer='../data/top_decade_songs_translated.xlsx', index=False)