In [None]:
import pandas as pd

# Load the question sample. Later cells read its 'question_content' and
# 'question_language' columns.
textSample = pd.read_csv(filepath_or_buffer='textSample.csv')

In [None]:
#Translation Model
# Install the googletrans package before running this cell.
# Note: the original googletrans release has compatibility issues, so pin this specific version:
# %pip install googletrans==4.0.0-rc1

from googletrans import Translator
import asyncio
import concurrent.futures

# --- Async Translation Function ---

# We need a synchronous helper function that runs within the executor
def sync_translate_helper(text, dest_language, src_language):
    """Translate ``text`` with googletrans, blocking until the result is ready.

    This is the synchronous worker meant to be run on an executor thread so
    the blocking network call does not stall an event loop.

    Args:
        text: The text to translate.
        dest_language: Target language code, forwarded as ``dest``.
        src_language: Source language code, forwarded as ``src``.

    Returns:
        The translated text on success. On any exception, a string of the
        form ``"An error occurred: <message>"`` is returned instead of
        raising, so callers must detect failures by that prefix.
    """
    import inspect

    try:
        translator = Translator()
        # This call is blocking and will be executed in a separate thread.
        translated = translator.translate(text, src=src_language, dest=dest_language)
        if inspect.iscoroutine(translated):
            # Some googletrans releases (4.0.1+, per its README -- confirm for
            # the installed version) make translate() a coroutine function.
            # Without this, `.text` would be read off the coroutine object and
            # every call would fail. asyncio.run() needs a thread with no
            # running event loop, which holds for executor worker threads.
            translated = asyncio.run(translated)
        return translated.text
    except Exception as e:
        # Return the error message so the async caller can handle it.
        return f"An error occurred: {e}"

In [None]:
async def translate_text_async(text, dest_language='en', src_language='auto'):
    """
    Translate ``text`` without blocking the event loop.

    The blocking ``sync_translate_helper`` call is handed to the running
    loop's default executor and awaited.

    Args:
        text: The text to translate.
        dest_language: Target language code (default ``'en'``).
        src_language: Source language code (default ``'auto'``).

    Returns:
        Whatever ``sync_translate_helper`` returns: the translated text, or
        its in-band ``"An error occurred: ..."`` string on failure. Failures
        are also printed; they are never raised from here.
    """
    loop = asyncio.get_running_loop()
    # Passing None selects the loop's default executor (a ThreadPoolExecutor).
    result = await loop.run_in_executor(
        None,
        sync_translate_helper,
        text, dest_language, src_language,
    )

    # The helper marks failures with this exact prefix. Matching the prefix
    # instead of a substring avoids flagging genuine translations that merely
    # contain the phrase; the isinstance guard tolerates a non-string result.
    if isinstance(result, str) and result.startswith("An error occurred: "):
        # str() is required because the input may not be a string (e.g. NaN
        # from a pandas column), and slicing a float would raise TypeError
        # and abort every sibling task in the surrounding gather().
        print(f"Error for text '{str(text)[:30]}...': {result}")
    return result

In [None]:
#--- Helper Function to Prepare Tasks ---

def translateToEngAsync(question_contents, lang):
    """
    Build one pending English-translation awaitable per input text.

    Every element of ``question_contents`` is passed to
    ``translate_text_async`` with ``'en'`` as the destination and ``lang`` as
    the source language. Nothing is awaited here: the caller receives the
    awaitables in input order and is responsible for running them.
    """
    # Input order is preserved, so results line up with the inputs.
    return [translate_text_async(item, 'en', lang) for item in question_contents]

In [None]:
# --- Execution in Jupyter Notebook ---

# 1. Ensure you have your DataFrame 'df' loaded first.
# Example DataFrame setup if you don't have one loaded:
# data = {'question_language': ['nyn', 'nyn', 'es'], 'question_content': ['Hva heter du?', 'Korleis g√•r det?', 'Hola mundo']}
# df = pd.DataFrame(data)

# 2. Extract content
langContent = textSample['question_content'] #Have to make random if you want to limit the number of samples

# 3. Get the list of async tasks
translation_tasks = translateToEngAsync(langContent, 'auto')

# 4. Use asyncio.gather to run all tasks concurrently and wait for all results.
# In Jupyter, you can use the built-in 'await' functionality at the cell level.
try:
    # 'await' can be used directly in a notebook cell
    translated_results = await asyncio.gather(*translation_tasks)
    
    # 5. Process results into a DataFrame and save
    translatedSample = pd.DataFrame(translated_results, columns=['question_language'])
    print("Translations complete. Head of DataFrame:")
    print(translatedSample.head())
    translatedSample.to_csv("translatedSample", index=False)

except Exception as e:
    print(f"An error occurred during asyncio gathering: {e}")

In [None]:
# Question text of the rows whose language tag is 'eng', selected in a single .loc lookup.
engText = textSample.loc[textSample['question_language'] == 'eng', 'question_content']

In [None]:
# Concatenate all English questions into one space-separated string.
# Missing values are dropped and the rest coerced to str first, because
# str.join raises TypeError on any non-string item (e.g. NaN).
engFullText = " ".join(engText.dropna().astype(str))

In [None]:
# Persist the combined English text to disk as UTF-8.
with open("engFullText.txt", mode='w', encoding='utf-8') as out_file:
    out_file.write(engFullText)