In [None]:
import json
import fitz  # PyMuPDF for PDF text extraction
from flask import Flask, request, jsonify, send_file
from transformers import MBart50Tokenizer, MBartForConditionalGeneration
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

class MultiLanguageTranslator:
    """Translate text between a fixed set of languages using an mBART-50 checkpoint."""

    def __init__(self, model_name="facebook/mbart-large-50-many-to-many-mmt"):
        """Load the tokenizer and model for ``model_name``.

        Args:
            model_name: Checkpoint identifier handed to ``from_pretrained`` for
                both the tokenizer and the model.
        """
        # Deliberately no force_download=True: that flag bypasses the local
        # cache and re-fetches the whole checkpoint on every instantiation.
        self.tokenizer = MBart50Tokenizer.from_pretrained(model_name)
        self.model = MBartForConditionalGeneration.from_pretrained(model_name)
        # Supported languages: display name -> mBART language code
        self.languages = {
            "English": "en_XX",
            "Tamil": "ta_IN",
            "Hindi": "hi_IN",
            "Telugu": "te_IN",
            "Malayalam": "ml_IN"
        }

    def translate(self, input_text, input_language_name="English", output_language_name="Hindi"):
        """Translate ``input_text`` from one supported language into another.

        Args:
            input_text: Text to translate. Input longer than 512 tokens is
                truncated by the tokenizer call below.
            input_language_name: Key of ``self.languages`` for the source language.
            output_language_name: Key of ``self.languages`` for the target language.

        Returns:
            On success, a dict with the keys ``original``, ``input_language``,
            ``translated_text`` and ``output_language``. On any failure
            (unsupported language, blank input, or an exception raised while
            tokenizing/generating/decoding), a dict with the single key ``error``.
            This method does not raise for those cases.
        """
        # Validate languages
        if input_language_name not in self.languages or output_language_name not in self.languages:
            return {"error": "Unsupported language. Supported languages: " + ", ".join(self.languages.keys())}

        # Reject missing/blank text up front instead of running the model on nothing.
        if input_text is None or (isinstance(input_text, str) and not input_text.strip()):
            return {"error": "No input text provided."}

        try:
            # The tokenizer needs the source language set before encoding.
            self.tokenizer.src_lang = self.languages[input_language_name]

            # Tokenize input text (truncated to 512 tokens)
            inputs = self.tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)

            # Generate translation; forced_bos_token_id selects the target language.
            outputs = self.model.generate(
                **inputs,
                max_length=512,
                num_beams=5,
                early_stopping=True,
                forced_bos_token_id=self.tokenizer.lang_code_to_id[self.languages[output_language_name]]
            )

            # Decode the first (only) generated sequence
            translated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            return {
                "original": input_text,
                "input_language": input_language_name,
                "translated_text": translated_text,
                "output_language": output_language_name
            }

        except Exception as e:
            # Best-effort API: report the failure in the response instead of raising.
            return {"error": str(e)}

# Instantiate the translator. Construction loads the tokenizer and the model
# (see MultiLanguageTranslator.__init__), so this statement is the slow step of the cell.
translator = MultiLanguageTranslator()


## 1. Create a multi-language JSON response from an English request

## 2. Create a pickle file of the pretrained translator

In [None]:
import pickle

# Reuse the `translator` instance created in the first cell. Building a second
# MultiLanguageTranslator here would load the full model again only to
# serialise an identical object.

# Save the instance to a pickle file
pickle_file_name = "Andrometocs_English_4Indian_Languages_V1.pickle"
with open(pickle_file_name, "wb") as f:
    pickle.dump(translator, f)

print(f"Multi-language translator saved as {pickle_file_name}")

## 3. Load the pretrained translator from the pickle file to get responses

In [None]:
# Load the pickled translator back into `translator`.
# SECURITY: pickle.load can execute arbitrary code contained in the file, so only
# load a pickle you created yourself (here: the file written by the previous cell).
# NOTE(review): unpickling presumably needs the MultiLanguageTranslator class to be
# defined in the running kernel first; confirm by running the first cell beforehand.
with open("Andrometocs_English_4Indian_Languages_V1.pickle", "rb") as f:
    translator = pickle.load(f)

# Example of a long sentence (disabled; uncomment to try it)
# long_sentence = "This is a very long sentence that exceeds the maximum allowed length for translation and should return an appropriate error or truncated response."
# response_json = translator.translate(long_sentence)
# print(response_json)

## 4. Chat loop: keep returning multi-language responses until the user types 'end'

In [None]:
import json
# Reuse the `translator` already available from the previous cells instead of
# loading the model yet again.
SOURCE_LANGUAGE = "English"
# Every supported language except the source one is a translation target.
target_languages = [name for name in translator.languages if name != SOURCE_LANGUAGE]

# Start the chat loop
print("Multi-Language Chat: Translate English to Tamil, Hindi, Telugu, Malayalam !!")
print("*********************************")
print("Type 'end' to exit the chat.")

while True:
    # Get input from the user
    user_input = input("You: ")

    # Break the loop if the user types 'end'
    if user_input.strip().lower() == "end":
        print("Chat ended.")
        break

    # Nothing to translate; ask again
    if not user_input.strip():
        continue

    # translate() handles one target language per call and returns a dict, so
    # collect one result per target language into a single response.
    translations = {"original": user_input, "translations": {}}
    for language in target_languages:
        result = translator.translate(user_input, SOURCE_LANGUAGE, language)
        if "error" in result:
            # Keep going with the other languages; show the failure in place.
            translations["translations"][language] = f"[error] {result['error']}"
        else:
            translations["translations"][language] = result["translated_text"]

    print("Json Response ")
    print("**********************")
    # ensure_ascii=False keeps the Indic scripts readable instead of \u escapes.
    print(json.dumps(translations, ensure_ascii=False, indent=2))

    print("\n")
    # Build the complete human-readable response, then print it once.
    formatted_response = f"Original:\n{translations['original']}\n\n"
    for language, translation in translations['translations'].items():
        formatted_response += f"{language}:\n{translation}\n\n"

    # Print the formatted response
    print(formatted_response)

In [4]:
!pip install pyngrok
!pip install cors
!pip install flask-cors
!ngrok config add-authtoken 2tNgVMW1Arw4PDhwnFPcfdXa0l9_2f2wNsoCAuZmC5DPaVEpC
import os
import json
import pickle
from flask import Flask, request, jsonify
from transformers import MBart50Tokenizer, MBartForConditionalGeneration
from pyngrok import ngrok
from flask_cors import CORS

class MultiLanguageTranslator:
    """Translate arbitrarily long text between a fixed set of languages with mBART-50.

    Long input is split into chunks, each chunk is translated on its own, and
    the translated chunks are joined with single spaces.
    """

    def __init__(self, model_name="facebook/mbart-large-50-many-to-many-mmt"):
        """Load the tokenizer and model for ``model_name``.

        Args:
            model_name: Checkpoint identifier handed to ``from_pretrained`` for
                both the tokenizer and the model.
        """
        # Deliberately no force_download=True: that flag bypasses the local
        # cache and re-fetches the whole checkpoint on every instantiation.
        self.tokenizer = MBart50Tokenizer.from_pretrained(model_name)
        self.model = MBartForConditionalGeneration.from_pretrained(model_name)
        # Supported languages: display name -> mBART language code
        self.languages = {
            "English": "en_XX",
            "Tamil": "ta_IN",
            "Hindi": "hi_IN",
            "Telugu": "te_IN",
            "Malayalam": "ml_IN"
        }

    def translate_text_chunk(self, chunk, input_lang, output_lang):
        """Translate a single chunk of text.

        Args:
            chunk: Text to translate; tokenized with truncation at 512 tokens.
            input_lang: Key of ``self.languages`` for the source language.
            output_lang: Key of ``self.languages`` for the target language.

        Returns:
            The decoded translation as a string.

        Raises:
            KeyError: If either language name is not a key of ``self.languages``.
        """
        self.tokenizer.src_lang = self.languages[input_lang]
        inputs = self.tokenizer(chunk, return_tensors="pt", max_length=512, truncation=True)
        outputs = self.model.generate(
            **inputs,
            max_length=256,
            num_beams=5,
            early_stopping=True,
            # Forces the first generated token to the target-language code.
            forced_bos_token_id=self.tokenizer.lang_code_to_id[self.languages[output_lang]]
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    @staticmethod
    def _split_into_chunks(text, max_chars=512):
        """Split ``text`` into pieces of at most ``max_chars`` characters at whitespace."""
        import textwrap

        # Breaking at whitespace keeps words intact. Only a single "word" longer
        # than max_chars is cut mid-word. Runs of whitespace (including newlines)
        # collapse to the chunk boundary or a single space.
        return textwrap.wrap(text, width=max_chars, break_long_words=True, break_on_hyphens=False)

    def translate(self, text, input_lang, output_lang):
        """Translate ``text``, chunking it first so long input is not truncated.

        Args:
            text: Text to translate. An empty or whitespace-only string yields ``''``.
            input_lang: Key of ``self.languages`` for the source language.
            output_lang: Key of ``self.languages`` for the target language.

        Returns:
            The translated chunks joined with single spaces.
        """
        # Chunks end on word boundaries: slicing at fixed character offsets would
        # cut words in half, and the space-join below would then insert a space
        # inside them.
        chunks = self._split_into_chunks(text)
        translated_chunks = [
            self.translate_text_chunk(chunk, input_lang, output_lang)
            for chunk in chunks
        ]
        return ' '.join(translated_chunks)

# Create the translator the request handlers use. Without this the cell relies
# on a `translator` left over from an earlier cell, which either does not exist
# on a fresh kernel or is an instance of a different class.
translator = MultiLanguageTranslator()

# Flask application
app = Flask(__name__)
# Allow cross-origin requests from any origin on every route.
CORS(app, resources={r"/*": {"origins": "*"}})

@app.route('/translate_text', methods=['POST'])
def translate_text():
    """Translate the text in a JSON POST body.

    Expected JSON object: ``text`` (string), ``input_language`` and
    ``output_language`` (language names, matched case-insensitively against
    ``translator.languages`` via ``str.title``).

    Returns:
        200 with ``{"translated_text": ...}`` on success;
        400 with ``{"error": ...}`` when the body, text or languages are
        missing, malformed or unsupported;
        500 with ``{"error": ...}`` when translation itself raises.
    """
    # silent=True: a missing or malformed JSON body gives None instead of
    # raising, so the client always receives the JSON error below.
    data = request.get_json(silent=True)
    # A JSON array/string/number is not a valid request object either.
    if not isinstance(data, dict) or "text" not in data:
        return jsonify({"error": "No text provided"}), 400

    input_text = data["text"]
    if not isinstance(input_text, str):
        return jsonify({"error": "No text provided"}), 400

    # `or ""` also covers an explicit JSON null for either field.
    input_lang = data.get("input_language") or ""
    output_lang = data.get("output_language") or ""
    if not isinstance(input_lang, str) or not isinstance(output_lang, str):
        return jsonify({"error": "Missing language parameters"}), 400

    # Normalise e.g. " hindi " / "HINDI" to the "Hindi" form used as dict keys.
    input_lang = input_lang.strip().title()
    output_lang = output_lang.strip().title()

    if not input_lang or not output_lang:
        return jsonify({"error": "Missing language parameters"}), 400

    if input_lang not in translator.languages or output_lang not in translator.languages:
        return jsonify({"error": "Unsupported language"}), 400

    try:
        translated_text = translator.translate(input_text, input_lang, output_lang)
        return jsonify({"translated_text": translated_text})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == '__main__':
    # Open an ngrok tunnel to local port 5000 and show its public address
    # before starting the server; app.run() does not return while it serves.
    tunnel = ngrok.connect(5000)
    public_url = tunnel.public_url
    print("Public URL:", public_url)
    # Listen on all interfaces, on the same port the tunnel forwards to.
    app.run(port=5000, host='0.0.0.0')


Collecting argparse (from cors)
  Using cached argparse-1.4.0-py2.py3-none-any.whl.metadata (2.8 kB)
Using cached argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: argparse
Successfully installed argparse-1.4.0
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Public URL: https://6fa5-34-80-125-207.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: off


In [None]:
!pip uninstall fitz
!pip install PyMuPDF
