In [3]:
!pip install PyPDF2 # extract/parse text from pdf
!pip install python-docx # extract/parse text from docx
!pip install streamlit
!pip install gTTS
#!pip install langchain_community langchain openai
!pip install pyngrok
!pip install langchain==0.0.302
!pip install openai==0.27.0

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/232.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━[0m [32m174.1/232.6 kB[0m [31m5.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.1.2
Collecting streamlit
  Downloading streamlit-1.40.2-py2.py3-no



In [4]:
%%writefile TextExtractor.py
import PyPDF2
import docx
import pandas as pd

# read data from PDF
def from_pdf(pdf_path):
  """Extract the text of every page of a PDF document.

  Args:
    pdf_path: Whatever ``PyPDF2.PdfReader`` accepts as its source (the app
      passes the uploaded file object directly).

  Returns:
    The text of all pages as a single string, one page per line group
    (pages are separated by a newline). Pages without extractable text are
    skipped.
  """
  reader = PyPDF2.PdfReader(pdf_path)
  # Guard against pages that yield no text, and join with a newline so the
  # last word of one page does not fuse with the first word of the next.
  page_texts = (page.extract_text() or '' for page in reader.pages)
  return '\n'.join(chunk for chunk in page_texts if chunk)
# read data from word document
def from_docx(docx_path):
  """Extract the paragraph text of a Word (.docx) document.

  Args:
    docx_path: Whatever ``docx.Document`` accepts as its source (the app
      passes the uploaded file object directly).

  Returns:
    The text of all paragraphs as a single string, one paragraph per line.
  """
  doc = docx.Document(docx_path)
  # Join with a newline: concatenating paragraphs back to back glues the end
  # of one sentence onto the start of the next ("...end.Next...").
  return '\n'.join(para.text for para in doc.paragraphs)
# read data from text file
def from_txt(txt_path):
  """Read the full contents of a plain-text source.

  Args:
    txt_path: Either a filesystem path, or an already-open file-like object
      exposing ``read()`` (the app passes the uploaded file object, which
      cannot be given to ``open()``).

  Returns:
    The contents as a ``str``. Bytes are decoded as UTF-8.

  Raises:
    UnicodeDecodeError: If the content is not valid UTF-8.
  """
  if hasattr(txt_path, 'read'):
    # File-like object: read it directly instead of trying to open() it.
    data = txt_path.read()
    if isinstance(data, bytes):
      data = data.decode('utf-8')
    return data
  # Explicit encoding so the result does not depend on the platform locale.
  with open(txt_path, 'r', encoding='utf-8') as f:
    return f.read()

def from_excel(excel_path):
  """Extract the ``text`` column of an Excel workbook as one string.

  Args:
    excel_path: Path or file-like object handed to ``pandas.read_excel``.

  Returns:
    The non-missing cells of the ``text`` column, converted to ``str`` and
    joined with newlines, so the result has the same type as the other
    extractors in this module.

  Raises:
    KeyError: If the sheet has no column named ``text``.
  """
  df = pd.read_excel(excel_path)
  # Series.values is a property (an ndarray); calling it raised TypeError.
  return '\n'.join(df['text'].dropna().astype(str))

def from_csv(csv_path):
  """Extract the ``text`` column of a CSV file as one string.

  Args:
    csv_path: Path or file-like object handed to ``pandas.read_csv``.

  Returns:
    The non-missing cells of the ``text`` column, converted to ``str`` and
    joined with newlines, so the result has the same type as the other
    extractors in this module.

  Raises:
    KeyError: If the file has no column named ``text``.
  """
  df = pd.read_csv(csv_path)
  # Series.values is a property (an ndarray); calling it raised TypeError.
  return '\n'.join(df['text'].dropna().astype(str))

Writing TextExtractor.py


In [41]:
%%writefile CustomError.py
class CustomError(Exception):
    """Application-level error that carries a human-readable message.

    The text is available both as ``str(err)`` and as ``err.message``.
    """

    def __init__(self, message):
        # Hand the text to Exception first so args/str() are populated,
        # then keep a named copy for callers that read ``.message``.
        super().__init__(message)
        self.message = message

Overwriting CustomError.py


In [None]:
%%writefile LLMExecuter.py
from langchain.chat_models import ChatOpenAI # openai
from langchain.schema import SystemMessage, HumanMessage
from openai.error import RateLimitError
import os
from CustomError import CustomError

# Chat model shared by every execute() call in this module.
# NOTE(review): max_tokens=100 bounds the length of each reply, which looks
# small for translating whole uploaded documents — confirm this is intended.
# Presumably the OpenAI API key is picked up from the environment; verify.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.4, max_tokens=100)

def execute(lang1, lang2, text):
    """Ask the module-level chat model to translate ``text``.

    Args:
        lang1: Name of the language ``text`` is written in (e.g. "English").
        lang2: Name of the language to translate into.
        text: The text to translate.

    Returns:
        The model's reply, or ``None`` when the reply is empty.

    Raises:
        CustomError: When the OpenAI client reports a ``RateLimitError``.
            The original error is attached as ``__cause__``.
    """
    prompt = f"Translate the given text in {lang1} to {lang2}"
    messages = [SystemMessage(content=prompt), HumanMessage(content=text)]
    try:
        output = llm(messages).content
    except RateLimitError as err:
        print("Rate limit exceeded. Please try again later.")
        # Message is shown to the user; keep it accurate and chain the cause
        # so the underlying API error is not lost.
        raise CustomError("Rate limit or quota exceeded") from err
    return output if output else None


Overwriting LLMExecuter.py


In [20]:
%%writefile AudioConvertor.py

from gtts import gTTS
import uuid

def textToSpeech(text, language,slow):
  """Synthesize ``text`` to an MP3 file with gTTS.

  Args:
    text: The text to speak.
    language: Language code passed to gTTS as ``lang``.
    slow: Passed to gTTS as ``slow``.

  Returns:
    The name of the MP3 file that was written: a random UUID plus ".mp3",
    relative to the current working directory.
  """
  # A random name per call keeps separate requests from overwriting each other.
  audio_file = f"{uuid.uuid4()}.mp3"
  gTTS(text=text, lang=language, slow=slow).save(audio_file)
  return audio_file



Overwriting AudioConvertor.py


In [49]:
%%writefile app.py

import os
import IPython.display as ipd
import TextExtractor
import AudioConvertor
import LLMExecuter
from CustomError import CustomError
import streamlit as st

# Language name shown in the selector -> language code used for speech synthesis.
# Insertion order is the order the options appear in the UI.
language_dict = dict(
    English="en",
    Spanish="es",
    French="fr",
    German="de",
    Italian="it",
)

def alertMessageHtml(msg):
  """Wrap ``msg`` in a red, bold, monospace ``<div>`` for display as an alert.

  Args:
    msg: The message to show; it is inserted into the markup as-is.

  Returns:
    The HTML snippet as a string.
  """
  html_lines = [
      "",
      "    <div style=\"",
      "        color: red;",
      "        font-family: 'Courier New', monospace;",
      "        font-size: 20px;",
      "        font-weight: bold;",
      "    \">",
      f"       {msg}",
      "    </div>",
      "    ",
  ]
  return "\n".join(html_lines)

# --- Page layout -----------------------------------------------------------
# Widgets are created in the order they should appear on the page.
st.title("Translation Application")

# Two equally weighted columns: target language on the left, speech speed on the right.
col1, col2 = st.columns([2, 2])  # Adjust column width proportions if needed
with col1:
    # Options are the keys of language_dict, in its insertion order.
    selected_language = st.selectbox("Select language", list(language_dict.keys()))

with col2:
    # Later forwarded to AudioConvertor.textToSpeech as its `slow` argument.
    slow_speech = st.checkbox("Slow speech")

# Free-text input; an uploaded file (handled further down) overrides this value.
text = st.text_input("Text: ")
uploaded_file  = st.file_uploader("Upload a file and supporting format:pdf,docx,txt,csv,xlsx:")

# If a file was uploaded, its extracted text replaces whatever was typed above.
if uploaded_file is not None:
  # Extension -> extractor. Checked in this order against the file name.
  extractors = {
      ".pdf": TextExtractor.from_pdf,
      ".docx": TextExtractor.from_docx,
      ".txt": TextExtractor.from_txt,
      ".csv": TextExtractor.from_csv,
      ".xlsx": TextExtractor.from_excel,
  }
  # Lower-case the name so "REPORT.PDF" is recognized like "report.pdf".
  doc_name = uploaded_file.name.lower()
  extractor = next(
      (fn for ext, fn in extractors.items() if doc_name.endswith(ext)),
      None,
  )
  if extractor is not None:
    text = extractor(uploaded_file)
  else:
    st.markdown(alertMessageHtml("file is not supported"), unsafe_allow_html=True)


# Display the corresponding value (language code)
st.write(f"You selected: {selected_language} ({language_dict[selected_language]})")

try:
  if text:
    st.write("Translation:")
    # execute() takes (source language, target language, text) in that order.
    tran_text = LLMExecuter.execute("English", selected_language, text)
    if tran_text:
      st.write(tran_text)
      audio_file = AudioConvertor.textToSpeech(tran_text, language_dict[selected_language], slow_speech)
      # Load the audio into memory and delete the generated file right away:
      # the script reruns on every interaction and would otherwise leave one
      # mp3 on disk per run.
      try:
        with open(audio_file, "rb") as audio_fh:
          audio_bytes = audio_fh.read()
      finally:
        os.remove(audio_file)
      # Display audio player in Streamlit
      st.audio(audio_bytes, format="audio/mp3")
      st.write("Playing your text as speech...")
    else:
      # execute() returns None when the model gives back an empty reply;
      # there is nothing to show or to synthesize in that case.
      st.markdown(alertMessageHtml('no translation was returned'), unsafe_allow_html=True)
  else:
    st.markdown(alertMessageHtml('provide the Data for Translation'), unsafe_allow_html=True)
except CustomError as e:
  st.markdown(alertMessageHtml(e.message+", please try later"), unsafe_allow_html=True)


Overwriting app.py


In [39]:
from pyngrok import ngrok


ngrok.set_auth_token("2q1UqGyBpAH8x3khQmHJrqWesqk_NZS8dWXbnSkR8XvYeiXS")
# Start ngrok tunnel for Streamlit
public_url = ngrok.connect(8501, "http")
print("Streamlit URL:", public_url)

# Run Streamlit app
!streamlit run app.py &>/dev/null &




PyngrokNgrokHTTPError: ngrok client exception, API returned 502: {"error_code":103,"status_code":502,"msg":"failed to start tunnel","details":{"err":"failed to start tunnel: Your account may not run more than 3 tunnels over a single ngrok agent session.\nThe tunnels already running on this session are:\ntn_2q1zvxOVhUvv8hhyokTnMyHBj10, tn_2q23I2PbgUqYknGiDQCy2STXaMg, tn_2q23XrKkAGydKI4hEFoS526HkC2\n\r\n\r\nERR_NGROK_324\r\n"}}
