In [13]:
!pip install jina


Collecting jina
  Downloading jina-3.28.0.tar.gz (368 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting websockets (from jina)
  Downloading websockets-14.1-cp312-cp312-win_amd64.whl.metadata (6.9 kB)
Collecting grpcio-health-checking<=1.68.0,>=1.46.0 (from jina)
  Downloading grpcio_health_checking-1.68.0-py3-none-any.whl.metadata (1.1 kB)
Collecting opentelemetry-instrumentation-fastapi>=0.33b0 (from jina)
  Downloading opentelemetry_instrumentation_fastapi-0.49b2-py3-none-any.whl.metadata (2.1 kB)
Collecting opentelemetry-exporter-otlp>=1.12.0 (from jina)
  Downloading opentelemetry_exporter_otlp-1.28.2-py3-none-any.whl.metadata (2.3 kB)
Collecting aiofiles (from jina)
  Downloading aiofiles-2

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
conda-repo-cli 1.0.114 requires urllib3>=2.2.2, but you have urllib3 1.26.20 which is incompatible.


In [17]:
from jina import Executor, requests
from transformers import AutoTokenizer, AutoModelForCausalLM

class ReportGenerationExecutor(Executor):
    """Executor that writes a speaking-assessment report into each document.

    For every incoming document the scores and issue lists stored in
    ``doc.tags`` are turned into a prompt, the causal language model
    generates a continuation, and that continuation is stored in
    ``doc.text``.
    """

    def __init__(self, model_name="EleutherAI/gpt-neox-20b", **kwargs):
        """Load the tokenizer and causal LM identified by ``model_name``.

        Args:
            model_name: Model identifier passed to ``from_pretrained`` for
                both the tokenizer and the model.
            **kwargs: Forwarded unchanged to ``Executor.__init__``.
        """
        super().__init__(**kwargs)
        # Initialize the model and tokenizer once; they are reused per request.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)

    @staticmethod
    def _format_items(items):
        """Render a tag value as a comma-separated string, or "None" if empty."""
        if not items:
            return "None"
        if isinstance(items, str):
            # A bare string is one item; joining it would split it into characters.
            return items
        # str() each element so non-string tag values do not raise TypeError.
        return ", ".join(str(item) for item in items)

    @classmethod
    def _build_prompt(cls, tags):
        """Build the report prompt from a document's ``tags`` mapping."""
        grammar_score = tags.get('grammar_score', 0)
        pronunciation_score = tags.get('pronunciation_score', 0)
        fluency_score = tags.get('fluency_score', 0)
        grammar_issues = cls._format_items(tags.get('grammar_issues', []))
        mispronounced_words = cls._format_items(tags.get('mispronounced_words', []))
        filler_words = cls._format_items(tags.get('filler_words', []))
        pauses = tags.get('pauses', 0)

        # Assemble line by line so the source file's indentation does not
        # leak into the prompt as leading whitespace on every line.
        lines = [
            "You are a language assessment AI. The user has been assessed based on their English speaking ability.",
            "Their scores are as follows:",
            f"- Grammar: {grammar_score}",
            f"- Pronunciation: {pronunciation_score}",
            f"- Fluency: {fluency_score}",
            "",
            "Specific issues identified:",
            f"- Grammar issues: {grammar_issues}",
            f"- Mispronounced words: {mispronounced_words}",
            f"- Filler words: {filler_words}",
            f"- Long pauses: {pauses} detected.",
            "",
            "Generate a detailed, personalized report summarizing the user's performance in grammar, pronunciation, and fluency, along with actionable suggestions for improvement.",
        ]
        return "\n".join(lines)

    @requests
    def generate_report(self, docs, **kwargs):
        """Generate a report for each document and store it in ``doc.text``.

        Args:
            docs: Iterable of documents; each is expected to expose a ``tags``
                mapping and a writable ``text`` attribute.
            **kwargs: Accepted for compatibility with the request handler
                signature; not used here.
        """
        for doc in docs:
            prompt = self._build_prompt(doc.tags)

            # Keep the attention mask alongside the ids and place both on the
            # same device as the model.
            encoded = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
            input_ids = encoded["input_ids"]
            prompt_length = input_ids.shape[-1]

            outputs = self.model.generate(
                input_ids=input_ids,
                attention_mask=encoded.get("attention_mask"),
                # Budget applies to generated tokens only, so a long prompt
                # cannot consume the whole allowance.
                max_new_tokens=512,
                # temperature/top_p only take effect when sampling is enabled.
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
            )

            # The returned sequence starts with the prompt tokens; decode only
            # the continuation so the report does not echo the prompt.
            generated_ids = outputs[0][prompt_length:]
            feedback = self.tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
            doc.text = feedback  # Set the generated feedback as the result
