# **1. Setting Up the Environment**

In [2]:
!pip install openai langchain langchain-openai

Collecting langchain-openai
  Downloading langchain_openai-0.3.9-py3-none-any.whl.metadata (2.3 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading langchain_openai-0.3.9-py3-none-any.whl (60 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.9/60.9 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken, langchain-openai
Successfully installed langchain-openai-0.3.9 tiktoken-0.9.0


In [3]:
# Key dependencies
import openai
from langchain_openai import OpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
import sqlite3
import json
import os

# **2. Database Design**

In [4]:
def setup_database(db_path='language_learning.db'):
    """Open the SQLite database and make sure the application schema exists.

    Three tables are created if they are missing: ``users`` (learner profile),
    ``sessions`` (one row per practice session, referencing ``users``) and
    ``mistakes`` (one row per detected mistake, referencing ``sessions``).

    Args:
        db_path: Path of the SQLite database file. Defaults to
            ``'language_learning.db'`` (the original hard-coded location).
            Pass ``':memory:'`` for a throwaway in-memory database.

    Returns:
        sqlite3.Connection: An open connection with the schema committed.
        The caller owns the connection and is responsible for closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or the schema
            statements fail; the connection is closed before re-raising.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # User profiles table
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS users (
            user_id INTEGER PRIMARY KEY,
            native_language TEXT,
            learning_language TEXT,
            proficiency_level TEXT
        )
        ''')

        # Learning sessions table
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS sessions (
            session_id INTEGER PRIMARY KEY,
            user_id INTEGER,
            scenario TEXT,
            start_time TIMESTAMP,
            end_time TIMESTAMP,
            FOREIGN KEY (user_id) REFERENCES users (user_id)
        )
        ''')

        # Mistakes tracking table
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS mistakes (
            mistake_id INTEGER PRIMARY KEY,
            session_id INTEGER,
            original_text TEXT,
            correction TEXT,
            mistake_type TEXT,
            explanation TEXT,
            FOREIGN KEY (session_id) REFERENCES sessions (session_id)
        )
        ''')

        conn.commit()
    except Exception:
        # Do not leak an open connection when schema creation fails.
        conn.close()
        raise
    return conn

# **3. LLM Configuration**

In [5]:
def initialize_llm(api_key=None):
    """Create the LangChain conversation chain used by the chatbot.

    Args:
        api_key: OpenAI API key. When omitted (or empty), the value of the
            ``OPENAI_API_KEY`` environment variable is used instead.

    Returns:
        ConversationChain: A verbose chain wrapping an ``OpenAI`` LLM
        (temperature 0.7) with a fresh ``ConversationBufferMemory``.

    Raises:
        ValueError: If no key was passed and ``OPENAI_API_KEY`` is not set.
    """
    # Never hardcode a key in this function: an unconditional assignment here
    # would override the caller's key and disable the environment fallback.
    if not api_key:
        # Attempt to get from environment if not provided
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("No API key provided. Either pass an API key parameter or set the OPENAI_API_KEY environment variable.")

    # Set the environment variable for consistency
    os.environ["OPENAI_API_KEY"] = api_key

    # Using LangChain for conversation management
    llm = OpenAI(temperature=0.7, openai_api_key=api_key)
    memory = ConversationBufferMemory()
    conversation = ConversationChain(
        llm=llm,
        memory=memory,
        verbose=True
    )

    return conversation

# **4. Core Chatbot Logic**

In [6]:
class LanguageLearningBot:
    """Interactive console chatbot for practising a foreign language.

    The bot collects a learner profile, lets the learner pick a role-play
    scenario, then relays each learner message to an LLM conversation chain.
    The LLM is asked to return both a mistake analysis and a natural reply;
    detected mistakes are stored in the SQLite ``mistakes`` table and used
    to produce a summary when the session ends.

    Attributes set in ``__init__``:
        db_conn: open SQLite connection returned by ``setup_database()``.
        conversation: chain returned by ``initialize_llm(api_key)``; only its
            ``predict(input=...)`` method is used.
        user_info: dict describing the learner, or None before a session starts.
        current_session: dict with ``session_id`` and ``scenario``, or None.
        mistakes: list of mistake dicts recorded during this object's lifetime.
        scenarios: list of scenario names offered to the learner.
    """

    # Words that end the chat loop (compared case-insensitively).
    _EXIT_COMMANDS = ("exit", "quit", "end")

    # Labels the LLM is asked to emit for each mistake, paired with the key
    # under which the value is stored in a mistake dict.
    _MISTAKE_FIELDS = (
        ("CORRECTION:", "correction"),
        ("TYPE:", "type"),
        ("EXPLANATION:", "explanation"),
    )

    def __init__(self, api_key):
        """Open the database and build the LLM conversation chain.

        Args:
            api_key: OpenAI API key forwarded to ``initialize_llm``.
        """
        self.db_conn = setup_database()
        self.conversation = initialize_llm(api_key)
        self.user_info = None
        self.current_session = None
        self.mistakes = []
        self.scenarios = [
            "At a restaurant", "Shopping", "Asking for directions",
            "At the doctor", "Job interview", "Casual conversation"
        ]

    def start_session(self):
        """Collect the learner profile, pick a scenario and open the conversation.

        Inserts one row into ``users`` and one into ``sessions``, fills
        ``self.user_info`` and ``self.current_session``, then sends the
        scene-setting prompt to the LLM and prints its first reply.
        """
        # Get user information
        native_language = input("What language do you speak fluently? ")
        learning_language = input("What language would you like to practice? ")
        proficiency_level = input("What's your level (beginner/intermediate/advanced)? ")

        # Store user info
        cursor = self.db_conn.cursor()
        cursor.execute(
            "INSERT INTO users (native_language, learning_language, proficiency_level) VALUES (?, ?, ?)",
            (native_language, learning_language, proficiency_level)
        )
        self.db_conn.commit()
        self.user_info = {
            "user_id": cursor.lastrowid,
            "native_language": native_language,
            "learning_language": learning_language,
            "proficiency_level": proficiency_level
        }

        # Choose scenario (re-prompts until the answer is a valid menu number)
        selected_scenario = self._choose_scenario()

        # Create new session
        cursor.execute(
            "INSERT INTO sessions (user_id, scenario, start_time) VALUES (?, ?, datetime('now'))",
            (self.user_info["user_id"], selected_scenario)
        )
        self.db_conn.commit()
        self.current_session = {
            "session_id": cursor.lastrowid,
            "scenario": selected_scenario
        }

        # Initialize conversation with system prompt
        system_prompt = self._generate_system_prompt()
        initial_response = self.conversation.predict(input=system_prompt)
        print(initial_response)

    def _choose_scenario(self):
        """Print the scenario menu and return the scenario the learner selects.

        Keeps asking until the learner enters a whole number between 1 and
        ``len(self.scenarios)``. Non-numeric text, 0, negative numbers and
        out-of-range numbers are all rejected instead of raising or silently
        wrapping around to the end of the list.
        """
        print("\nChoose a scenario to practice:")
        for number, scenario in enumerate(self.scenarios, start=1):
            print(f"{number}. {scenario}")

        while True:
            raw_choice = input("Enter number: ")
            try:
                choice = int(raw_choice)
            except ValueError:
                choice = 0  # falls through to the range check below
            if 1 <= choice <= len(self.scenarios):
                return self.scenarios[choice - 1]
            print(f"Please enter a number between 1 and {len(self.scenarios)}.")

    def _generate_system_prompt(self):
        """Return the scene-setting prompt built from the learner profile and scenario."""
        prompt = f"""
        You are a language learning assistant helping someone practice {self.user_info['learning_language']}.
        The user's native language is {self.user_info['native_language']} and they are at a {self.user_info['proficiency_level']} level.

        The conversation scenario is: {self.current_session['scenario']}

        Please follow these guidelines:
        1. Begin the conversation in {self.user_info['learning_language']}, appropriate for their level
        2. Primarily use {self.user_info['learning_language']}, but explain complex things in {self.user_info['native_language']} if needed
        3. When the user makes a language mistake, gently correct them in a supportive way
        4. Keep the conversation focused on the scenario
        5. Use appropriate vocabulary for a {self.user_info['proficiency_level']} level learner

        Start by setting the scene for {self.current_session['scenario']} and begin the conversation.
        """
        return prompt

    def chat(self):
        """Run the interactive loop until the learner types an exit command.

        Starts a session, then repeatedly reads a line, sends it to the LLM,
        stores any detected mistakes and prints the reply. Typing ``exit``,
        ``quit`` or ``end`` (any case, surrounding whitespace ignored) closes
        the session and prints the learning summary.
        """
        self.start_session()

        while True:
            user_input = input("\nYou: ").strip()

            # Nothing to analyse; avoid spending an LLM call on an empty turn.
            if not user_input:
                continue

            if user_input.lower() in self._EXIT_COMMANDS:
                self._end_session()
                break

            # Process user input with mistake detection
            response, mistakes = self._process_input(user_input)

            # Store any detected mistakes
            for mistake in mistakes:
                self._store_mistake(mistake)

            print(f"\nBot: {response}")

    def _process_input(self, user_input):
        """Send one learner message to the LLM and split its answer.

        Args:
            user_input: The text the learner typed.

        Returns:
            tuple: ``(response, mistakes)`` where ``response`` is the text to
            show the learner and ``mistakes`` is a list of dicts with keys
            ``original``, ``correction``, ``type`` and ``explanation``.
            If the LLM answer contains no ``RESPONSE:`` marker, the whole
            answer is returned as the response with an empty mistake list.
        """
        prompt = f"""
        The user said: "{user_input}"

        First, analyze if there are any language mistakes in their response.
        If there are mistakes, identify them in this format:
        MISTAKE: [original text]
        CORRECTION: [corrected text]
        TYPE: [grammar/vocabulary/pronunciation/etc]
        EXPLANATION: [brief explanation of the mistake]

        Then, respond to the user naturally as part of the ongoing conversation in {self.user_info['learning_language']}.
        If there were mistakes, subtly incorporate the corrections into your response without making it feel like a formal correction.

        Format your complete response as:
        ANALYSIS: [your mistake analysis, or "No mistakes detected"]
        RESPONSE: [your natural conversation response]
        """

        full_response = self.conversation.predict(input=prompt)

        # Split on the FIRST marker only so a reply that itself contains the
        # text "RESPONSE:" is not truncated.
        analysis_part, marker, reply_part = full_response.partition("RESPONSE:")

        if not marker:
            # Fallback if response isn't formatted as expected
            return full_response, []

        analysis = analysis_part.replace("ANALYSIS:", "").strip()
        response = reply_part.strip()

        mistakes = []
        if "No mistakes detected" not in analysis:
            mistakes = self._parse_mistakes(analysis)

        return response, mistakes

    @classmethod
    def _parse_mistakes(cls, analysis):
        """Extract mistake dicts from the analysis section of an LLM answer.

        Each entry starts at a ``MISTAKE:`` marker; the rest of that line is
        the original text. The remaining fields are located by their labels
        (``CORRECTION:``, ``TYPE:``, ``EXPLANATION:``) rather than by line
        position, so blank or extra lines between fields do not shift values
        into the wrong field. Entries missing any of the three labels are
        skipped as malformed.
        """
        mistakes = []
        for entry in analysis.split("MISTAKE:")[1:]:  # text before the first marker is not an entry
            lines = [line.strip() for line in entry.splitlines()]
            if not lines:
                continue

            parsed = {"original": lines[0]}
            for line in lines[1:]:
                for label, key in cls._MISTAKE_FIELDS:
                    # First occurrence of a label wins within one entry.
                    if key not in parsed and line.startswith(label):
                        parsed[key] = line[len(label):].strip()
                        break

            if all(key in parsed for _, key in cls._MISTAKE_FIELDS):
                mistakes.append(parsed)
        return mistakes

    def _store_mistake(self, mistake):
        """Remember a mistake in memory and insert it into the ``mistakes`` table.

        Args:
            mistake: dict with keys ``original``, ``correction``, ``type``
                and ``explanation``.
        """
        self.mistakes.append(mistake)

        cursor = self.db_conn.cursor()
        cursor.execute(
            "INSERT INTO mistakes (session_id, original_text, correction, mistake_type, explanation) VALUES (?, ?, ?, ?, ?)",
            (
                self.current_session["session_id"],
                mistake["original"],
                mistake["correction"],
                mistake["type"],
                mistake["explanation"]
            )
        )
        self.db_conn.commit()

    def _end_session(self):
        """Stamp the session's end time and print the learning summary."""
        # Update session end time
        cursor = self.db_conn.cursor()
        cursor.execute(
            "UPDATE sessions SET end_time = datetime('now') WHERE session_id = ?",
            (self.current_session["session_id"],)
        )
        self.db_conn.commit()

        # Generate summary and feedback
        self._generate_learning_summary()

    def _generate_learning_summary(self):
        """Print feedback for the current session.

        Reads this session's rows from the ``mistakes`` table. With no rows a
        fixed congratulation is printed; otherwise the mistakes are grouped by
        type, embedded as JSON in a prompt, and the LLM's answer is printed.
        """
        # Retrieve mistakes for this session
        cursor = self.db_conn.cursor()
        cursor.execute(
            "SELECT original_text, correction, mistake_type, explanation FROM mistakes WHERE session_id = ?",
            (self.current_session["session_id"],)
        )
        mistakes = cursor.fetchall()

        if not mistakes:
            print("\n=== Session Summary ===")
            print("Great job! You had no mistakes in this session.")
            print("Keep practicing to build your fluency!")
            return

        # Group mistakes by type
        mistake_types = {}
        for original, correction, mistake_type, explanation in mistakes:
            mistake_types.setdefault(mistake_type, []).append({
                "original": original,
                "correction": correction,
                "explanation": explanation
            })

        # ensure_ascii=False keeps accented / non-Latin learner text readable
        # in the prompt instead of turning it into \uXXXX escapes.
        mistakes_json = json.dumps(mistake_types, indent=2, ensure_ascii=False)

        # Create summary prompt for LLM
        summary_prompt = f"""
        The user has completed a language learning session in {self.user_info['learning_language']} at a {self.user_info['proficiency_level']} level.

        Here are the mistakes they made during the conversation:
        {mistakes_json}

        Please provide:
        1. A supportive summary of their performance
        2. Analysis of patterns in their mistakes
        3. Specific advice on what areas to focus on for improvement
        4. 2-3 practice exercises they could do to address their main issues

        Keep your response encouraging and constructive.
        """

        summary = self.conversation.predict(input=summary_prompt)

        print("\n=== Session Summary ===")
        print(summary)

# **5. Main Application**

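> Set the `OPENAI_API_KEY` environment variable before running the cell below: `main()` reads the key from it and then starts an interactive practice session.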



In [None]:
def main():
    """Start one interactive practice session from the console.

    The OpenAI key is taken from the ``OPENAI_API_KEY`` environment variable
    and handed to ``LanguageLearningBot``; the welcome banner is printed and
    the chat loop runs until the learner exits.
    """
    # In production, use environment variables or a config file
    assistant = LanguageLearningBot(os.environ.get("OPENAI_API_KEY"))

    banner = (
        "Welcome to the Language Learning Assistant!",
        "Type 'exit', 'quit', or 'end' to finish the session.",
    )
    for banner_line in banner:
        print(banner_line)

    assistant.chat()


if __name__ == "__main__":
    main()

  memory = ConversationBufferMemory()
  conversation = ConversationChain(


Welcome to the Language Learning Assistant!
Type 'exit', 'quit', or 'end' to finish the session.
