Add chat; Minor fixes

meghdadFar · Apr 8, 2024 · c1f86cb · c1f86cb
1 parent 5cd378c
commit c1f86cb
Showing 1 changed file with 64 additions and 3 deletions.
diff --git a/wordview/mwes/mwe.py b/wordview/mwes/mwe.py
@@ -1,11 +1,14 @@
 import re
 import string
+import threading
 from re import Match
 from typing import Optional
 
 import nltk
 import pandas
+from flask import Flask, jsonify, request, send_from_directory
 from nltk import RegexpParser, word_tokenize
+from openai import OpenAI
 from tabulate import tabulate  # type: ignore
 from tqdm import tqdm
 
@@ -26,7 +29,9 @@ def is_alphanumeric_latinscript_multigram(word: str) -> Optional[Match[str]]:
 
 
 class MWE:
-    """Extract MWEs of type LVC, VPC, Noun Compounds, Adjective Compounds, and custom patterns from a text corpus."""
+    """Extract MWEs of typeS:
+    LVC, VPC, Noun Compounds, Adjective Compounds, and custom patterns from a text corpus.
+    """
 
     def __init__(
         self,
@@ -99,19 +104,75 @@ def __init__(
             custom_pattern=mwe_patterns,
         )
 
+    def chat(self, api_key: str = ""):
+        """Chat with OpenAI's latest model about MWEs .
+        Access the chat UI in your localhost under http://127.0.0.1:5001/
+
+        Args:
+            api_key: OpenAI API key.
+
+        Returns:
+            None
+        """
+        self.api_key = api_key
+        self.chat_client = OpenAI(api_key=api_key)
+        base_content = f"""Answer any questions about the Multiword Expressions (MWEs) that extracted from the uploaded text corpus by Wordview and are presented in the following MWEs dictionary.
+        \n\n
+        ------------------------------
+        MWEs dictionary:
+        ------------------------------
+        {self.mwes}
+        \n\n
+        Important Points:\n
+        - Answer the questions without including "According/based on to MWEs dictionary".\n
+        - The format of the above dictionary is as follows:\n
+            "MWE Type": "MWE instance 1": "Association measure", "MWE instance 2": "Association measure", ...\n
+        - There could be other custom types in which case you should just mention the dictionary key.\n
+        - Depending on a parameter N set by the user, each MWE type contains at most N instances. But it can contain less or even 0.
+        """
+        chat_history = [
+            {"role": "system", "content": base_content},
+        ]
+        app = Flask(__name__, static_folder="path_to_your_ui_folder")
+
+        @app.route("/")
+        def index():
+            return send_from_directory("../chat_ui", "chat.html")
+
+        @app.route("/chat", methods=["POST"])
+        def chat():
+            user_input = request.json["message"]
+            chat_history.append({"role": "user", "content": user_input})
+            response = (
+                self.chat_client.chat.completions.create(
+                    model="gpt-3.5-turbo",
+                    messages=chat_history,
+                )
+                .choices[0]
+                .message.content
+            )
+            chat_history.append({"role": "assistant", "content": response})
+            return jsonify({"reply": response})
+
+        def run():
+            app.run(port=5001)
+
+        flask_thread = threading.Thread(target=run)
+        flask_thread.start()
+
     def extract_mwes(
         self,
         sort: bool = True,
         top_n: Optional[int] = None,
     ) -> dict[str, dict[str, float]]:
-        """Extract MWEs from the text corpus.
+        """Extract MWEs from the text corpus and add them to self.mwes.
 
         Args:
             sort: If True, the MWEs will be sorted in descending order of association measure.
             top_n: If provided, only the top n MWEs will be returned.
 
         Returns:
-            A dictionary containing the MWEs and their association measures.
+            None.
         """
         for sentence in tqdm(self.reader.get_sentences()):
             try: