diff --git a/llm_app/__init__.py b/llm_app/__init__.py
deleted file mode 100644
index 0245283..0000000
--- a/llm_app/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from llm_app import model_wrappers as model_wrappers
-from llm_app.processing import chunk_texts, extract_texts
-from llm_app.utils import send_slack_alerts
-
-__all__ = [
-    "model_wrappers",
-    "extract_texts",
-    "chunk_texts",
-    "send_slack_alerts",
-]
diff --git a/llm_app/model_wrappers/__init__.py b/llm_app/model_wrappers/__init__.py
deleted file mode 100644
index f34fc5f..0000000
--- a/llm_app/model_wrappers/__init__.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from llm_app.model_wrappers.huggingface_wrapper.api_models import (
-    HFApiFeatureExtractionTask,
-    HFApiTextGenerationTask,
-)
-from llm_app.model_wrappers.huggingface_wrapper.pipelines import (
-    HFFeatureExtractionTask,
-    HFTextGenerationTask,
-)
-from llm_app.model_wrappers.litellm_wrapper.api_models import (
-    LiteLLMChatModel,
-    LiteLLMEmbeddingModel,
-)
-from llm_app.model_wrappers.openai_wrapper.api_models import (
-    OpenAIChatGPTModel,
-    OpenAIEmbeddingModel,
-)
-from llm_app.model_wrappers.sentence_transformer.embedding import (
-    SentenceTransformerTask,
-)
-
-__all__ = [
-    "HFApiFeatureExtractionTask",
-    "HFApiTextGenerationTask",
-    "HFFeatureExtractionTask",
-    "HFTextGenerationTask",
-    "LiteLLMChatModel",
-    "LiteLLMEmbeddingModel",
-    "OpenAIChatGPTModel",
-    "OpenAIEmbeddingModel",
-    "SentenceTransformerTask",
-]
diff --git a/llm_app/model_wrappers/api_clients/__init__.py b/llm_app/model_wrappers/api_clients/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/llm_app/model_wrappers/api_clients/clients.py b/llm_app/model_wrappers/api_clients/clients.py
deleted file mode 100644
index f4fa716..0000000
--- a/llm_app/model_wrappers/api_clients/clients.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import logging
-from abc import ABC, abstractmethod
-
-import requests
-from tenacity import retry, stop_after_attempt, wait_random_exponential
-
-logfun = logging.debug
-
-
-class APIClient(ABC):
-    @abstractmethod
-    def make_request(self, **kwargs):
-        pass
-
-
-class OpenAIClient(APIClient):
-    def __init__(
-        self,
-        api_key: str,
-        api_type: str | None = None,
-        api_base: str | None = None,
-        api_version: str | None = None,
-    ):
-        import openai
-
-        openai.api_key = api_key
-        if api_type:
-            openai.api_type = api_type
-        if api_base:
-            openai.api_base = api_base
-        if api_version:
-            openai.api_version = api_version
-
-        self.api = openai
-        self.client = openai.OpenAI(api_key=api_key)
-
-
-class OpenAIChatCompletionClient(OpenAIClient):
-    @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-    def make_request(self, **kwargs):
-        logfun("Calling OpenAI chat completion service %s", str(kwargs)[:100])
-        return self.client.chat.completions.create(**kwargs)
-
-
-class OpenAIEmbeddingClient(OpenAIClient):
-    @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-    def make_request(self, **kwargs):
-        logfun("Calling OpenAI embedding service %s", str(kwargs)[:100])
-        return self.client.embeddings.create(**kwargs)
-
-
-class HuggingFaceClient(APIClient):
-    def __init__(
-        self,
-        api_key: str,
-        api_base: str = "https://api-inference.huggingface.co/models",
-    ) -> None:
-        self.headers = {"Authorization": f"Bearer {api_key}"}
-        self.api_url_prefix = api_base
-
-    def make_request(self, **kwargs):
-        logfun("Calling HuggingFace %s", str(kwargs)[:100])
-        endpoint = kwargs.pop("model")
-        url = f"{self.api_url_prefix}/{endpoint}"
-        response = requests.post(url, headers=self.headers, json=kwargs)
-        return response.json()
-
-
-class LiteLLMClient(APIClient):
-    """
-    A wrapper for the LiteLLM.
-
-    Attributes:
-        task_fn (Callable): Function reference for the specified task.
-
-    Args:
-        task (str, optional): Type of task to be executed. Defaults to "completion".
-            Supported tasks are:
-                - "completion"
-                - "embedding"
-
-    Raises:
-        ValueError: If the provided task is not supported.
-    """
-
-    def __init__(self, task: str = "completion") -> None:
-        """
-        Initializes the client with the specified task type.
-
-        Args:
-            task (str, optional): Type of task. Defaults to "completion".
-                Supported are 'completion' and 'embedding'.
-        """
-        from litellm import completion, embedding
-
-        if task == "completion":
-            self.task_fn = completion
-        elif task == "embedding":
-            self.task_fn = embedding
-        else:
-            raise ValueError("Supported tasks are (completion, embedding).")
-
-    def make_request(self, **kwargs):
-        """
-        Makes a request to the LLM service using the specified task function.
-        """
-        return self.task_fn(**kwargs)
diff --git a/llm_app/model_wrappers/base.py b/llm_app/model_wrappers/base.py
deleted file mode 100644
index 2ddd765..0000000
--- a/llm_app/model_wrappers/base.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import functools
-import os
-from abc import ABC
-
-import diskcache
-import pathway as pw
-
-
-class _Cache:
-    """A simple cache"""
-
-    def __init__(self) -> None:
-        if cache_dir := os.environ.get("PATHWAY_CACHE_DIR"):
-            self.cache = diskcache.Cache(cache_dir)
-        elif cache_dir := os.environ.get("PATHWAY_PERSISTENT_STORAGE"):
-            self.cache = diskcache.Cache(cache_dir)
-        else:
-            self.cache = {}
-
-    def __call__(self, fun):
-        base_name = f"{fun.__module__}_{fun.__qualname__}"
-
-        @functools.wraps(fun)
-        def wrapper(*args, **kwargs):
-            key = f"{base_name}({(args, kwargs)})"
-            if key not in self.cache:
-                self.cache[key] = fun(*args, **kwargs)
-            return self.cache[key]
-
-        return wrapper
-
-
-class BaseModel(ABC):
-    def __init__(self):
-        self.cache = _Cache()
-
-    def __call__(self, text: str, **kwargs):
-        raise NotImplementedError()
-
-    def apply(
-        self,
-        text: pw.ColumnExpression | str,
-        **kwargs,
-    ) -> pw.ColumnExpression:
-        return pw.apply_async(self.cache(self.__call__), text=text, **kwargs)
diff --git a/llm_app/model_wrappers/huggingface_wrapper/api_models.py b/llm_app/model_wrappers/huggingface_wrapper/api_models.py
deleted file mode 100644
index 5ae4132..0000000
--- a/llm_app/model_wrappers/huggingface_wrapper/api_models.py
+++ /dev/null
@@ -1,98 +0,0 @@
-from llm_app.model_wrappers.api_clients.clients import HuggingFaceClient
-from llm_app.model_wrappers.base import BaseModel
-
-
-class HuggingFaceAPIModel(BaseModel):
-    def __init__(self, **kwargs):
-        super().__init__()
-        self.api_client = self.get_client(**kwargs)
-
-    def get_client(self, **kwargs) -> HuggingFaceClient:
-        return HuggingFaceClient(**kwargs)
-
-    def call_api(self, **kwargs):
-        """
-        Makes a request to the Hugging Face API and returns the result.
-
-        The method accepts arguments as keyword arguments (**kwargs).
-        The expected arguments are 'model' and others that depend on the specific task.
-        Please check [HuggingFace Inference API](https://huggingface.co/docs/api-inference/detailed_parameters)
-        'model' is a string representing the pre-trained model to use for the call.
-
-        Examples:
-
-        1) Question-Answering Task usage:
-
-        model = HuggingFaceModel(api_key=token)
-        result = model.call(
-            inputs=dict(
-                context="Pathway is a realtime stream data processing framework. It has a python api",
-                question="Does Pathway have a python API ?"
-            ),
-            model='deepset/roberta-base-squad2'
-        )
-        The expected output for the example above is:
-        {
-            'score': 0.41046786308288574,
-            'start': 56,
-            'end': 75,
-            'answer': 'It has a python api'
-        }
-
-        2) Sentiment analysis model usage:
-
-        result = model.call(
-            inputs="What a performance that was!",
-            model='distilbert-base-uncased-finetuned-sst-2-english'
-        )
-        The expected output for the example above is:
-        [
-            [
-                {'label': 'POSITIVE', 'score': 0.9988183379173279},
-                {'label': 'NEGATIVE', 'score': 0.0011816363548859954}
-            ]
-        ]
-
-        Args:
-            **kwargs: The arguments for the model call. 'inputs' and 'model' keys are expected.
-
-        Returns:
-            The response from the Hugging Face API, the format depends on the model being used.
-        """
-        return self.api_client.make_request(**kwargs)
-
-
-class HFApiFeatureExtractionTask(HuggingFaceAPIModel):
-    def __call__(
-        self, text: str, locator="sentence-transformers/all-MiniLM-L6-v2", **kwargs
-    ):
-        response = self.call_api(inputs=[text], model=locator, **kwargs)
-        return response
-
-
-class HFApiTextGenerationTask(HuggingFaceAPIModel):
-    """
-    A class that represents a text generation task using the Hugging Face API.
-
-    It inherits from the HuggingFaceModel class and overrides the call method to
-    specifically work with text generation models.
-
-    This class allows users to simply pass a text string and get a generated
-    text in return.
-
-    Args:
-        api_key (str): The API key to access the Hugging Face API.
-
-    Example:
-
-    # >>> model = HFTextGenerationTask(api_key=token)
-    # >>> text = "Once upon a time"
-    # >>> generated_text = model(text, locator="gpt2")
-    # >>> print(generated_text)
-    'Once upon a time in a land far away...'
-
-    """
-
-    def __call__(self, text: str, locator="gpt2", **kwargs):
-        response = self.call_api(inputs=text, model=locator, **kwargs)
-        return response[0]["generated_text"]
diff --git a/llm_app/model_wrappers/huggingface_wrapper/pipelines.py b/llm_app/model_wrappers/huggingface_wrapper/pipelines.py
deleted file mode 100644
index 4f482e0..0000000
--- a/llm_app/model_wrappers/huggingface_wrapper/pipelines.py
+++ /dev/null
@@ -1,126 +0,0 @@
-from llm_app.model_wrappers.base import BaseModel
-
-
-class HFPipelineTask(BaseModel):
-    def __init__(self, model, device="cpu", **kwargs):
-        """
-        A wrapper class for Hugging Face's `Pipeline` class.
-
-        The `pipeline` function from Hugging Face is a utility factory method that creates
-        a Pipeline to handle different tasks.
-        It supports tasks like text classification, translation, summarization, and many more.
-
-        This wrapper class simplifies the process of initializing the pipeline and allows the user
-        to easily change the underlying model used for computations.
-
-        Parameters:
-        -----------
-        model : str, required
-            The model identifier from Hugging Face's model hub.
-        device : str, default='cpu'
-            The device where the computations will be performed.
-            Supports 'cpu' or 'gpu'. Default is 'cpu'.
-        **kwargs : optional
-            Additional arguments form HF.
-            Please check out https://huggingface.co/docs/transformers/main/main_classes/pipelines
-            for more information on the models and available arguments.
-
-        Attributes:
-        -----------
-        pipeline : transformers.Pipeline
-            The Hugging Face pipeline object.
-        tokenizer : transformers.PreTrainedTokenizer
-            The tokenizer associated with the pipeline.
-
-        Example:
-        --------
-        >>> pipe = HFPipelineTask('gpt2')
-        >>> result = pipe('Hello world')
-        """
-        from transformers import pipeline
-
-        super().__init__(**kwargs)
-        self.pipeline = pipeline(model=model, device=device)
-        self.tokenizer = self.pipeline.tokenizer
-
-    def crop_to_max_length(self, input_string, max_length=500):
-        tokens = self.tokenizer.tokenize(input_string)
-        if len(tokens) > max_length:
-            tokens = tokens[:max_length]
-        return self.tokenizer.convert_tokens_to_string(tokens)
-
-
-class HFFeatureExtractionTask(HFPipelineTask):
-    def __init__(self, model, device="cpu", max_length=500, **kwargs):
-        super().__init__(model, device=device, **kwargs)
-        self.max_length = max_length
-
-    def __call__(self, text, **kwargs):
-        """
-        This method computes feature embeddings for the given text.
-        HuggingFace Feature extraction models return embeddings per token.
-        To get the embedding vector of a text, we simply take the average.
-
-        Args:
-            text (str): The text for which we compute the embedding.
-            **kwargs: Additional arguments to be passed to the pipeline.
-
-        Returns:
-            List[float]: The average feature embeddings computed by the model.
-        """
-
-        text = self.crop_to_max_length(text, max_length=self.max_length)
-        # This will return a list of lists (one list for each word in the text)
-        embedding = self.pipeline(text, **kwargs)[0]
-
-        # For simplicity, we'll just average all word vectors to get a sentence embedding
-        avg_embedding = [sum(col) / len(col) for col in zip(*embedding)]
-
-        return avg_embedding
-
-
-class HFTextGenerationTask(HFPipelineTask):
-    def __init__(
-        self, model, device="cpu", max_prompt_length=500, max_new_tokens=500, **kwargs
-    ):
-        super().__init__(model, device=device, **kwargs)
-        self.max_prompt_length = max_prompt_length
-        self.max_new_tokens = max_new_tokens
-
-    def __call__(self, text, **kwargs):
-        """
-        Run the model to complete the text.
-        Args:
-            text (str): prompt to complete.
-            return_full_text (bool, optional, defaults to True):
-                If True, returns the full text, if False, only added text is returned.
-                Only significant if return_text is True.
-            clean_up_tokenization_spaces (bool, optional, defaults to False):
-                If True, removes extra spaces in text output.
-            prefix (str, optional): Adds prefix to prompt.
-            handle_long_generation (str, optional): By default, doesn't handle long generation.
-                Provides strategies to address this based on your use case:
-                    None: Does nothing special
-                    "hole": Truncates left of input, leaving a gap for generation.
-                        Might truncate a lot of the prompt, not suitable when generation exceeds model capacity.
-            Other arguments from transformers.TextGenerationPipeline.__call__ are supported as well. Link:
-                https://huggingface.co/docs/transformers/main/main_classes/pipelines#transformers.TextGenerationPipeline.__call__
-
-        """
-        text = self.crop_to_max_length(text, self.max_prompt_length)
-
-        max_new_tokens = kwargs.pop("max_new_tokens", self.max_new_tokens)
-
-        messages = [
-            {
-                "role": "system",
-                "content": "You are a helpful virtual assistant that only responds in english clearly and precisely.",
-            },
-            {"role": "user", "content": text},
-        ]
-        prompt = self.tokenizer.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
-
-        output = self.pipeline(prompt, max_new_tokens=max_new_tokens, **kwargs)
-        return output[0]["generated_text"]
diff --git a/llm_app/model_wrappers/litellm_wrapper/__init__.py b/llm_app/model_wrappers/litellm_wrapper/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/llm_app/model_wrappers/litellm_wrapper/api_models.py b/llm_app/model_wrappers/litellm_wrapper/api_models.py
deleted file mode 100644
index f25b110..0000000
--- a/llm_app/model_wrappers/litellm_wrapper/api_models.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import pathway as pw
-
-from llm_app.model_wrappers.api_clients.clients import LiteLLMClient
-from llm_app.model_wrappers.base import BaseModel
-
-
-class LiteLLMChatModel(BaseModel):
-    def __init__(self):
-        super().__init__()
-        self.api_client = self.get_client()
-
-    def get_client(
-        self,
-    ) -> LiteLLMClient:
-        return LiteLLMClient(task="completion")
-
-    def __call__(self, text: str, locator="gpt-3.5-turbo", **kwargs) -> str:
-        """
-
-        Example
-        # >>> os.environ["OPENAI_API_KEY"] = ""
-        # >>> model = LiteLLMChatModel()
-        # >>> model(
-        # ...     locator='gpt-4-0613',
-        # ...     text="Tell me a joke about jokes",
-        # ...     temperature=1.1
-        # ... )
-        """
-
-        messages = [
-            dict(role="system", content="You are a helpful assistant"),
-            dict(role="user", content=text),
-        ]
-        response = self.api_client.make_request(
-            messages=messages, model=locator, **kwargs
-        )
-        return response.choices[0].message.content
-
-    def apply(
-        self,
-        *args,
-        **kwargs,
-    ) -> pw.ColumnExpression:
-        """
-        Applies the specified API model in `locator` to the provided text.
-        Parameters
-        ----------
-        text : Union[pw.ColumnExpression, str]
-            The input text on which the model will be applied. It can be a column expression or a string.
-        locator : Union[pw.ColumnExpression, str, None]
-            The model locator to use for applying the model.
-            If provided, it should be a column expression or a string.
-            Otherwise, the default chat completion model `gpt-3.5-turbo` is applied.
-            Please visit https://docs.litellm.ai/docs/ to see the available models.
-        **kwargs : dict
-            Additional keyword arguments that will be used for the model application.
-            These could include settings such as `temperature`, `max_tokens`, etc.
-        Returns
-        -------
-        pw.ColumnExpression
-            The result of the model application as a column expression or str.
-            Please note that the output is `chat_completion.choices[0].message.content`
-            where `chat_completion` is the api response.
-        Example:
-        # >>> os.environ["OPENAI_API_KEY"] = ""
-        # >>> os.environ["COHERE_API_KEY"] = ""
-        # >>> model = LiteLLMChatModel()
-        # >>> table = pw.debug.table_from_pandas(
-        # ...     pd.DataFrame.from_records([
-        # ...         {"text": "How to use pathway to process a kafka stream ?"},
-        # ...         {"text": "How to apply a function to a pathway table ?"}
-        # ...     ])
-        # ... )
-        # >>> table += table.select(
-        # ...     openai_response = model.apply(
-        # ...         pw.this.text,
-        # ...         locator='gpt-4',
-        # ...         temperature=1.5,
-        # ...         max_tokens=1000
-        # ...     )
-        # ... )
-        # >>> table += table.select(
-        # ...     cohere_response = model.apply(
-        # ...         pw.this.text,
-        # ...         locator='command-nightly',
-        # ...         temperature=1.5,
-        # ...         max_tokens=1000
-        # ...     )
-        # ... )
-        """
-        return super().apply(*args, **kwargs)
-
-
-class LiteLLMEmbeddingModel(BaseModel):
-    def __init__(self):
-        super().__init__()
-        self.api_client = self.get_client()
-
-    def get_client(self) -> LiteLLMClient:
-        return LiteLLMClient(task="embedding")
-
-    def __call__(self, text: str, locator="text-embedding-ada-002", **kwargs):
-        """
-        Example:
-
-        # >>> os.environ["OPENAI_API_KEY"] = ""
-        # >>> embedder = LiteLLMEmbeddingModel()
-        # >>> embedder(
-        # ...     text='Some random text'
-        # ...     locator='text-embedding-ada-002'
-        # ... )
-        """
-
-        response = self.api_client.make_request(input=[text], model=locator, **kwargs)
-        return response["data"][0]["embedding"]
-
-    def apply(
-        self,
-        *args,
-        **kwargs,
-    ) -> pw.ColumnExpression:
-        """
-        Applies the specified API model in `locator` to the provided text.
-        Parameters
-        ----------
-        text : Union[pw.ColumnExpression, str]
-            The input text on which the model will be applied. It can be a column expression or a constant value.
-        locator : Union[pw.ColumnExpression, str, None]
-            The model locator to use for applying the model.
-            If provided, it should be a column expression or a constant value.
-            Otherwise, the default chat completion model `gpt-3.5-turbo` is applied.
-            Please visit https://docs.litellm.ai/docs/embedding/supported_embedding
-            to see the available models.
-        **kwargs : dict
-            Additional keyword arguments that will be used for the model application.
-            These could include settings such as `temperature`, `max_tokens`, etc.
-        Returns
-        -------
-        pw.ColumnExpression
-            The result of the model application as a column expression or constant of type list.
-            Please note that the output is `results["data"][0]["embedding"]`
-        Example:
-        # >>> os.environ["OPENAI_API_KEY"] = ""
-        # >>> embedder = LiteLLMEmbeddingModel()
-        # >>> table = pw.debug.table_from_pandas(
-        # ...     pd.DataFrame.from_records([
-        # ...         {"text": "How to use pathway to process a kafka stream ?"},
-        # ...         {"text": "How to apply a function to a pathway table ?"}
-        # ...     ])
-        # ... )
-        # >>> table += table.select(
-        # ...     embedding = embedder.apply(
-        # ...         pw.this.text,
-        # ...         locator='text-embedding-ada-002'
-        # ...     )
-        # ... )
-        """
-        return super().apply(*args, **kwargs)
diff --git a/llm_app/model_wrappers/openai_wrapper/__init__.py b/llm_app/model_wrappers/openai_wrapper/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/llm_app/model_wrappers/openai_wrapper/api_models.py b/llm_app/model_wrappers/openai_wrapper/api_models.py
deleted file mode 100644
index c20c85e..0000000
--- a/llm_app/model_wrappers/openai_wrapper/api_models.py
+++ /dev/null
@@ -1,176 +0,0 @@
-import logging
-
-import pathway as pw
-
-from llm_app.model_wrappers.api_clients.clients import (
-    OpenAIChatCompletionClient,
-    OpenAIClient,
-    OpenAIEmbeddingClient,
-)
-from llm_app.model_wrappers.base import BaseModel
-
-logfun = logging.debug
-
-
-class MessagePreparer:
-    @staticmethod
-    def prepare_chat_messages(prompt: str):
-        return [
-            dict(role="system", content="You are a helpful assistant"),
-            dict(role="user", content=prompt),
-        ]
-
-
-class OpenAIChatGPTModel(BaseModel):
-    def __init__(self, **kwargs):
-        super().__init__()
-        self.api_client = self.get_client(**kwargs)
-
-    def get_client(self, **kwargs) -> OpenAIClient:
-        return OpenAIChatCompletionClient(**kwargs)
-
-    def __call__(self, text: str, locator="gpt-3.5-turbo", **kwargs) -> str:
-        """
-
-        Example
-
-        # >>> model = OpenAIChatGPTModel(api_key = api_key)
-        # >>> model(
-        # ...     locator='gpt-4-0613',
-        # ...     text="Tell me a joke about jokes",
-        # ...     temperature=1.1
-        # ... )
-        """
-        if self.api_client.api.api_type == "azure":
-            kwargs["engine"] = locator
-        else:
-            kwargs["model"] = locator
-
-        messages = MessagePreparer.prepare_chat_messages(text)
-        if "response_format" in kwargs:
-            kwargs["response_format"] = kwargs["response_format"].value
-
-        logfun(f"Calling OpenAI API with: {messages}\n")
-        response = self.api_client.make_request(messages=messages, **kwargs)
-        logfun(f"API Response: {response.choices[0].message.content}\n")
-        return response.choices[0].message.content
-
-    def apply(
-        self,
-        *args,
-        **kwargs,
-    ) -> pw.ColumnExpression:
-        """
-        Applies the specified model in `locator` from OpenAIChatGPT API to the provided text.
-        Parameters
-        ----------
-        text : Union[pw.ColumnExpression, str]
-            The input text on which the model will be applied. It can be a column expression or a string.
-        locator : Union[pw.ColumnExpression, str, None]
-            The model locator to use for applying the model.
-            If provided, it should be a column expression or a string.
-            Otherwise, the default chat completion model `gpt-3.5-turbo` is applied.
-            Please check out https://platform.openai.com/docs/models/model-endpoint-compatibility
-            to see the available models.
-        **kwargs : dict
-            Additional keyword arguments that will be used for the model application.
-            These could include settings such as `temperature`, `max_tokens`, etc.
-            Check https://platform.openai.com/docs/api-reference/chat/create for the official API Reference
-        Returns
-        -------
-        pw.ColumnExpression
-            The result of the model application as a column expression or str.
-            Please note that the output is `chat_completion.choices[0].message.content`
-            where `chat_completion` is the api response.
-        Example:
-        # >>> model = OpenAIChatGPTModel(api_key = api_key)
-        # >>>
-        # >>> table = pw.debug.table_from_pandas(
-        # ...     pd.DataFrame.from_records([
-        # ...         {"text": "How to use pathway to process a kafka stream ?"},
-        # ...         {"text": "How to apply a function to a pathway table ?"}
-        # ...     ])
-        # ... )
-        # >>> table += table.select(
-        # ...     response = model.apply(
-        # ...         pw.this.text,
-        # ...         locator='gpt-4',
-        # ...         temperature=1.5,
-        # ...         max_tokens=1000
-        # ...     )
-        # ... )
-        """
-        return super().apply(*args, **kwargs)
-
-
-class OpenAIEmbeddingModel(BaseModel):
-    def __init__(self, **kwargs):
-        super().__init__()
-        self.api_client = self.get_client(**kwargs)
-
-    def get_client(self, **kwargs) -> OpenAIClient:
-        return OpenAIEmbeddingClient(**kwargs)
-
-    def __call__(self, text: str, locator="text-embedding-ada-002", **kwargs):
-        """
-        Example:
-
-        # >>> embedder = OpenAIEmbeddingModel(api_key)
-        # >>>
-        # >>> embedder(
-        # ...     text='Some random text'
-        # ...     locator='text-embedding-ada-002'
-        # ... )
-        """
-        if self.api_client.api.api_type == "azure":
-            kwargs["engine"] = locator
-        else:
-            kwargs["model"] = locator
-
-        response = self.api_client.make_request(input=[text], **kwargs)
-        return response.data[0].embedding
-
-    def apply(
-        self,
-        *args,
-        **kwargs,
-    ) -> pw.ColumnExpression:
-        """
-        Applies the specified model in `locator` from OpenAIEmbeddingModel API to the provided text.
-        Parameters
-        ----------
-        text : Union[pw.ColumnExpression, str]
-            The input text on which the model will be applied. It can be a column expression or a constant value.
-        locator : Union[pw.ColumnExpression, str, None]
-            The model locator to use for applying the model.
-            If provided, it should be a column expression or a constant value.
-            Otherwise, the default chat completion model `gpt-3.5-turbo` is applied.
-            Please check out https://platform.openai.com/docs/models/model-endpoint-compatibility
-            to see the available models.
-        **kwargs : dict
-            Additional keyword arguments that will be used for the model application.
-            These could include settings such as `temperature`, `max_tokens`, etc.
-            You can check https://platform.openai.com/docs/api-reference/embeddings/create
-            for the official API Reference.
-        Returns
-        -------
-        pw.ColumnExpression
-            The result of the model application as a column expression or constant of type list.
-            Please note that the output is `results["data"][0]["embedding"]`
-        Example:
-        # >>> embedder = OpenAIEmbeddingModel(api_key)
-        # >>>
-        # >>> table = pw.debug.table_from_pandas(
-        # ...     pd.DataFrame.from_records([
-        # ...         {"text": "How to use pathway to process a kafka stream ?"},
-        # ...         {"text": "How to apply a function to a pathway table ?"}
-        # ...     ])
-        # ... )
-        # >>> table += table.select(
-        # ...     embedding = embedder.apply(
-        # ...         pw.this.text,
-        # ...         locator='text-embedding-ada-002'
-        # ...     )
-        # ... )
-        """
-        return super().apply(*args, **kwargs)
diff --git a/llm_app/model_wrappers/sentence_transformer/embedding.py b/llm_app/model_wrappers/sentence_transformer/embedding.py
deleted file mode 100644
index 2cdafa0..0000000
--- a/llm_app/model_wrappers/sentence_transformer/embedding.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from llm_app.model_wrappers.base import BaseModel
-
-
-class SentenceTransformerTask(BaseModel):
-    def __init__(self, model: str, device: str = "cpu", **kwargs):
-        """
-        Wrapper for sentence-transformers.
-        Arguments:
-            model: model name or path
-        """
-        from sentence_transformers import SentenceTransformer
-
-        super().__init__(**kwargs)
-        self.model = SentenceTransformer(model_name_or_path=model, device=device)
-
-    def __call__(self, text: str, **kwargs) -> str:
-        """
-        Arguments:
-            text: input text string.
-            **kwargs: Check out https://www.sbert.net/
-        """
-        return self.model.encode(text, **kwargs).tolist()
diff --git a/llm_app/processing.py b/llm_app/processing.py
deleted file mode 100644
index 8a0fdde..0000000
--- a/llm_app/processing.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import logging
-import unicodedata
-from io import BytesIO
-
-import pathway as pw
-
-CHARS_PER_TOKEN = 3
-PUNCTUATION = [".", "?", "!", "\n"]
-
-
-@pw.udf
-def chunk_texts(
-    texts: str | list[str],
-    min_tokens: int = 50,
-    max_tokens: int = 500,
-    encoding_name: str = "cl100k_base",
-) -> list[str]:
-    """
-    Splits a given string or a list of strings into chunks based on token
-    count.
-
-    This function tokenizes the input texts and splits them into smaller parts ("chunks")
-    ensuring that each chunk has a token count between `min_tokens` and
-    `max_tokens`. It also attempts to break chunks at sensible points such as
-    punctuation marks.
-
-    Arguments:
-        texts: string or list of strings.
-        min_tokens: minimum tokens in a chunk of text.
-        max_tokens: maximum size of a chunk in tokens.
-        encoding_name: name of the encoding from `tiktoken`.
-
-    Example:
-
-    # >>> from pathway.stdlib.ml import chunk_texts
-    # >>> import pathway as pw
-    # >>> t = pw.debug.table_from_markdown(
-    # ...     '''| text
-    # ... 1| cooltext'''
-    # ... )
-    # >>> t += t.select(chunks = chunk_texts(pw.this.text, min_tokens=1, max_tokens=1))
-    # >>> pw.debug.compute_and_print(t, include_id=False)
-    # text | chunks
-    # cooltext | ('cool', 'text')
-    """
-    import tiktoken
-
-    if not isinstance(texts, str):
-        texts = "\n".join(texts)
-
-    tokenizer = tiktoken.get_encoding(encoding_name)
-    text: str = texts
-    text = normalize_unicode(text)
-    tokens = tokenizer.encode_ordinary(text)
-    output = []
-    i = 0
-    while i < len(tokens):
-        chunk_tokens = tokens[i : i + max_tokens]
-        chunk = tokenizer.decode(chunk_tokens)
-        last_punctuation = max([chunk.rfind(p) for p in PUNCTUATION], default=-1)
-        if last_punctuation != -1 and last_punctuation > CHARS_PER_TOKEN * min_tokens:
-            chunk = chunk[: last_punctuation + 1]
-
-        i += len(tokenizer.encode_ordinary(chunk))
-
-        output.append(chunk)
-    return output
-
-
-def normalize_unicode(text: str):
-    """
-    Get rid of ligatures
-    """
-    return unicodedata.normalize("NFKC", text)
-
-
-@pw.udf
-def extract_texts(data: bytes) -> list[str]:
-    """
-    Extract text elements from binary data using the partition function from
-    unstructured-io.
-    Visit [unstructured-io](https://unstructured-io.github.io/unstructured/) to know
-    more.
-
-    Arguments:
-        data (bytes): Binary data representing the text format file.
-
-    Returns:
-        list[str]: A list of extracted text elements.
-
-    Example
-
-    # >>> from pathway.stdlib.ml import extract_texts
-    # >>> import pathway as pw
-    # >>> t = pw.debug.table_from_markdown(
-    # ...     '''| text
-    # ... 1| cooltext'''
-    # ... )
-    # >>> t += t.select(bytes = pw.apply(str.encode, pw.this.text))
-    # >>> t = t.select(decoded=extract_texts(pw.this.bytes))
-    # >>> pw.debug.compute_and_print(t, include_id=False)
-    # decoded
-    # ('cooltext',)
-    """
-    from unstructured.partition.auto import partition
-
-    file_like = BytesIO(data)
-    try:
-        elements = partition(file=file_like)
-        texts = [element.text for element in elements]
-    except ValueError as ve:
-        logging.error(f"Value Error: {str(ve)}")
-        return []
-    except Exception as e:
-        logging.exception(f"An unexpected error occurred: {str(e)}")
-        return []
-    return texts
diff --git a/llm_app/utils.py b/llm_app/utils.py
deleted file mode 100644
index a2a53d0..0000000
--- a/llm_app/utils.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import logging
-
-import pathway as pw
-import requests
-
-logfun = logging.info
-
-
-def send_slack_alerts(
-    message: pw.ColumnReference, slack_alert_channel_id, slack_alert_token
-):
-    def send_slack_alert(key, row, time, is_addition):
-        if not is_addition:
-            return
-        alert_message = row[message.name]
-        logfun(alert_message)
-        requests.post(
-            "https://slack.com/api/chat.postMessage",
-            data="text={}&channel={}".format(alert_message, slack_alert_channel_id),
-            headers={
-                "Authorization": "Bearer {}".format(slack_alert_token),
-                "Content-Type": "application/x-www-form-urlencoded",
-            },
-        ).raise_for_status()
-
-    pw.io.subscribe(message._table, send_slack_alert)
diff --git a/poetry.lock b/poetry.lock
index c0bb729..46a1410 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -3144,6 +3144,7 @@ description = "Nvidia JIT LTO Library"
 optional = true
 python-versions = ">=3"
 files = [
+    {file = "nvidia_nvjitlink_cu12-12.4.99-py3-none-manylinux2014_aarch64.whl", hash = "sha256:75d6498c96d9adb9435f2bbdbddb479805ddfb97b5c1b32395c694185c20ca57"},
     {file = "nvidia_nvjitlink_cu12-12.4.99-py3-none-manylinux2014_x86_64.whl", hash = "sha256:c6428836d20fe7e327191c175791d38570e10762edc588fb46749217cd444c74"},
     {file = "nvidia_nvjitlink_cu12-12.4.99-py3-none-win_amd64.whl", hash = "sha256:991905ffa2144cb603d8ca7962d75c35334ae82bf92820b6ba78157277da1ad2"},
 ]
@@ -6123,6 +6124,17 @@ build = ["cmake (>=3.20)", "lit"]
 tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"]
 tutorials = ["matplotlib", "pandas", "tabulate", "torch"]
 
+[[package]]
+name = "types-pyyaml"
+version = "6.0.12.20240311"
+description = "Typing stubs for PyYAML"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "types-PyYAML-6.0.12.20240311.tar.gz", hash = "sha256:a9e0f0f88dc835739b0c1ca51ee90d04ca2a897a71af79de9aec5f38cb0a5342"},
+    {file = "types_PyYAML-6.0.12.20240311-py3-none-any.whl", hash = "sha256:b845b06a1c7e54b8e5b4c683043de0d9caf205e7434b3edc678ff2411979b8f6"},
+]
+
 [[package]]
 name = "types-requests"
 version = "2.31.0.20240311"
@@ -6704,4 +6716,4 @@ unstructured-to-sql = ["psycopg", "tiktoken", "unstructured"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "352af8da5e683839f3d4dffb7327cee78833520927d56f7e6b591818ac9757d3"
+content-hash = "378709c02a2991dc35c34f59f5d0f8b41f70cd7226b23a82fe4f493b7e3a549a"
diff --git a/pyproject.toml b/pyproject.toml
index 2558ace..e53a9fe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ authors = [
 license = "MIT"
 readme = "README.md"
 keywords = ["Pathway", "LLM"]
+package-mode = false
 
 classifiers = [
     "Development Status :: 3 - Alpha",
@@ -65,6 +66,7 @@ mypy = "~1.9.0"
 flake8 = "~7.0.0"
 pytest = "^8.0.2"
 types-requests = "^2.31.0"
+types-PyYAML = "^6.0.0"
 
 [build-system]
 requires = ["poetry-core"]
@@ -75,11 +77,10 @@ target-version = ["py310", "py311"]
 
 [tool.isort]
 profile = "black"
-known_first_party = ["llm_app"]
 
 [tool.mypy]
 python_version = "3.10"
-exclude = ['examples']
+exclude = ["examples/data"]
 ignore_missing_imports = true
 check_untyped_defs = true
 warn_redundant_casts = true