In [1]:
import math

import mlx.core as mx
import mlx.nn as nn


class LoRALinear(nn.Module):
    """A linear layer augmented with a low-rank (LoRA) additive update.

    The forward pass returns ``linear(x) + scale * ((dropout(x) @ lora_a) @ lora_b)``
    where ``lora_a`` has shape ``(input_dims, r)`` and ``lora_b`` has shape
    ``(r, output_dims)``. ``lora_b`` starts at zero, so a freshly built layer
    produces the same output as the wrapped linear layer.
    """

    def __init__(
        self,
        input_dims: int,
        output_dims: int,
        r: int = 8,
        lora_alpha: float = 16,
        lora_dropout: float = 0.0,
        scale: float = 10.0,
        bias: bool = False,
    ):
        """Build the base linear layer, the dropout, and the two low-rank factors.

        Args:
            input_dims: Size of the layer input.
            output_dims: Size of the layer output.
            r: Rank of the low-rank update.
            lora_alpha: Numerator of the ``lora_alpha / r`` factor folded into the scale.
            lora_dropout: Dropout probability applied to the input of the low-rank path.
            scale: Base multiplier; the stored scale is ``scale * (lora_alpha / r)``.
            bias: Whether the base linear layer carries a bias.
        """
        super().__init__()

        # Base projection; ``from_linear`` swaps this out for an existing layer.
        self.linear = nn.Linear(input_dims, output_dims, bias=bias)

        self.lora_dropout = nn.Dropout(p=lora_dropout)

        # Effective multiplier applied to the low-rank branch.
        self.scale = scale * (lora_alpha / r)

        # lora_a: uniform in [-1/sqrt(input_dims), 1/sqrt(input_dims)]; lora_b: zeros.
        bound = 1 / math.sqrt(input_dims)
        self.lora_a = mx.random.uniform(
            low=-bound,
            high=bound,
            shape=(input_dims, r),
        )
        self.lora_b = mx.zeros(shape=(r, output_dims))

    @staticmethod
    def from_linear(
        linear: nn.Linear,
        r: int = 8,
        lora_alpha: float = 16,
        lora_dropout: float = 0.05,
        scale: float = 10.0,
    ):
        """Wrap an existing (optionally quantized) linear layer in a ``LoRALinear``.

        The dimensions are read from ``linear.weight.shape``. For an
        ``nn.QuantizedLinear`` the second dimension is multiplied by
        ``32 // linear.bits`` (presumably because the quantized weight is packed
        into 32-bit words -- confirm against the MLX docs).

        Returns:
            A new ``LoRALinear`` whose ``linear`` attribute is the given layer.
        """
        # TODO remove when input_dims and output_dims are attributes
        # on linear and quantized linear
        out_features, in_features = linear.weight.shape
        if isinstance(linear, nn.QuantizedLinear):
            in_features = in_features * (32 // linear.bits)

        wrapped = LoRALinear(
            input_dims=in_features,
            output_dims=out_features,
            r=r,
            lora_alpha=lora_alpha,
            lora_dropout=lora_dropout,
            scale=scale,
        )
        # Replace the freshly initialised base layer with the caller's layer.
        wrapped.linear = linear
        return wrapped

    def to_linear(self, de_quantize: bool = False):
        """Fuse the low-rank update into the base weight and return a plain layer.

        Args:
            de_quantize: When the wrapped layer is quantized, return a regular
                ``nn.Linear`` instead of re-quantizing the fused weight.

        Returns:
            An ``nn.Linear`` holding ``weight + (scale * lora_b.T) @ lora_a.T``,
            or an ``nn.QuantizedLinear`` built from it when the wrapped layer was
            quantized and ``de_quantize`` is false.
        """
        base = self.linear
        has_bias = "bias" in base
        quantized = isinstance(base, nn.QuantizedLinear)

        if quantized:
            # Quantized weights are expanded first and fused in float16.
            target_dtype = mx.float16
            base_weight = mx.dequantize(
                base.weight,
                base.scales,
                base.biases,
                base.group_size,
                base.bits,
            )
        else:
            # Otherwise fuse in the dtype of the existing weight.
            base_weight = base.weight
            target_dtype = base_weight.dtype

        out_features, in_features = base_weight.shape
        merged = nn.Linear(in_features, out_features, bias=has_bias)

        scaled_b_t = (self.scale * self.lora_b.T).astype(target_dtype)
        a_t = self.lora_a.T.astype(target_dtype)
        merged.weight = base_weight + scaled_b_t @ a_t
        if has_bias:
            merged.bias = base.bias

        if quantized and not de_quantize:
            # Re-quantize with the same group size and bit width as the original.
            merged = nn.QuantizedLinear.from_linear(
                merged,
                base.group_size,
                base.bits,
            )

        return merged

    def __call__(self, x):
        """Apply the base layer plus the scaled low-rank branch to ``x``."""
        # The base layer is fed x cast to the weight dtype, or to the dtype of
        # the quantization scales when the base layer is quantized.
        if isinstance(self.linear, nn.QuantizedLinear):
            compute_dtype = self.linear.scales.dtype
        else:
            compute_dtype = self.linear.weight.dtype

        base_out = self.linear(x.astype(compute_dtype))
        # The low-rank branch sees the un-cast input, after dropout.
        low_rank_out = (self.lora_dropout(x) @ self.lora_a) @ self.lora_b
        return base_out + self.scale * low_rank_out

def linear_to_lora_layers(model: nn.Module, num_lora_layers: int):
    """
    Replace selected linear layers in the last blocks of a model with LoRA layers.

    Which sub-layers are wrapped depends on ``model.model_type``; the model is
    modified in place.

    Args:
        model (nn.Module): The neural network model.
        num_lora_layers (int): The number of blocks to convert to lora layers
        starting from the last layer.

    Raises:
        ValueError: If more blocks are requested than the model has, or if
        ``model.model_type`` is not one of the handled types.
    """

    def last_blocks(blocks):
        # Validate the request against the block count, then return the tail.
        num_model = len(blocks)
        if num_lora_layers > num_model:
            raise ValueError(
                f"Requested {num_lora_layers} LoRA layers "
                f"but the model only has {num_model} layers."
            )
        return blocks[num_model - num_lora_layers :]

    model_type = model.model_type

    if model_type in (
        "mistral",
        "llama",
        "phi",
        "mixtral",
        "stablelm_epoch",
        "qwen2",
    ):
        for block in last_blocks(model.model.layers):
            attention = block.self_attn
            attention.q_proj = LoRALinear.from_linear(attention.q_proj)
            attention.v_proj = LoRALinear.from_linear(attention.v_proj)
            # Blocks that expose a mixture-of-experts router get it wrapped too.
            if hasattr(block, "block_sparse_moe"):
                block.block_sparse_moe.gate = LoRALinear.from_linear(
                    block.block_sparse_moe.gate
                )
    elif model_type == "olmo":
        for block in last_blocks(model.model.transformer.blocks):
            block.att_proj = LoRALinear.from_linear(block.att_proj)
    elif model_type == "phi-msft":
        for block in last_blocks(model.transformer.h):
            block.mixer.Wqkv = LoRALinear.from_linear(block.mixer.Wqkv)
            block.moe.gate = LoRALinear.from_linear(block.moe.gate)
    else:
        raise ValueError(f"Lora does not support {model.model_type}")


ModuleNotFoundError: No module named 'mlx'

In [18]:
from mlx_lm import load, generate

# Load the model and tokenizer from the local "mlx_model" path.
model, tokenizer = load("mlx_model")

# Swap the targeted linear layers of the last 4 blocks for LoRALinear wrappers.
# NOTE(review): the previous comment said this also unfreezes parameters, but
# linear_to_lora_layers as defined in this notebook never calls freeze/unfreeze
# -- confirm whether that matters for inference-only use.
linear_to_lora_layers(model, 4)

In [19]:
# Load weights from "adapters.npz" into the LoRA-wrapped model.
# NOTE(review): strict=False presumably allows the file to hold only a subset
# of the model's parameters (the LoRA factors) -- confirm against the MLX docs.
model.load_weights("adapters.npz", strict=False)
# Switch the model to evaluation mode before generating text.
# NOTE(review): presumably this disables the dropout inside LoRALinear -- confirm.
model.eval()

In [20]:
def gen_text(inp):
    """Generate a reply for ``inp`` with the notebook-level model and tokenizer.

    The input is wrapped in an ``[INST] ... [/INST]`` prompt and passed to
    ``generate`` with a temperature of 0.8 and at most 500 new tokens.

    Parameters:
    - inp: The text to place inside the instruction prompt.

    Returns:
    - Whatever ``generate`` returns for that prompt.
    """
    prompt = f"<s>[INST]{inp}[/INST]"
    sampling_options = {"temp": 0.8, "max_tokens": 500}
    return generate(
        model=model,
        tokenizer=tokenizer,
        prompt=prompt,
        **sampling_options,
    )

In [47]:
import sqlite3
import subprocess

# Open a module-level connection to the Messages database.
# NOTE(review): the path is a hard-coded absolute path for one user account, so
# it only works on that machine. This connection is also never used or closed
# by any other cell shown here -- main() opens its own connection from
# DATABASE_PATH -- so this cell looks like leftover scratch work.
conn = sqlite3.connect('/Users/neelredkar/Library/Messages/chat.db')


In [46]:
def _escape_applescript_string(value):
    """Escape ``value`` so it is safe inside an AppleScript double-quoted literal."""
    # Backslashes go first so the backslashes added for quotes are not doubled.
    return str(value).replace("\\", "\\\\").replace('"', '\\"')


def send_imessage(number, text):
    """
    Sends an iMessage to the specified number with the given text.

    Both values are escaped before being embedded in the AppleScript source, so
    a double quote or backslash in the message (for example in model-generated
    text) cannot terminate the string literal and inject further AppleScript.

    Parameters:
    - number: The phone number or email address to send the message to. Must be a string.
    - text: The message text to send. Must be a string.

    Returns:
    - None. Success or failure is reported on stdout; a failing ``osascript``
      call is caught and printed rather than raised.
    """
    safe_text = _escape_applescript_string(text)
    safe_number = _escape_applescript_string(number)

    # AppleScript command to send an iMessage
    applescript_command = f'''tell application "Messages"
        send "{safe_text}" to buddy "{safe_number}" of (service 1 whose service type is iMessage)
    end tell'''

    # Execute the AppleScript command; check=True turns a non-zero exit status
    # into CalledProcessError, which is reported below.
    try:
        subprocess.run(["osascript", "-e", applescript_command], check=True)
        print(f"Message sent to {number}: {text}")
    except subprocess.CalledProcessError as e:
        print(f"Failed to send message to {number}: {e}")

In [45]:
import sqlite3
import os
import time

# Location of the Messages chat.db database, with the leading "~" already
# expanded to the current user's home directory.
DATABASE_PATH = os.path.expanduser("~/Library/Messages/chat.db")

def get_last_message_id(cursor):
    """Get the ID of the last message in the database.

    Returns the largest ROWID in the ``message`` table, or 0 when the table is
    empty. ``MAX(ROWID)`` is NULL for an empty table; returning ``None`` would
    make a later ``ROWID > ?`` comparison against it match nothing, so a poller
    started on an empty table would never see any message.
    """
    cursor.execute("SELECT MAX(ROWID) FROM message")
    last_id = cursor.fetchone()[0]
    return 0 if last_id is None else last_id

def fetch_new_messages(cursor, last_id):
    """Return every message row whose ROWID is greater than ``last_id``.

    Each row is a ``(ROWID, handle_id, text)`` tuple; rows are ordered by
    ascending ROWID.
    """
    query = "SELECT ROWID, handle_id, text FROM message WHERE ROWID > ? ORDER BY ROWID ASC"
    params = (last_id,)
    cursor.execute(query, params)
    rows = cursor.fetchall()
    return rows

def get_sender(handle_id, cursor):
    """Look up the ``id`` column of the ``handle`` row with the given ROWID.

    Returns the stored id, or the string "Unknown" when no such row exists.
    """
    cursor.execute("SELECT id FROM handle WHERE ROWID = ?", (handle_id,))
    row = cursor.fetchone()
    if not row:
        return "Unknown"
    return row[0]

def fetch_past_messages(cursor, handle_id, limit=10):
    """Return the text of the most recent messages for one handle, oldest first.

    Only rows with non-NULL ``text`` and NULL ``cache_roomnames`` are
    considered. At most ``limit`` rows (the ones with the highest ROWIDs) are
    selected, and the result is returned in ascending ROWID order.
    """
    cursor.execute("""
        SELECT text FROM message
        WHERE handle_id = ? AND text IS NOT NULL AND cache_roomnames IS NULL
        ORDER BY ROWID DESC
        LIMIT ?
    """, (handle_id, limit))
    newest_first = cursor.fetchall()
    # The query yields newest first; flip it so the list reads chronologically.
    return [row[0] for row in reversed(newest_first)]

def main():
    """Poll the Messages database and auto-reply to each new text message.

    The loop remembers the highest ROWID seen so far, checks for newer rows
    every 5 seconds, and for each new row with non-NULL text generates a reply
    from that handle's recent messages and sends it back. It runs until
    interrupted with KeyboardInterrupt; the database connection is closed on
    the way out.
    """
    connection = sqlite3.connect(DATABASE_PATH)
    db_cursor = connection.cursor()

    # Only rows added after start-up are answered.
    last_id = get_last_message_id(db_cursor)

    try:
        while True:
            for rowid, handle_id, text in fetch_new_messages(db_cursor, last_id):
                if text is not None:
                    sender = get_sender(handle_id, db_cursor)
                    history = fetch_past_messages(db_cursor, handle_id)
                    # The recent history is joined line by line to form the prompt.
                    reply = gen_text("\n".join(history))
                    print(history)
                    send_imessage(sender, reply)
                    print(f"Responded to {sender} based on the last 10 messages.")
                # Advance the cursor position even for rows without text.
                last_id = rowid

            # Pause before polling again.
            time.sleep(5)
    except KeyboardInterrupt:
        print("Stopping the script.")
    finally:
        connection.close()

In [48]:
# Start the polling loop. It runs until the kernel is interrupted
# (KeyboardInterrupt), and it sends real iMessages to whoever writes in.
main()

['Yoooo ', ' I need that', 'then come back to me', 'https://www.vice.com/en/article/9bv7qv/the-vice-guide-to-partying-parties', 'This is insane ', 'What is this article ', 'WHY DID Y UNSEND', '😡😡😡😡', 'I did.', 'I read it']


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Message sent to +16263531677: We’re partying like it’s 1929
Responded to +16263531677 based on the last 10 messages.
['Yoooo ', ' I need that', 'then come back to me', 'https://www.vice.com/en/article/9bv7qv/the-vice-guide-to-partying-parties', 'This is insane ', 'What is this article ', 'WHY DID Y UNSEND', '😡😡😡😡', 'I did.', 'I read it']
Message sent to +16263531677: Fuck
Responded to +16263531677 based on the last 10 messages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


['why you look so sad ', 'please respond to me', 'i need you', 'i can’t live without you', 'i’m ', 'um ', 'yes cat yes ', 'will you give me the cat ', 'will you marry me? ', 'LETS GOOOO']
Message sent to +13108803214: I’ve been saving this post for after 6 months
Responded to +13108803214 based on the last 10 messages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[' I need that', 'then come back to me', 'https://www.vice.com/en/article/9bv7qv/the-vice-guide-to-partying-parties', 'This is insane ', 'What is this article ', 'WHY DID Y UNSEND', '😡😡😡😡', 'I did.', 'I read it', 'Oh good']
Message sent to +16263531677: I just rewatched the documentary on it
Responded to +16263531677 based on the last 10 messages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


['https://www.vice.com/en/article/9bv7qv/the-vice-guide-to-partying-parties', 'This is insane ', 'What is this article ', 'WHY DID Y UNSEND', '😡😡😡😡', 'I did.', 'I read it', 'Oh good', 'Loved “WHY DID Y UNSEND”', 'Loved “I read it”']
Message sent to +16263531677: Vice
Responded to +16263531677 based on the last 10 messages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


['https://www.vice.com/en/article/9bv7qv/the-vice-guide-to-partying-parties', 'This is insane ', 'What is this article ', 'WHY DID Y UNSEND', '😡😡😡😡', 'I did.', 'I read it', 'Oh good', 'Loved “WHY DID Y UNSEND”', 'Loved “I read it”']
Message sent to +16263531677: THAT IS AWESOME
Responded to +16263531677 based on the last 10 messages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


['This is insane ', 'What is this article ', 'WHY DID Y UNSEND', '😡😡😡😡', 'I did.', 'I read it', 'Oh good', 'Loved “WHY DID Y UNSEND”', 'Loved “I read it”', 'EXSCTLY ']
Message sent to +16263531677: I found that article afterwards
Responded to +16263531677 based on the last 10 messages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


['WHY DID Y UNSEND', '😡😡😡😡', 'I did.', 'I read it', 'Oh good', 'Loved “WHY DID Y UNSEND”', 'Loved “I read it”', 'EXSCTLY ', 'WE JUST NEED THE TRANS PERSON', 'BAHSGSGSGSG']
Message sent to +16263531677: D2
Responded to +16263531677 based on the last 10 messages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


['WHY DID Y UNSEND', '😡😡😡😡', 'I did.', 'I read it', 'Oh good', 'Loved “WHY DID Y UNSEND”', 'Loved “I read it”', 'EXSCTLY ', 'WE JUST NEED THE TRANS PERSON', 'BAHSGSGSGSG']
Message sent to +16263531677: I’m so happy
Responded to +16263531677 based on the last 10 messages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Stopping the script.
