In [None]:
import requests
import os
from dotenv import load_dotenv
load_dotenv()

# Endpoint credentials and URL are read from the environment after
# load_dotenv() has run; either value is None when the variable is unset.
HUGGING_FACE_API_KEY = os.environ.get("HUGGING_FACE_API_KEY")
HUGGING_FACE_ENDPOINT_URL = os.environ.get("HUGGING_FACE_ENDPOINT_URL")

# Headers attached to every request sent to the endpoint.
headers = {
    "Authorization": "Bearer {}".format(HUGGING_FACE_API_KEY),
    "Content-Type": "application/json",
}

def query(payload):
    """POST `payload` as JSON to the configured endpoint and return the decoded JSON reply.

    Uses the module-level `HUGGING_FACE_ENDPOINT_URL` and `headers`. The HTTP
    status code is not inspected, so whatever JSON body the endpoint sends
    back is handed to the caller unchanged. No timeout is passed to
    `requests.post`.
    """
    return requests.post(
        HUGGING_FACE_ENDPOINT_URL,
        json=payload,
        headers=headers,
    ).json()

# Smoke-test the endpoint with a single prompt; the bare name on the last line
# makes the notebook display the decoded reply.
output = query({"inputs": "Hey, are you conscious? Can you talk to me?"})
output

In [19]:
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cpu"  # device name; note that the model load below is commented out

# Only the tokenizer is instantiated in this notebook; the line that would
# load the model weights is kept for reference but disabled.
# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

def get_num_tokens(prompt):
    """Return how many token ids the module-level `tokenizer` produces for `prompt`.

    The count is whatever the tokenizer emits for the string, including any
    special tokens it adds by default.

    Plain Python lists are requested instead of PyTorch tensors: only the
    length is needed, so building a tensor and moving it to a device is
    unnecessary work and an unnecessary dependency on torch.
    """
    return len(tokenizer(prompt)["input_ids"])

In [20]:
# Sanity check: count the tokens of a sample prompt (the cell displays the result).
prompt = "Hey, are you conscious? Can you talk to me?"
get_num_tokens(prompt)

13

In [27]:
def get_num_tokens_list(string_list: list) -> int:
    """Return the combined token count of every string in `string_list`.

    Each string is counted on its own with `get_num_tokens` and the
    per-string counts are added together; an empty list yields 0.
    """
    total = 0
    for text in string_list:
        total += get_num_tokens(text)
    return total

def maintain_token_limit(string_list: list, new_string: str, token_limit: int = 1024, count_tokens=None) -> None:
    """Append `new_string` to `string_list`, evicting the oldest entries to respect a token budget.

    Entries are removed from the front of the list (oldest first) until the
    tokens of the remaining entries plus those of `new_string` no longer
    exceed `token_limit`. `new_string` is always appended, even when it is
    larger than the budget on its own; in that case every older entry is
    evicted and the list ends up holding only `new_string`.

    Args:
        string_list: List of strings, mutated in place.
        new_string: String to append as the newest entry.
        token_limit: Maximum combined token count to aim for.
        count_tokens: Optional callable mapping a string to its token count.
            Defaults to the module-level `get_num_tokens`.

    Returns:
        None. The result is the in-place change to `string_list`.

    Note:
        Token counts are taken per string and summed, which may differ from
        the token count of the concatenated text.
    """
    if count_tokens is None:
        # Resolved at call time so the notebook's tokenizer-backed counter is
        # only required when no custom counter is supplied.
        count_tokens = get_num_tokens

    # Tokenize each string exactly once.
    new_tokens = count_tokens(new_string)
    counts = [count_tokens(existing) for existing in string_list]
    total_tokens = sum(counts)

    # Work out how many of the oldest entries must go. The bound on `evicted`
    # stops the loop once the list is exhausted, which is what happens when
    # `new_string` alone is over budget.
    evicted = 0
    while evicted < len(counts) and total_tokens + new_tokens > token_limit:
        total_tokens -= counts[evicted]
        evicted += 1

    # Drop the evicted prefix in one slice deletion, then add the new entry.
    del string_list[:evicted]
    string_list.append(new_string)

# Example usage: push three strings through a 15-token budget, printing the
# buffer and its total token count after every insertion.
token_limit = 15
string_list = []

for sample in (
    "This is a long string.",
    "Short string.",
    "This is another long string.",
):
    maintain_token_limit(string_list, sample, token_limit)
    print(string_list)
    print(get_num_tokens_list(string_list))


# maintain_token_limit(string_list, "Another string.", token_limit)
# print(string_list)

['This is a long string.']
7
['This is a long string.', 'Short string.']
11
['Short string.', 'This is another long string.']
11


In [80]:
prompt = "Hey, are you conscious? Can you talk to me?"

def chat(prompt: str) -> str:
    """Generate a reply to `prompt` by repeatedly requesting short continuations.

    Every round sends `prompt` followed by everything generated so far and
    asks the endpoint for up to 30 new tokens. Each returned chunk is echoed
    to stdout (terminated by '|') and appended to the reply. The loop stops
    when the endpoint reports an error (the error text is appended to the
    reply), returns an empty result, or returns a whitespace-only chunk.

    NOTE(review): this assumes the endpoint returns only the newly generated
    text in `generated_text`, not the prompt as well; confirm against the
    endpoint configuration.

    NOTE: a later cell defines another `chat`; when the notebook is run top to
    bottom, that definition replaces this one.

    Args:
        prompt: Text the model should continue.

    Returns:
        The concatenation of all generated chunks, plus the error text if
        the endpoint reported one.
    """
    response = ""
    while True:
        output = query({
            "inputs": prompt + response,
            "parameters": {
                'max_new_tokens': 30,  # controls chunk/delta size
            },
        })
        # Error payloads arrive as a dict carrying an 'error' key.
        if isinstance(output, dict) and 'error' in output:
            response += f"\n\n{output['error']}"
            break
        if not output:
            break
        delta = output[0]['generated_text']
        # A blank chunk is treated as the end of generation.
        if delta.strip() == '':
            break
        print(delta, end='|', flush=True)
        response += delta
    return response

# response = chat(prompt)
# print(response)

In [28]:
prompt = "Hey, are you conscious? Can you talk to me?"

def chat(prompt: str, chunk_tokens: int = 32, token_limit: int = 1024, max_chunks=None) -> str:
    """Generate a reply to `prompt` in small chunks while keeping the context within a token budget.

    The context sent to the endpoint is a rolling window: it starts as
    `[prompt]`, each generated chunk is added through `maintain_token_limit`,
    and the oldest pieces (the prompt first) are dropped once the window
    would exceed `token_limit`. Every chunk is echoed to stdout (terminated
    by '|') and appended to the reply.

    The loop stops when the endpoint reports an error (the error text is
    appended to the reply), returns an empty result, returns a
    whitespace-only chunk, or `max_chunks` chunks have been collected.

    NOTE(review): this assumes the endpoint returns only the newly generated
    text in `generated_text`, not the prompt as well; confirm against the
    endpoint configuration.

    Args:
        prompt: Text the model should continue.
        chunk_tokens: `max_new_tokens` requested per round (chunk size).
        token_limit: Token budget for the rolling context window.
        max_chunks: Optional cap on the number of rounds. The default `None`
            keeps the loop unbounded, so it only ends on one of the stop
            conditions above.

    Returns:
        The concatenation of all generated chunks, plus the error text if
        the endpoint reported one.
    """
    response = ""
    deltas = [prompt]  # rolling context window sent to the endpoint
    chunks = 0

    while max_chunks is None or chunks < max_chunks:
        output = query({
            "inputs": ''.join(deltas),
            "parameters": {
                'max_new_tokens': chunk_tokens,  # controls chunk/delta size
            },
        })
        # Error payloads arrive as a dict carrying an 'error' key.
        if isinstance(output, dict) and 'error' in output:
            response += f"\n\n{output['error']}"
            break
        if not output:
            break
        delta = output[0]['generated_text']
        # Check for the end of generation before touching the window, so a
        # blank chunk never evicts useful context or costs a tokenizer call.
        if delta.strip() == '':
            break
        maintain_token_limit(deltas, delta, token_limit=token_limit)
        print(delta, end='|', flush=True)
        response += delta
        chunks += 1
    return response

# response = chat(prompt)
# print(response)

In [29]:
# Code-generation request; chat() echoes each chunk as it arrives, so the full
# reply is not printed again here.
prompt = "Write a python function that masks email addresses from a string."
response = chat(prompt)
# print(response)



```python
import re

def mask_emails(text):
    pattern = r'\b[A-Za-z|0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,|}\b'
    return re.sub(pattern, '[REDACTED]', text)
```

This function uses regular expressions to| find all email addresses in the input string and replaces them with the string '[REDACTED]'. The pattern used in the regular expression matches most common| email formats, but it's not a comprehensive list of all possible email formats.|

In [78]:
# Render the accumulated response as Markdown.
from IPython.display import Markdown, display
display(Markdown(response))



```python
import re

def mask_emails(text):
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
    return re.sub(pattern, '[REDACTED]', text)
```

This function uses regular expressions to find all email addresses in the input string and replaces them with the string '[REDACTED]'. The pattern used in the regular expression matches most common email formats, but it's not a comprehensive list of all possible email formats.

In [30]:
# Follow-up request: embed the previous reply (held in `response`) in a new prompt.
prompt = f"Write assertion statements to test edge cases for the following function: {response}"
# prompt
# NOTE: `response` is read on the line above and overwritten below, so
# re-running this cell builds the prompt from this cell's previous output
# rather than from the originally generated function.
response = chat(prompt)
display(Markdown(response))

 Therefore, it's important to test the function with edge cases to ensure that it works correctly in all scenarios.

Here are some edge cases to test|:

1. Empty string:
```python
assert mask_emails('') == ''
```
2. String with no| email addresses:
```python
assert mask_emails('This is a test string with no email addresses.') == 'This is a test string| with no email addresses.'
```
3. String with one email address:
```python
assert mask_emails('Please contact us at| [john@example.com](mailto:john@example.com) for more information.') == 'Please contact us at [REDA|CTED] for more information.'
```
4. String with multiple email addresses:
```python
assert mask_emails('Please contact| us at [john@example.com](mailto:john@example.com) or [jane@example.com](mailto:|jane@example.com) for more information.') == 'Please contact us at [REDACTED] or [REDACTED] for| more information.'
```
5. String with an email address that contains special characters:
```python
assert mask_emails('Please contact

KeyboardInterrupt: 