In [1]:
!pip install -q transformers torch langchain-community youtube_transcript_api

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/2.5 MB[0m [31m12.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m2.4/2.5 MB[0m [31m29.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.5/2.5 MB[0m [31m28.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m622.3/622.3 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m411.6/411.6 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━

# 1. Chat with the data
You can use `document_loaders` from `langchain-community` to load different types of data sources and chat with them using LLMs.

There are a lot of document loaders available in the `langchain-community` library, such as:
- [Web](https://python.langchain.com/docs/integrations/document_loaders/web_base/)
- [Twitter](https://python.langchain.com/docs/integrations/document_loaders/twitter/)
- [Discord](https://python.langchain.com/docs/integrations/document_loaders/discord/)
- [GitHub](https://python.langchain.com/docs/integrations/document_loaders/github/)
- [CSV](https://python.langchain.com/docs/integrations/document_loaders/csv/)
- [YouTube](https://python.langchain.com/docs/integrations/document_loaders/youtube_transcript/)

and many more.

### Import the Loader
First, you need to import the loader you want to use from `langchain_community.document_loaders`. You can find all the loaders [here](https://python.langchain.com/docs/integrations/document_loaders/).

Note: some loaders require additional dependencies, so make sure to install them before using the loader.

In [None]:
from langchain_community.document_loaders import YOUR_LOADER

### Load the Data
Create a loader and use the `load` method of the loader to load the data.

In [None]:
# Instantiate the loader imported above: replace YOUR_LOADER and pass whatever
# arguments that particular loader needs.
loader = YOUR_LOADER() # Create your loader here

# `data` is later interpolated into the prompt via `BASE_PROMPT.format(..., data=data)`.
# NOTE(review): what `load()` returns depends on the chosen loader — confirm it
# renders sensibly as text before putting it into a prompt.
data = loader.load()

### Load the Model

In [None]:
from transformers import pipeline  # Hugging Face

# Identifier of the model to load, e.g. "unsloth/Llama-3.2-1B-Instruct".
# Left empty on purpose: fill it in before running this cell.
model_id = ""

# Text-generation pipeline that `generate` calls further down.
model = pipeline(task="text-generation", model=model_id, torch_dtype="auto")

### Implement `generate` Function


In [None]:
def generate(prompt: str) -> str:
    """Run `prompt` through the global `model` pipeline and return only the generated text.

    Args:
        prompt: Text passed to the text-generation pipeline as-is. If your model
            expects a chat format instead (a list of ``{"role": ..., "content": ...}``
            dictionaries), build that structure here before calling the model.

    Returns:
        The text produced by the model, without the prompt and without the
        list/dictionary wrapper the pipeline puts around it.
    """
    # `return_full_text=False` asks the pipeline to leave the prompt out of its output.
    response = model(prompt, max_new_tokens=512, return_full_text=False)

    # The pipeline returns a list with one dictionary per generated sequence.
    generated = response[0]["generated_text"]

    # Chat-style calls can hand back a list of messages; the reply is the last one.
    if isinstance(generated, list):
        generated = generated[-1]["content"]

    return generated

In [None]:
# Smoke test for `generate`: the cell output shows whatever `generate` returns.
generate("Hello World!")

### Write a Prompt
Write a base prompt that will generate an answer to the user query based on the provided data.

In [None]:
# Prompt template. It is filled with `BASE_PROMPT.format(query=..., data=...)`,
# so write the placeholders as `{query}` and `{data}`. Any other literal braces
# in the text must be doubled (`{{` / `}}`) for `str.format`.
BASE_PROMPT = """
Your prompt here. Make sure to include the placeholders for `query` and `data`.
"""

### Generate the Response

In [None]:
# Question to ask about the loaded data.
query = "Your query here. For example, 'What is the data about?'"
# Fill the template; `data` is inserted using its string representation.
prompt = BASE_PROMPT.format(query=query, data=data)

In [None]:
# Send the filled-in prompt to the model; the cell output is the value `generate` returns.
generate(prompt)

# 2. Prompt Chaining
You can chain multiple prompts one after another to perform transformations or additional processes on the generated responses before reaching a final desired state.

In this task we will try to convert a coding question into a code snippet. We will use the following chain:
1. Generate a step-by-step plan to solve the problem.
2. Generate additional considerations to take into account.
3. Generate the final code snippet.

### Define the Prompts

Hints:
- Make sure to structure the prompt. You can use HTML tags, markdown, or any other formatting options.
- Use placeholders in the prompts.

In [None]:
# Step 1 template. `run_chain` fills it with `.format(query=...)`, so the
# placeholder must be written as `{query}`.
GENERATE_PLAN_PROMPT = """
Your prompt here. Include the placeholder for `query`.

Make sure that this prompt generates a step-by-step plan to solve the problem, not the final code.
"""

In [None]:
# Step 2 template. `run_chain` fills it with `.format(query=..., plan=...)`, so the
# placeholders must be written as `{query}` and `{plan}`.
GENERATE_CONSIDERATIONS_PROMPT = """
Your prompt here. Include the placeholder for `query` and `plan`.

Make sure that this prompt generates additional considerations, not the final code or the new plan.
"""

In [None]:
# Step 3 template. `run_chain` fills it with
# `.format(query=..., plan=..., considerations=...)`, so the placeholders must be
# written as `{query}`, `{plan}` and `{considerations}`.
GENERATE_CODE_PROMPT = """
Your prompt here. Include the placeholders for `query`, `plan`, and `considerations`.

Make sure that this prompt generates just the final code snippet without any additional information or comments from the model.
"""

### Create the Chain

In [None]:
def run_chain(query: str) -> str:
    """Turn a coding question into code through three chained `generate` calls.

    Each stage prints a progress line followed by the stage's output. The plan
    feeds the considerations prompt, and both feed the final code prompt.

    Args:
        query: The coding question to solve.

    Returns:
        Whatever `generate` returns for the final code prompt.
    """
    # Stage 1: step-by-step plan.
    print("Generating a step-by-step plan...")
    plan = generate(GENERATE_PLAN_PROMPT.format(query=query))
    print(plan)

    # Stage 2: additional considerations, given the plan.
    print("\n\nGenerating additional considerations...")
    considerations_prompt = GENERATE_CONSIDERATIONS_PROMPT.format(plan=plan, query=query)
    considerations = generate(considerations_prompt)
    print(considerations)

    # Stage 3: final code, given the plan and the considerations.
    print("\n\nGenerating the final code snippet...")
    stage_inputs = {"query": query, "plan": plan, "considerations": considerations}
    code = generate(GENERATE_CODE_PROMPT.format(**stage_inputs))
    print(code)

    return code

### Test the Chain

In [None]:
# Sample coding questions to feed to `run_chain`.
example_query_1 = "Write a Python function to find all prime numbers in a range from 1 to n."
example_query_2 = "Write a function that takes a list of words and a single word, and returns all the words in the list that are anagrams of the given word."
example_query_3 = ""  # Add your own query to test the chain

In [None]:
# Run the full chain on the first example; each stage prints its output along the way.
code_snippet = run_chain(example_query_1)

In [None]:
from IPython.display import display, Code

# Render the snippet returned by `run_chain` as a Python code listing.
display(Code(code_snippet, language='python'))

You can copy-paste the generated code snippet and run it below to see if it works as expected!

In [None]:
# Paste the generated code snippet here

# 3. Text Validator - Homework
Write a text validator that checks whether a text meets all of the criteria. If the text is not valid, the validator should provide feedback.

### Define the Rules

In [None]:
# Validation rules: maps a rule code to its human-readable description.
# `run_validator` iterates over the keys and passes each one to `validate_rule`
# as `rule_code`.
RULES = {
    "no_personal_info": "Should not contain any personal information.",
    "english_only": "Should be in English.",
    "no_questions": "Should not contain any questions.",
    # Feel free to add more rules here
}

### Implement the Validator

In [None]:
# Prompt template for validating a text against one rule. Placeholders:
# `{rule_code}`, `{rule_description}` and `{text_to_check}`. The doubled braces
# render as literal `{` / `}` after `str.format`.
# The output examples are valid JSON (lowercase booleans, no comments) so that a
# model copying them produces something a JSON parser accepts.
VALIDATION_PROMPT = """
You are a validator. You need to ensure that the provided text meets the criteria.

<Criteria>
Code: {rule_code}
Description: {rule_description}
</Criteria>

<Text to check>
{text_to_check}
</Text to check>

# Output format
Output the result as a JSON object with exactly these two keys:
- "criteria_met": JSON boolean, true if the criteria is met, false otherwise
- "feedback": string, feedback if the criteria is not met, otherwise an empty string

Example when the criteria is met:
{{"criteria_met": true, "feedback": ""}}

Example when the criteria is not met:
{{"criteria_met": false, "feedback": "<short explanation of what is wrong>"}}

Return just the JSON without any additional information or comments.
"""

In [None]:
import json


def validate_rule(text: str, rule_code: str) -> dict:
    """Check `text` against a single rule from `RULES` (exercise stub).

    The body is intentionally left as `pass`, so the function currently returns
    `None`; the numbered steps below describe the implementation to write.

    Args:
        text: The text to validate.
        rule_code: Key of the rule in the `RULES` dictionary.

    Returns:
        Once implemented: a dictionary with the keys "criteria_met", "feedback"
        and "rule_code".
    """
    # 1. Load the rule description from the RULES dictionary for the given `rule_code`
    # 2. Prepare the prompt using `VALIDATION_PROMPT` and `format` method
    # 3. Run the `generate` function
    # 4. Use `json.loads` to transform the string output into a dictionary
    #    (`json.dumps` goes the other way: it serializes an object into a string)
    # 5. Add the `rule_code` to the dictionary
    # 6. Return the dictionary. The dictionary should contain the following keys: "criteria_met", "feedback", "rule_code"
    pass

### Test the Validator

In [None]:
def run_validator(text: str):
    """Validate `text` against every rule in `RULES`, stopping at the first failure.

    Rules are checked in dictionary order and progress is printed for each one.

    Args:
        text: The text to validate.

    Raises:
        AssertionError: If a rule is not met; the message carries the rule code
            and the feedback returned by `validate_rule`.
    """
    for rule_code in RULES:
        print(f"Checking rule '{rule_code}'...")
        verdict = validate_rule(text, rule_code)

        assert verdict["criteria_met"], f"Rule '{rule_code}' is not met. Feedback: {verdict['feedback']}"

        print("Rule is met.")

In [None]:
# Sample input for the validator: it mentions a name and ends with a question.
text_to_check = "My name is John and I like to play basketball. Do you know how to play basketball?"

In [None]:
# Check the sample text against every rule in RULES; `run_validator` raises
# AssertionError on the first rule that is not met.
run_validator(text_to_check)

### Implement the `anonymize` Function - Bonus Task
If the text contains personal information, you can implement the `anonymize` function that will replace the personal information with a placeholder.

In [None]:
# Prompt template for the anonymization request (placeholder text — write your own).
ANONYMIZE_PROMPT = """
Your prompt here.
"""


def anonymize(text: str) -> str:
    """Return `text` with personal information replaced by placeholders (exercise stub).

    The body is intentionally left as `pass`, so the function currently returns
    `None`; implement it so that it returns the anonymized string.

    Args:
        text: The text that may contain personal information.

    Returns:
        Once implemented: the anonymized text.
    """
    # Implement the function that will replace the personal information with a placeholder
    # Make sure to return the anonymized text (string)
    pass

In [None]:
# Validate the sample text against the personal-information rule; if it fails,
# anonymize the text and validate the anonymized version.
print("Checking rule 'no_personal_info'...")
result = validate_rule(text_to_check, rule_code="no_personal_info")

if not result["criteria_met"]:
    print("Personal information found. Anonymizing the text...")
    anonymized_text = anonymize(text_to_check)
    print(anonymized_text)

    print("Re-running the validation...")
    # Keep the new result: the assert below must judge the anonymized text, not
    # the first validation, which is already known to have failed.
    result = validate_rule(anonymized_text, rule_code="no_personal_info")

    assert result["criteria_met"], "Anonymized text still contains personal information. Refine your prompt."

print("Rule is met.")