## Combining Azure Deployment Usage with IPython Magic

In [1]:
# Import the packaged msticpy agent modules used by this section.
from msticpy import aiagents
from msticpy.nbtools import rag_magic

# Load the packaged extension into this kernel.
# NOTE(review): presumably this is what provides the %%ask cell magic used below - confirm.
%load_ext msticpy.nbtools.rag_magic




In [19]:
# Reload the extension module so edits to it take effect without restarting the kernel.
%reload_ext msticpy.nbtools.rag_magic

In [3]:
%%ask
What are the three things that I need to connect to Azure Query Provider?

Question: What are the three things that I need to connect to Azure Query Provider?

VectorDB returns doc_ids:  [['1f5c9ab2', '83c81e7e', '7ae89bb6', 'c034eac4', '21b41b41', 'a015137a', 'e36d8995', 'b1fb09dd', 'b208a1f4', '1118b3aa', '6aca0fa2', 'a1c83695', '876052d7', 'd36c93bd', 'cd1fdcce', '8357f6c2', '14b2d795', 'a97d6fd2', '29c72f81', '6819eb85']]
[32mAdding content of doc 1f5c9ab2 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy.
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not output any reasoning. Only return your best response, which may not be the most recent.
        Following your best response, list your sources, including file title

### Older code, from before it was added to the package

In [84]:
from IPython.core.magic import register_cell_magic, Magics, magics_class, cell_magic
from IPython import get_ipython
from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from openai import AzureOpenAI
from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
from autogen.retrieve_utils import TEXT_FORMATS
from utilities import get_raw_links

# Token provider built from the default Azure credential chain, scoped to
# Cognitive Services; it is handed to the LLM config below.
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default",
)

# LLM settings shared by the agents created later in this notebook.
config = dict(
    model="gpt-4o",
    api_type="azure",
    api_version="2024-02-15-preview",
    base_url="https://msticpy-intern-project.openai.azure.com/",
    azure_ad_token_provider=token_provider,
)

# GitHub location of the documentation to index, and the file types to keep.
owner, repo, path = "microsoft", "msticpy", "docs/source"
accepted_formats = [
    "txt", "json", "csv", "tsv", "md",
    "html", "htm", "rtf", "rst", "jsonl",
    "log", "xml", "yaml", "yml", "pdf",
]

# Raw links for every file under `path` whose format is accepted.
raw_links = get_raw_links(owner, repo, path, accepted_formats)

print("Model to Use: ", config["model"])
print("Accepted file formats for `docs_path`:")
print(TEXT_FORMATS)

Model to Use:  gpt-4o
Accepted file formats for `docs_path`:
['txt', 'json', 'csv', 'tsv', 'md', 'html', 'htm', 'rtf', 'rst', 'jsonl', 'log', 'xml', 'yaml', 'yml', 'pdf']


In [85]:
# AutogenMagic class handles the initialization and registration of
# RAG and Assistant agents using Azure token providers
@magics_class
class AutogenMagic(Magics):
    """IPython magics that answer MSTICpy questions through a retrieval chat.

    Provides the ``%%ask`` cell magic. The cell body is the question; adding
    ``--v`` (or ``-v`` / ``--verbose``) on the magic line selects the prompt
    without the "be as short as possible" rules.

    The agents are built from the module-level ``config`` (LLM settings) and
    ``raw_links`` (documents to index), so the configuration cell must run
    before this class is registered.
    """

    # Flags on the ``%%ask`` line that select the verbose prompt.
    # ``--v`` is the original spelling; the others are aliases.
    _VERBOSE_FLAGS = frozenset({"--v", "-v", "--verbose"})

    # The prompt is kept as a *template*: autogen's RetrieveUserProxyAgent
    # renders ``customized_prompt`` with
    # ``str.format(input_question=..., input_context=...)``. Building it with
    # an f-string (as before) left no ``{input_context}`` slot, so retrieved
    # documents were never inserted, and a question containing ``{`` or ``}``
    # made the later ``format`` call raise.
    _PROMPT_HEADER = (
        "You're a retrieve augmented chatbot for MSTICpy. \n"
        "        You answer user's questions based on your own knowledge and the context provided by the user.\n"
        "        If you can't answer the question with or without the current context, you should reply exactly \n"
        "        `I am unable to find relevant context to answer your question.`.\n"
    )
    # Extra rules used only for the default (non-verbose) prompt.
    _PROMPT_CONCISE_RULES = (
        "        You must give as short an answer as possible. Do not output any reasoning. Only return your best response, which may not be the most recent.\n"
        "        Following your best response, list your sources, including file title and hyperlink if available.\n"
        "        Terminate immediately once you have your best response.\n"
        "        Do not allow the ragproxyagent to reply again if not needed.\n"
    )
    _PROMPT_FOOTER = (
        "         \n"
        "        User's Question is: {input_question}\n"
        "\n"
        "        Context is: {input_context}"
    )

    def __init__(self, shell):
        """Create the assistant and retrieval proxy agents.

        Parameters
        ----------
        shell
            The IPython shell instance the magics are attached to.
        """
        super().__init__(shell)
        self.assistant = RetrieveAssistantAgent(
            name="assistant",
            system_message="You are a helpful assistant to security analysts using MSTICpy.",
            llm_config=config,
        )

        self.ragproxyagent = RetrieveUserProxyAgent(
            name="ragproxyagent",
            human_input_mode="NEVER",
            max_consecutive_auto_reply=3,
            retrieve_config={
                "task": "qa",
                "docs_path": raw_links,
                "chunk_token_size": 2000,
                "model": config["model"],
                "vector_db": "chroma",
                "collection_name": "MSTICpy_Docs",
                "overwrite": True,  # set to True if you want to overwrite an existing collection
            },
            code_execution_config=False,  # set to False if you don't want to execute the code
        )

    # Queries the RAG agent and processes response with Assistant agent
    def ask_magic(self, question, verbose=False):
        """Run one retrieval chat for ``question`` and return the chat result.

        Parameters
        ----------
        question
            The user's question; passed to the proxy agent as ``problem``.
        verbose
            When true, use the prompt without the brevity/termination rules.

        Returns
        -------
        The value returned by ``ragproxyagent.initiate_chat``.
        """
        self.assistant.reset()
        print(f"Question: {question}")

        # Toggle between the two prompt templates depending on the flag
        extra_rules = "" if verbose else self._PROMPT_CONCISE_RULES
        chosen_message = self._PROMPT_HEADER + extra_rules + self._PROMPT_FOOTER

        self.ragproxyagent.customized_prompt = chosen_message
        # NOTE(review): kept from the original; presumably the assistant agent
        # does not read this attribute - confirm before removing.
        self.assistant.customized_prompt = chosen_message

        return self.ragproxyagent.initiate_chat(
            self.assistant,
            message=self.ragproxyagent.message_generator,
            problem=question,
            summary_method="reflection_with_llm",
        )

    @cell_magic
    def ask(self, line: str, cell: str):
        """``%%ask [--v]`` cell magic: send the cell body to the RAG chat.

        Nothing is returned, so the chat result object is not echoed as the
        cell's output; the conversation itself is printed by the agents.
        """
        # Parse the line to check for the verbose flag
        verbose_flag = not self._VERBOSE_FLAGS.isdisjoint(line.split())

        # An empty cell would start a chat with no question; skip it instead.
        if not cell.strip():
            print("No question provided. Put your question in the body of the %%ask cell.")
            return

        self.ask_magic(cell, verbose=verbose_flag)
        
# Entry point that attaches the magics defined in this cell to a shell
def load_ipython_extension(ipython):
    """Register ``AutogenMagic`` on the given IPython shell.

    Parameters
    ----------
    ipython
        The shell object whose ``register_magics`` method is called.
    """
    magics_to_register = AutogenMagic
    ipython.register_magics(magics_to_register)

# Load the extension: register the magics on the currently running shell
# right away, so %%ask is usable in the cells that follow.
load_ipython_extension(get_ipython())

# Example Usage:
# %%ask
# Which msticpy module contains the code related to visualizing network graphs?

# Example Verbose Usage:
# %%ask --v
# Which msticpy module contains the code related to visualizing network graphs?

In [86]:
%%ask
What are the three things that I need to connect to Azure Query Provider?

Question: What are the three things that I need to connect to Azure Query Provider?

Trying to create collection.


2024-07-09 11:52:15,503 - autogen.agentchat.contrib.retrieve_user_proxy_agent - INFO - Found 384 chunks.[0m
2024-07-09 11:52:15,511 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No content embedding is provided. Will use the VectorDB's embedding function to generate the content embedding.[0m


VectorDB returns doc_ids:  [['1f5c9ab2', '83c81e7e', '7ae89bb6', 'c034eac4', '21b41b41', 'a015137a', 'e36d8995', 'b1fb09dd', 'b208a1f4', '1118b3aa', '6aca0fa2', 'a1c83695', '876052d7', 'd36c93bd', 'cd1fdcce', '8357f6c2', '14b2d795', 'b77aab4e', 'a97d6fd2', '758baca2']]
[32mAdding content of doc 1f5c9ab2 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not output any reasoning. Only return your best response, which may not be the most recent.
        Following your best response, list your sources, including file title and hyperlink if available.
        Terminate immediately once you have your best 

In [78]:
%%ask
How do I connect to the M365 Defender query provider?

Question: How do I connect to the M365 Defender query provider?

VectorDB returns doc_ids:  [['a1c83695', '8dabd0c4', 'a015137a', 'a97d6fd2', 'd36c93bd', 'c034eac4', '21b41b41', 'dc715506', '6aca0fa2', '83c81e7e', '14b2d795', 'e36d8995', '628b2ef9', '1f5c9ab2', 'c582d466', 'bbc823d7', '29c72f81', 'a0deff86', '758baca2', 'c7336e3f']]
[32mAdding content of doc a1c83695 to context.[0m
[32mAdding content of doc 8dabd0c4 to context.[0m
[32mAdding content of doc a015137a to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not output any reasoning. Only return your best response, which may not be the most

In [55]:
%%ask
What do I need to add to my msticpyconfig.yaml config for the Azure Resource Graph query provider?

Question: What do I need to add to my msticpyconfig.yaml config for the Azure Resource Graph query provider?

VectorDB returns doc_ids:  [['1f5c9ab2', 'e36d8995', 'b77aab4e', '14b2d795', '1118b3aa', '6aca0fa2', 'b208a1f4', '5164cf09', '876052d7', '050704f6', 'd36c93bd', 'a015137a', 'f0df9810', '71b249fe', '015064bb', '849b41da', 'cd1fdcce', 'b1fb09dd', '6df0d12c', '7ae89bb6']]
[32mAdding content of doc 1f5c9ab2 to context.[0m


[32mAdding content of doc e36d8995 to context.[0m
[32mAdding content of doc b77aab4e to context.[0m
[32mAdding content of doc 14b2d795 to context.[0m
[32mAdding content of doc 1118b3aa to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not output any reasoning. Only return your best response, which may not be the most recent.
        Following your best response, list your sources, including file title and hyperlink if available.
        Terminate immediately once you have your best response.
        Do not allow the ragproxyagent to reply again if not needed.
         
        User's Question i

In [56]:
%%ask
Does the Splunk query provider support device code authentication?

Question: Does the Splunk query provider support device code authentication?

VectorDB returns doc_ids:  [['83c81e7e', '3db36d46', 'b208a1f4', 'a015137a', '352664f0', 'fd061cb5', '7ae89bb6', '21b41b41', '6aca0fa2', '1f5c9ab2', '32e8475e', '8357f6c2', 'cef15b90', 'c034eac4', '78f19b62', 'd36c93bd', 'f303b086', '14b2d795', 'b1fb09dd', '3fceb277']]
[32mAdding content of doc 83c81e7e to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not output any reasoning. Only return your best response, which may not be the most recent.
        Following your best response, list your sources, including file title and 

In [57]:
%%ask 
How can I plot IP addresses in this dataframe on a map?

Question: How can I plot IP addresses in this dataframe on a map?

VectorDB returns doc_ids:  [['c5da864a', '5cbaf730', 'bcc5361d', 'da746402', '0140ee98', '19724244', 'd6199fac', '79cea9ef', '10e922f3', '72278c06', '81544be9', '301afa6f', 'cbcd76c1', '808aa8bb', '7369ffc2', '17cfc909', '094373af', '3d9c1e8e', '0f531c09', '352664f0']]
[32mAdding content of doc c5da864a to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not output any reasoning. Only return your best response, which may not be the most recent.
        Following your best response, list your sources, including file title and hyperlink i

In [58]:
%%ask 
How do I create a new custom data provider with msticpy?

Question: How do I create a new custom data provider with msticpy?

VectorDB returns doc_ids:  [['21b41b41', 'ff457565', '1118b3aa', 'a97d6fd2', '4f727bb6', 'b40f84cc', 'c87470ff', '4a999eef', 'fbc0da4b', 'b7e667f7', '370436e3', '29c72f81', 'd79d6156', '6aca0fa2', '940f3fed', 'fdc8d8e8', 'a0deff86', '89a1fb28', '33ff9516', '5164cf09']]
[32mAdding content of doc 21b41b41 to context.[0m
[32mAdding content of doc ff457565 to context.[0m
[32mAdding content of doc 1118b3aa to context.[0m
[32mAdding content of doc a97d6fd2 to context.[0m
[32mAdding content of doc 4f727bb6 to context.[0m
[32mAdding content of doc b40f84cc to context.[0m
[32mAdding content of doc c87470ff to context.[0m
[32mAdding content of doc 4a999eef to context.[0m
[32mAdding content of doc fbc0da4b to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
  

In [59]:
%%ask 
How do I list which TI providers are currently enabled?

Question: How do I list which TI providers are currently enabled?

VectorDB returns doc_ids:  [['29c72f81', '21b41b41', '1f5c9ab2', '3a87b2ae', 'a1c83695', 'a97d6fd2', 'dc715506', '4a999eef', '9fa07157', '1118b3aa', 'c4f51c29', '6819eb85', '0cd16cda', 'b77aab4e', '7fe121a7', 'fdc8d8e8', '8dabd0c4', '758baca2', '94665ddc', 'f634508c']]
[32mAdding content of doc 29c72f81 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not output any reasoning. Only return your best response, which may not be the most recent.
        Following your best response, list your sources, including file title and hyperlink i

In [60]:
%%ask 
How do I lookup threat intelligence for multiple IP addresses at once?

Question: How do I lookup threat intelligence for multiple IP addresses at once?

VectorDB returns doc_ids:  [['d6199fac', 'd629f170', 'bcc5361d', 'da746402', 'dc715506', '301afa6f', '29c72f81', '0f531c09', '87f41b4a', '1cf291b8', '7af3e670', 'a1c83695', '05bfeda8', 'fa1dfb41', 'c4f51c29', '32e8475e', '26f13abf', '81544be9', '808aa8bb', 'b6ffbe47']]
[32mAdding content of doc d6199fac to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not output any reasoning. Only return your best response, which may not be the most recent.
        Following your best response, list your sources, including file title 

In [61]:
%%ask 
How do I use pivot functions?

Question: How do I use pivot functions?

VectorDB returns doc_ids:  [['3d9c1e8e', '1cf291b8', '79cea9ef', 'd6671db0', '56029216', '995af4b3', '80ff696b', 'ea6f6df5', '20cf91e4', '42c66c1e', '7a577987', '53e466fc', '079738fa', '479fe9a3', '20d44d62', '767708dd', '0449df85', '940f3fed', '816ed06e', 'e8623671']]
[32mAdding content of doc 3d9c1e8e to context.[0m
[32mSkip doc_id 1cf291b8 as it is too long to fit in the context.[0m
[32mAdding content of doc 79cea9ef to context.[0m
[32mAdding content of doc d6671db0 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not output any reasoning. Only retur

In [62]:
%%ask 
Which columns do I need in a dataframe to plot process trees?

Question: Which columns do I need in a dataframe to plot process trees?

VectorDB returns doc_ids:  [['b11173f3', 'f9caf665', '5cbaf730', '19724244', '79cea9ef', '72278c06', 'cc05b515', '548095e2', '3d9c1e8e', '0140ee98', 'c5da864a', 'ca23677c', 'cbcd76c1', '1cf291b8', '709cb713', '808aa8bb', '352664f0', 'b6ffbe47', '81544be9', '6275c180']]
[32mSkip doc_id b11173f3 as it is too long to fit in the context.[0m
[32mAdding content of doc f9caf665 to context.[0m
[32mAdding content of doc 5cbaf730 to context.[0m
[32mAdding content of doc 19724244 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not 

In [63]:
%%ask 
What kind of visualizations does msticpy support?

Question: What kind of visualizations does msticpy support?

VectorDB returns doc_ids:  [['940f3fed', 'ca23677c', 'd1dc0cca', '352664f0', 'ff457565', '7369ffc2', 'b7e667f7', 'f81cfa86', 'c5da864a', 'b11173f3', 'f9caf665', '094373af', '4145947c', '5edde4d1', 'cbcd76c1', 'd7ae86da', '808aa8bb', 'cef15b90', '5164cf09', 'fba58523']]
[32mAdding content of doc 940f3fed to context.[0m
[32mAdding content of doc ca23677c to context.[0m
[32mAdding content of doc d1dc0cca to context.[0m
[32mAdding content of doc 352664f0 to context.[0m
[32mAdding content of doc ff457565 to context.[0m
[32mAdding content of doc 7369ffc2 to context.[0m
[32mAdding content of doc b7e667f7 to context.[0m
[32mAdding content of doc f81cfa86 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the 

In [64]:
%%ask 
How do I add a new query for Microsoft 365 Defender to msticpy?

Question: How do I add a new query for Microsoft 365 Defender to msticpy?

VectorDB returns doc_ids:  [['a1c83695', '6aca0fa2', '352664f0', '8dabd0c4', 'c4f51c29', 'a97d6fd2', 'f81cfa86', 'b77aab4e', '940f3fed', 'c7336e3f', '050704f6', '71b249fe', 'b7e667f7', 'c582d466', 'd1dc0cca', '5164cf09', '876052d7', 'bcf92e5b', 'a015137a', 'ba9edd69']]
[32mAdding content of doc a1c83695 to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find relevant context to answer your question.`.
        You must give as short an answer as possible. Do not output any reasoning. Only return your best response, which may not be the most recent.
        Following your best response, list your sources, including file title and hyp

In [65]:
%%ask
Which msticpy module contains the code related to visualizing network graphs?

Question: Which msticpy module contains the code related to visualizing network graphs?

VectorDB returns doc_ids:  [['19724244', '5edde4d1', '808aa8bb', 'f42ab95f', '94665ddc', 'f634508c', '59f24e4d', '8dabd0c4', '9fa07157', '352664f0', 'bf80766e', 'b96a5b2f', '6589dc3b', '4145947c', '37f04ccd', '940f3fed', 'ddb98f2d', 'd1dc0cca', '7369ffc2', '094373af']]
[32mAdding content of doc 19724244 to context.[0m
[32mAdding content of doc 5edde4d1 to context.[0m


[32mAdding content of doc 808aa8bb to context.[0m
[32mAdding content of doc f42ab95f to context.[0m
[32mAdding content of doc 94665ddc to context.[0m
[32mAdding content of doc f634508c to context.[0m
[32mAdding content of doc 59f24e4d to context.[0m
[32mAdding content of doc 8dabd0c4 to context.[0m
[32mAdding content of doc 9fa07157 to context.[0m
[32mAdding content of doc 352664f0 to context.[0m
[32mAdding content of doc bf80766e to context.[0m
[32mAdding content of doc b96a5b2f to context.[0m
[32mAdding content of doc 6589dc3b to context.[0m
[32mAdding content of doc 4145947c to context.[0m
[32mAdding content of doc 37f04ccd to context.[0m
[33mragproxyagent[0m (to assistant):

You're a retrieve augmented chatbot for MSTICpy. 
        You answer user's questions based on your own knowledge and the context provided by the user.
        If you can't answer the question with or without the current context, you should reply exactly 
        `I am unable to find