In [None]:
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, Boolean
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import random
from datetime import datetime

# Declarative base that the DatabaseLogs model registers its table on.
Base = declarative_base()

class DatabaseLogs(Base):
    """ORM model for the 'DatabaseLogs' table: one row per logged database query.

    Apart from the primary key, every column is declared without constraints,
    so all of them accept NULL. Units of the numeric metric columns are not
    defined anywhere in this cell.
    """
    __tablename__ = 'DatabaseLogs'
    DBLogID = Column(Integer, primary_key=True, autoincrement=True)  # surrogate primary key
    Timestamp = Column(DateTime, default=datetime.now)  # falls back to naive local time when not supplied
    # Who ran the query and from where.
    DatabaseName = Column(String(255))
    UserName = Column(String(255))
    ClientHost = Column(String(255))
    # What was executed.
    QueryText = Column(Text)
    QueryType = Column(String(50))
    ExecutionTime = Column(Integer)
    RowsAffected = Column(Integer)
    TransactionID = Column(String(255))
    # Failure details; the seeding code stores None in both when there was no error.
    ErrorCode = Column(Integer)
    ErrorDescription = Column(Text)
    QueryPlan = Column(Text)
    # Contention and resource metrics (units unspecified).
    LockWaitTime = Column(Integer)
    Deadlock = Column(Boolean)
    CPUUsage = Column(Integer)
    MemoryUsage = Column(Integer)
    DiskIO = Column(Integer)
    NetworkIO = Column(Integer)
    AdditionalInfo = Column(Text)

import os

# Open the file-backed SQLite database (it is NOT in-memory) and create the schema.
# SQLite does not create missing parent directories, so make sure data/ exists first.
os.makedirs('data', exist_ok=True)
engine = create_engine('sqlite:///data/sqlite.db')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()

# Define a limited set of values for certain columns
database_names = ['DB1', 'DB2', 'DB3']
usernames = ['user1', 'user2', 'user3']
error_codes = [None, 404, 500]
error_descriptions = [None, 'Not Found', 'Internal Server Error']
# Codes and descriptions are positionally matched; pairing them keeps a row from
# ending up with e.g. code 404 and the description 'Internal Server Error'.
error_outcomes = list(zip(error_codes, error_descriptions))
# One sample statement per query type so QueryText always agrees with QueryType.
sample_queries = {
    'SELECT': 'SELECT * FROM some_table;',
    'INSERT': "INSERT INTO some_table (name) VALUES ('example');",
    'UPDATE': "UPDATE some_table SET name = 'example' WHERE id = 1;",
    'DELETE': 'DELETE FROM some_table WHERE id = 1;',
}

# Randomly generate and insert 100 rows of data
for _ in range(100):
    query_type = random.choice(list(sample_queries))
    error_code, error_description = random.choice(error_outcomes)
    new_log = DatabaseLogs(
        Timestamp=datetime.now(),
        DatabaseName=random.choice(database_names),
        UserName=random.choice(usernames),
        ClientHost='192.168.{}.{}'.format(random.randint(0, 255), random.randint(0, 255)),
        QueryText=sample_queries[query_type],
        QueryType=query_type,
        ExecutionTime=random.randint(1, 1000),
        RowsAffected=random.randint(1, 100),
        TransactionID=str(random.randint(1000, 9999)),
        ErrorCode=error_code,
        ErrorDescription=error_description,
        QueryPlan=None,
        LockWaitTime=random.randint(0, 100),
        Deadlock=random.choice([True, False]),
        CPUUsage=random.randint(1, 100),
        MemoryUsage=random.randint(1, 1000),
        DiskIO=random.randint(1, 1000),
        NetworkIO=random.randint(1, 1000),
        AdditionalInfo='Randomly generated data'
    )
    session.add(new_log)

# Commit the session to insert the logs into the database
session.commit()

# Report the table's row count (this grows every time the cell is re-run)
print(f"Total rows inserted: {session.query(DatabaseLogs).count()}")


In [None]:
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, BigInteger, Boolean
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import random
from datetime import datetime
from faker import Faker

# Fresh declarative base for this cell's model, plus the Faker instance used to seed it.
Base = declarative_base()
fake = Faker()

class WebServerLogs(Base):
    """ORM model for the 'WebServerLogs' table: one row per HTTP request served.

    The column meanings below follow the schema description that this notebook
    gives to the web server expert agent.
    """
    __tablename__ = 'WebServerLogs'
    WebLogID = Column(Integer, primary_key=True, autoincrement=True)  # unique identifier of the log entry
    Timestamp = Column(DateTime, default=datetime.now)  # when the request was processed
    Hostname = Column(String(255))  # server that processed the request
    ServerIP = Column(String(15))
    ClientIP = Column(String(15))
    UserAgent = Column(String(512))
    RequestMethod = Column(String(10))  # e.g. GET, POST, PUT, DELETE
    RequestURL = Column(Text)
    RequestProtocol = Column(String(10))
    StatusCode = Column(Integer)  # HTTP status code returned
    ResponseSize = Column(BigInteger)  # response size in bytes
    ReferrerURL = Column(Text)
    SessionID = Column(String(255))
    UserID = Column(Integer)  # identifier of the authenticated user, if any
    ResponseTime = Column(Integer)  # time taken to serve the request, in milliseconds
    SSLProtocol = Column(String(50))  # e.g. TLS 1.2, TLS 1.3
    TLSCipher = Column(String(100))  # cipher suite, if applicable
    ErrorLog = Column(Text)  # error message or stack trace when the request failed
    AdditionalInfo = Column(Text)

import os

# Open the file-backed SQLite database (it is NOT in-memory) and create the schema.
# SQLite does not create missing parent directories, so make sure data/ exists first.
os.makedirs('data', exist_ok=True)
engine = create_engine('sqlite:///data/sqlite.db')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()

# Define a limited set of values for certain columns
hostnames = ['server1.example.com', 'server2.example.com']
status_codes = [200, 301, 404, 500]
request_methods = ['GET', 'POST', 'PUT', 'DELETE']
protocols = ['HTTP/1.1', 'HTTPS/1.1', 'HTTP/2']
ssl_protocols = ['TLS 1.2', 'TLS 1.3', None]
tls_ciphers = ['ECDHE-RSA-AES128-GCM-SHA256', 'AES128-GCM-SHA256']

# Randomly generate and insert 100 rows of data
for _ in range(100):
    # Draw the values that other columns depend on first, so each row is self-consistent.
    status_code = random.choice(status_codes)
    ssl_protocol = random.choice(ssl_protocols)
    new_log = WebServerLogs(
        Timestamp=fake.date_time_this_month(),
        Hostname=random.choice(hostnames),
        ServerIP=fake.ipv4(),
        ClientIP=fake.ipv4(),
        UserAgent=fake.user_agent(),
        RequestMethod=random.choice(request_methods),
        RequestURL=fake.uri(),
        RequestProtocol=random.choice(protocols),
        StatusCode=status_code,
        ResponseSize=random.randint(100, 100000),
        ReferrerURL=fake.uri(),
        SessionID=fake.uuid4(),
        UserID=random.randint(1, 100),
        ResponseTime=random.randint(1, 1000),
        SSLProtocol=ssl_protocol,
        # A cipher suite only exists when the request was actually served over TLS.
        TLSCipher=random.choice(tls_ciphers) if ssl_protocol is not None else None,
        # Error text is only recorded for requests that failed (4xx/5xx).
        ErrorLog=fake.text() if status_code >= 400 else None,
        AdditionalInfo='Randomly generated data'
    )
    session.add(new_log)

# Commit the session to insert the logs into the database
session.commit()

# Report the table's row count (this grows every time the cell is re-run)
print(f"Total rows inserted: {session.query(WebServerLogs).count()}")


In [None]:
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, Boolean
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import random
from datetime import datetime
from faker import Faker

# Fresh declarative base for this cell's model, plus the Faker instance used to seed it.
Base = declarative_base()
fake = Faker()

class AuthenticationLogs(Base):
    """ORM model for the 'AuthenticationLogs' table: one row per login attempt.

    The seeding code in this cell fills SessionID, TokenID and ExpiryTime only
    for successful attempts, and FailureReason only for failed ones.
    """
    __tablename__ = 'AuthenticationLogs'
    AuthLogID = Column(Integer, primary_key=True, autoincrement=True)  # surrogate primary key
    Timestamp = Column(DateTime, default=datetime.now)  # falls back to naive local time when not supplied
    UserID = Column(String(255))  # stored as a string here (seeded with a UUID)
    Username = Column(String(255))
    AuthMethod = Column(String(50))
    Success = Column(Boolean)
    FailureReason = Column(String(255), nullable=True)
    # Client details.
    ClientIP = Column(String(15))
    DeviceType = Column(String(50))
    OperatingSystem = Column(String(255))
    Browser = Column(String(255))
    # Session artefacts.
    SessionID = Column(String(255), nullable=True)
    TokenID = Column(String(255), nullable=True)
    ExpiryTime = Column(DateTime, nullable=True)
    AdditionalInfo = Column(Text)

import os
from datetime import timedelta

# Open the file-backed SQLite database (it is NOT in-memory) and create the schema.
# SQLite does not create missing parent directories, so make sure data/ exists first.
os.makedirs('data', exist_ok=True)
engine = create_engine('sqlite:///data/sqlite.db')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()

# Define a limited set of values for certain columns
auth_methods = ['password', 'otp', 'biometric', 'sso']
device_types = ['mobile', 'desktop', 'tablet']
operating_systems = ['Windows', 'macOS', 'Linux', 'Android', 'iOS']
browsers = ['Chrome', 'Firefox', 'Safari', 'Edge']
failure_reasons = [None, 'Incorrect Password', 'User Not Found', 'MFA Challenge Failed']
# A failed attempt must always carry a reason, so only the non-empty entries are drawn from.
actual_failure_reasons = [reason for reason in failure_reasons if reason is not None]

# Randomly generate and insert 100 rows of data
for _ in range(100):
    success = random.choice([True, False])
    auth_method = random.choice(auth_methods)
    timestamp = fake.date_time_this_month()
    failure_reason = None if success else random.choice(actual_failure_reasons)
    # Expiry is derived from the login time itself (15 minutes to 24 hours later),
    # so it is always after the event it belongs to.
    expiry_time = timestamp + timedelta(minutes=random.randint(15, 24 * 60)) if success else None
    token_id = fake.uuid4() if success else None
    session_id = fake.uuid4() if success else None

    new_log = AuthenticationLogs(
        Timestamp=timestamp,
        UserID=fake.uuid4(),
        Username=fake.user_name(),
        AuthMethod=auth_method,
        Success=success,
        FailureReason=failure_reason,
        ClientIP=fake.ipv4(),
        DeviceType=random.choice(device_types),
        OperatingSystem=random.choice(operating_systems),
        Browser=random.choice(browsers),
        SessionID=session_id,
        TokenID=token_id,
        ExpiryTime=expiry_time,
        AdditionalInfo='Randomly generated data'
    )
    session.add(new_log)

# Commit the session to insert the logs into the database
session.commit()

# Report the table's row count (this grows every time the cell is re-run)
print(f"Total rows inserted: {session.query(AuthenticationLogs).count()}")


In [None]:
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, Boolean
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import random
from datetime import datetime
from faker import Faker

# Fresh declarative base for this cell's model, plus the Faker instance used to seed it.
# The ApplicationLogs cell further down reuses both without redefining them.
Base = declarative_base()
fake = Faker()

class NetworkLogs(Base):
    """ORM model for the 'NetworkLogs' table: one row per observed network event."""
    __tablename__ = 'NetworkLogs'
    NetLogID = Column(Integer, primary_key=True, autoincrement=True)  # surrogate primary key
    Timestamp = Column(DateTime, default=datetime.now)  # falls back to naive local time when not supplied
    # Endpoints of the connection.
    SourceIP = Column(String(15))
    DestinationIP = Column(String(15))
    SourcePort = Column(Integer)
    DestinationPort = Column(Integer)
    Protocol = Column(String(50))
    PayloadSize = Column(Integer)  # presumably bytes (seeded with 0-1500) -- TODO confirm
    Action = Column(String(50))  # seeded with ALLOW / DENY / DROP / REJECT
    Status = Column(String(50))  # seeded with SUCCESS / FAILURE
    AdditionalInfo = Column(Text)

import os

# Open the file-backed SQLite database (it is NOT in-memory) and create the schema.
# SQLite does not create missing parent directories, so make sure data/ exists first.
os.makedirs('data', exist_ok=True)
engine = create_engine('sqlite:///data/sqlite.db')
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()

# Define a limited set of values for certain columns
protocols = ['TCP', 'UDP', 'ICMP', 'HTTP', 'HTTPS']
actions = ['ALLOW', 'DENY', 'DROP', 'REJECT']
statuses = ['SUCCESS', 'FAILURE']

# Randomly generate and insert 100 rows of data
for _ in range(100):
    new_log = NetworkLogs(
        Timestamp=fake.date_time_this_month(),
        SourceIP=fake.ipv4(),
        DestinationIP=fake.ipv4(),
        SourcePort=random.randint(1024, 65535),
        DestinationPort=random.randint(1024, 65535),
        Protocol=random.choice(protocols),
        PayloadSize=random.randint(0, 1500),  # typical MTU size for Ethernet
        Action=random.choice(actions),
        Status=random.choice(statuses),
        AdditionalInfo='Randomly generated data'
    )
    session.add(new_log)

# Commit the session to insert the logs into the database
session.commit()

# Report the table's row count (this grows every time the cell is re-run)
print(f"Total rows inserted: {session.query(NetworkLogs).count()}")



In [None]:
# NOTE: this cell has no imports of its own. Base, Column and the column types are
# whatever an earlier cell left in the kernel, so it cannot run on a fresh kernel alone.
class ApplicationLogs(Base):
    """ORM model for the 'ApplicationLogs' table: one row per application log message."""
    __tablename__ = 'ApplicationLogs'
    AppLogID = Column(Integer, primary_key=True, autoincrement=True)  # surrogate primary key
    Timestamp = Column(DateTime, default=datetime.now)  # falls back to naive local time when not supplied
    ApplicationName = Column(String(255))
    LogLevel = Column(String(50))  # seeded with INFO / WARNING / ERROR / DEBUG
    Message = Column(Text)
    UserID = Column(String(255))
    SessionID = Column(String(255))
    AdditionalInfo = Column(Text)

# Create any tables still missing from the database behind the existing `engine`
# (engine, session and fake are reused from an earlier cell).
Base.metadata.create_all(engine)

# Small fixed vocabularies for the categorical columns
application_names = ['WebApp', 'Database', 'AuthService', 'PaymentGateway']
log_levels = ['INFO', 'WARNING', 'ERROR', 'DEBUG']

# Queue 100 randomly generated rows on the session
for _ in range(100):
    row_values = {
        'Timestamp': fake.date_time_this_month(),
        'ApplicationName': random.choice(application_names),
        'LogLevel': random.choice(log_levels),
        'Message': fake.sentence(),
        'UserID': fake.uuid4(),
        'SessionID': fake.uuid4(),
        'AdditionalInfo': 'Randomly generated data',
    }
    new_log = ApplicationLogs(**row_values)
    session.add(new_log)

# Flush the queued rows to the database in one transaction
session.commit()

# Show how many rows the table now holds
row_total = session.query(ApplicationLogs).count()
print(f"Total rows inserted: {row_total}")


In [17]:
from IPython import get_ipython
from typing_extensions import Annotated

import autogen

# Load the model endpoint configurations, keeping only the gpt-4-1106 entries.
config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={
        "model": ["gpt-4-1106"],
    },
)
# Confirm what was loaded without echoing credentials: cell output is saved with the
# notebook, so printing the raw list would leak the API key to anyone who can read it.
print([
    {key: ("<REDACTED>" if key == "api_key" else value) for key, value in config.items()}
    for config in config_list
])

[{'model': 'gpt-4-1106', 'api_key': '<REDACTED>', 'base_url': 'https://canandaeast.openai.azure.com/', 'api_type': 'azure', 'api_version': '2023-12-01-preview'}]


In [None]:

import os
import sys
from io import StringIO  # `import StringIO` is Python 2 only and fails on Python 3

import autogen
import pandas as pd
from sqlalchemy import create_engine  # imported here so the cell does not rely on earlier cells

# File-backed SQLite database that the log-generation cells populate.
engine = create_engine('sqlite:///data/sqlite.db')

def execute_python_code(python_code, goal=None):
    """Execute a Python snippet and return what it printed, or an error report.

    The snippet runs with one helper in scope, ``execute_sql_query(sql_query, limit=100)``,
    which runs a SQL query against the module-level ``engine`` and returns a DataFrame.

    Args:
        python_code: Source text to run with ``exec``.
        goal: Optional description of what the snippet is meant to achieve. The tool
            schema marks ``goal`` as required, so it is accepted here; it is not used.

    Returns:
        ``"\\n"`` followed by the captured stdout when the snippet printed something,
        an empty string when it printed nothing, or, if the snippet raised, a message
        asking for a fix followed by the exception text.
    """
    # Local imports keep the function independent of the cell's import lines.
    import contextlib
    import io

    def execute_sql_query(sql_query, limit=100):
        """Run ``sql_query`` and return at most ``limit`` rows as a DataFrame."""
        result = pd.read_sql_query(sql_query, engine)
        result = result.infer_objects()
        for col in result.columns:
            if 'date' in col.lower():
                # Best-effort conversion: keep the column unchanged if it does not parse
                # (replaces the deprecated errors="ignore" option).
                try:
                    result[col] = pd.to_datetime(result[col])
                except (ValueError, TypeError):
                    pass

        if limit is not None:
            result = result.head(limit)  # limit to save memory
        return result

    # SECURITY: exec() runs arbitrary code with this process's privileges. Only feed it
    # code from a trusted source, or run it in a sandbox.
    namespace = {"execute_sql_query": execute_sql_query}
    captured = io.StringIO()
    try:
        # The context manager restores sys.stdout even when the snippet raises.
        with contextlib.redirect_stdout(captured):
            exec(python_code, namespace)
    except Exception as e:
        return "\nEncounter following error, please fix the bug and give updated code\n" + str(e) + "\n"

    std_out = captured.getvalue()
    return "\n" + std_out if std_out else ""

# Prompt text describing the WebServerLogs table; it is interpolated into the web server
# expert's system message so the model can write correct queries.
# NOTE(review): the DDL below uses AUTO_INCREMENT and marks Timestamp NOT NULL, whereas the
# table is actually created on SQLite from the WebServerLogs ORM model -- confirm the two agree.
webserver_schema ="""

For web server logs, it is common to capture HTTP request and response data, along with server performance metrics. Here's an example schema for a web server log table:

CREATE TABLE WebServerLogs (
    WebLogID INT AUTO_INCREMENT PRIMARY KEY,
    Timestamp DATETIME NOT NULL,
    Hostname VARCHAR(255),
    ServerIP VARCHAR(15),
    ClientIP VARCHAR(15),
    UserAgent VARCHAR(512),
    RequestMethod VARCHAR(10),
    RequestURL TEXT,
    RequestProtocol VARCHAR(10),
    StatusCode INT,
    ResponseSize BIGINT,
    ReferrerURL TEXT,
    SessionID VARCHAR(255),
    UserID INT,
    ResponseTime INT,
    SSLProtocol VARCHAR(50),
    TLSCipher VARCHAR(100),
    ErrorLog TEXT,
    AdditionalInfo TEXT
);

Column Descriptions:

WebLogID: A unique identifier for each log entry.
Timestamp: The date and time when the request was processed.
Hostname: The hostname of the server that processed the request.
ServerIP: The IP address of the server.
ClientIP: The IP address of the client making the request.
UserAgent: The user agent string of the client's browser or tool making the request.
RequestMethod: The HTTP method used (e.g., GET, POST, PUT, DELETE).
RequestURL: The URL that was requested.
RequestProtocol: The protocol used for the request (e.g., HTTP/1.1, HTTP/2, HTTPS).
StatusCode: The HTTP status code returned (e.g., 200, 404, 500).
ResponseSize: The size of the response in bytes.
ReferrerURL: The referrer URL if provided by the client.
SessionID: A unique identifier for the user session.
UserID: A system identifier for the user making the request, if authenticated.
ResponseTime: The time taken to serve the request in milliseconds.
SSLProtocol: The SSL protocol used for secure requests (e.g., TLS 1.2, TLS 1.3).
TLSCipher: The TLS cipher suite used for the request, if applicable.
ErrorLog: Any error messages or stack traces if the request resulted in an error.
AdditionalInfo: Any other relevant information that might assist in diagnosing issues or analyzing traffic patterns.

This schema captures a range of data that can be used for troubleshooting, security analysis, performance monitoring, and understanding user behavior on the web server. Depending on the level of detail required and the specific use cases, more fields could be added, such as those capturing cookie data, full request and response headers, or more detailed timing information for various stages of request handling.
"""

# Tool schema advertised to the web server expert: a single function tool,
# execute_python_code, taking the code to run and a statement of its goal.
webserver_function_spec = [
    {
        "type": "function",
        "function": {
            "name": "execute_python_code",
            "description": "execute a python code for data analysis and visualization in a remote environment. Each call to execute_python_code() is isolated other executions",
            "parameters": {
                "type": "object",
                "properties": {
                    "python_code": {
                        "type": "string",
                        "description": "python code snippet that can be executed. You are provided with following utility functions \n 1. execute_sql_query(sql_query: str) a util function to execute SQL query against the SQLITE database with valid SQL syntax and data schema were discovered before this step. This execute_sql_query(sql_query: str) function returns a pandas dataframe that you can use to perform any data analysis and visualization.\n 2. display(data) an util function to display the data analysis and visualization result. This function can take a pandas dataframe, plotly figure or matplotlib figure as input. For example, to visualize a plotly figure, the code can be ```fig=px.line(some_df)\n display(fig)```Output can only be observed by display() util function which can accept a pandas dataframe, plotly figure or matplotlib figure as input. Do not use print() or figure.show() as they do not work in the remote environment.",
                    },
                    "goal": {
                        "type": "string",
                        "description": "description of what you hope to achieve with this python code snippset",
                    },
                },
                "required": ["python_code", "goal"],
            },
        },
    },
]

# Lookup from tool name to the Python callable that implements it.
BA_AVAILABLE_FUNCTIONS = {"execute_python_code": execute_python_code}

# System prompt for the web server expert; embeds the table schema text.
webserver_expert_system_message = f"""
You are webserver system expert. You help retrieve web system metrics. You can write python code to query and analyze data.
You are provided with a utility function execute_sql_query(sql_query) to execute sql query and return the result as a pandas dataframe.
Here is the scehma of the webserver logs table so that you can write the query correctly.

{webserver_schema}

"""

# Assistant that answers web server questions and may request the execute_python_code tool.
# Passing system_message replaces AssistantAgent's default system prompt.
webserver_expert = autogen.AssistantAgent(
    name="webserver_expert",
    system_message=webserver_expert_system_message,
    llm_config=dict(config_list=config_list, tools=webserver_function_spec),
)

# Proxy that only relays the question: it never asks a human and never auto-replies,
# so the chat ends after the expert's first response.
# NOTE(review): no function map is passed here and auto-reply is disabled, so a tool call
# requested by the expert does not appear to be executed by anyone -- confirm intended.
webserver_user = autogen.UserProxyAgent(
    name="webserver_user",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=0,
)


def ask_websever_expert(message):
    """Ask the web server expert a question and return the text of the last message.

    Starts a chat from ``webserver_user`` to ``webserver_expert`` with ``message`` and
    returns the ``"content"`` field of the last message ``webserver_user`` holds.
    """
    webserver_user.initiate_chat(webserver_expert, message=message)
    final_message = webserver_user.last_message()
    return final_message["content"]


In [None]:
# System prompt for the top-level support agent that coordinates the experts.
main_agent_system_message = """
You are a customer support specialist that help customers understand the cause of their techical issues.
First, you need to look up the guideline for the issue in the knowledge base.
Then follow the guideline to help the customer.
You have access to multiple assistants who can help you query specific information.
Once you gather sufficient information, give customer a better understanding of the issue or a solution.

"""

# Top-level assistant (named "planner"); its default system prompt is replaced by the one above.
main_agent = autogen.AssistantAgent(
    name="planner",
    system_message=main_agent_system_message,
    llm_config=dict(config_list=config_list),
)

def is_termination_msg(x):
    """Decide whether a received message should end the automatic conversation.

    Args:
        x: Message dictionary; only its ``"tool_calls"`` and ``"role"`` keys are read.

    Returns:
        True when the message has role ``"user"`` and carries no tool calls
        (``"tool_calls"`` missing or None); False otherwise.
    """
    # Debug prints removed: they dumped every full message into the cell output.
    return x.get("tool_calls") is None and x.get("role") == "user"
    


# Proxy that talks to the planner on the user's behalf and executes the tools
# registered on it. Human input mode is "TERMINATE", and is_termination_msg
# decides which incoming messages count as terminating.
main_agent_proxy = autogen.UserProxyAgent(
    name="planner_user",
    human_input_mode="TERMINATE",
    is_termination_msg=is_termination_msg,
)

@main_agent_proxy.register_for_execution()
@main_agent.register_for_llm(name="look_up_guideline", description="Look up investigation guideline for the issue")
def look_up_guideline(customer_issue: Annotated[str, "summary of customer issue"])-> str:
    """Return the investigation guideline for a customer issue.

    This is a stub: ``customer_issue`` is not inspected and the same
    "slow web application" guideline is returned for every input.
    """
    return """
### Scenario 2: Slow Performance in Web Application

**Customer Trouble:**
Users are reporting that the web application's performance has significantly degraded, with page load times being much longer than usual.
**Systems Involved:**
1. Web Servers
2. Database Servers
**Specific Guidance for Analysis:**
- **Web Servers:** Monitor server performance metrics such as CPU usage, memory usage, and response time. Check for any recent updates or changes that could have impacted performance.
- **Database Servers:** Analyze query performance and look for slow-running queries or table scans that could be causing bottlenecks. Evaluate the indexing strategy and optimize queries as needed.
    """
@main_agent_proxy.register_for_execution()
@main_agent.register_for_llm(name="ask_websever_expert", description="Ask web server expert for any performance metrics of the web server")
def ask_websever_expert(question: Annotated[str, "Specific question with metrics, timeline, etc.. to ask the web server expert"])-> str:
    """Stubbed web server expert tool: returns a canned finding.

    ``question`` is not inspected. Defining this function rebinds the name
    ``ask_websever_expert``, replacing any definition made earlier in the notebook.
    """
    # The unreachable second `return guideline` (an undefined name) has been removed.
    return "CPU and Memory usage are normal. Response time is 2x slower than usual."

@main_agent_proxy.register_for_execution()
@main_agent.register_for_llm(name="ask_database_expert", description="Ask database expert for any performance metrics of the database server")
def ask_database_expert(question: Annotated[str, "Specific question with metrics, timeline, etc.. to ask the database expert"])-> str:
    """Stubbed database expert tool: returns a canned finding.

    ``question`` is not inspected. The function is named after the tool it is
    registered as, so it no longer overwrites ``ask_websever_expert``.
    """
    return "The query coming from user johh took 2 hours to run."

# Start the demo: the proxy opens a chat with the planner using a sample customer complaint.
main_agent_proxy.initiate_chat(main_agent, message="My web application is slow")