In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each cell
# runs, so edits to the project's `utils` package are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# mypy: disable-error-code="import-not-found"

# The notebook should be executed from the project root directory
import os
import sys
from pathlib import Path

# One-time working-directory fix-up: step up one directory and make the new working
# directory importable. The `_correct_path` sentinel keeps a re-run of this cell from
# climbing one more level each time.
if "_correct_path" not in locals():
    os.chdir("..")
    sys.path.append(".")
    # Report the absolute directory we ended up in (no stray closing parenthesis).
    print(f"changed dir to {Path('.').resolve()}")
    _correct_path = True

In [None]:
import os

import pandas as pd

from utils.schema import AnalystDataset

In [None]:
# Sample CSV fetched over HTTPS; pandas reads it straight from the URL.
dataset_url = "https://s3.amazonaws.com/datarobot_public_datasets/10k_diabetes_20.csv"

df = pd.read_csv(dataset_url)

# Wrap the frame in an AnalystDataset: the name is the URL's file name without its
# extension, and the rows are handed over as a list of per-row dicts.
file_name = os.path.basename(dataset_url)
dataset = AnalystDataset(
    name=os.path.splitext(file_name)[0],
    data=df.to_dict(orient="records"),
)

In [None]:
from utils.api import cleanse_dataframes

# Run the cleansing step on the single loaded dataset (the API takes a list).
cleansed_data = await cleanse_dataframes([dataset])

# Keep only the `.dataset` attribute of each cleansing result; this list is what the
# later cells pass to the API.
analysis_data = [ds.dataset for ds in cleansed_data]

In [None]:
from utils.api import suggest_questions

# Request suggested questions for the cleansed datasets (awaited: the call is async).
suggested_questions = await suggest_questions(analysis_data)

In [None]:
from utils.api import rephrase_message
from utils.schema import ChatRequest

question = "What is the relationship between length of stay and readmission?"
chat_response = await rephrase_message(
    messages=ChatRequest(
        messages=[
            {
                "role": "user",
                "content": question,
            },
            {
                "role": "user",
                "content": question + "Please order the chart by readmission rate",
            },
        ],
    )
)

In [None]:
# Display the value returned by `rephrase_message` as the cell output.
chat_response

In [None]:
from utils.api import get_dictionaries

# Fetch the dictionaries for the cleansed datasets; the result is passed as
# `dictionaries=` to RunAnalysisRequest further down.
dictionary = await get_dictionaries(analysis_data)

In [None]:
from utils.api import run_analysis
from utils.schema import RunAnalysisRequest

# Bundle the cleansed datasets, their dictionaries, and the output of `rephrase_message`
# (used here as the question) into a single analysis request.
analysis_request = RunAnalysisRequest(
    datasets=analysis_data,
    dictionaries=dictionary,
    question=chat_response,
)
# Run the analysis; later cells read `analysis_result.dataset` from the result.
analysis_result = await run_analysis(analysis_request)

In [None]:
import asyncio

from utils.api import get_business_analysis, run_charts
from utils.schema import (
    DataDictionary,
    GetBusinessAnalysisRequest,
    RunChartsRequest,
)

# Both follow-up requests are built from the dataset returned by run_analysis.
result_dataset = analysis_result.dataset

chart_request = RunChartsRequest(dataset=result_dataset, question=chat_response)

# The business request derives its dictionary from the result dataset's DataFrame form.
business_request = GetBusinessAnalysisRequest(
    dataset=result_dataset,
    dictionary=DataDictionary.from_analyst_df(result_dataset.to_df()),
    question=chat_response,
)

# Wrap both coroutines in tasks right away (charts first, then business analysis);
# a later cell awaits them.
charts_task, business_task = (
    asyncio.create_task(run_charts(chart_request)),
    asyncio.create_task(get_business_analysis(business_request)),
)

In [None]:
import plotly.offline as pyo

from utils.schema import GetBusinessAnalysisResult, RunChartsResult

pyo.init_notebook_mode()

tasks = [charts_task, business_task]

# Wait for each task to complete
for coro in asyncio.as_completed(tasks):
    result = await coro

    # Determine which task completed by checking the result structure
    if isinstance(result, RunChartsResult) and (result.fig1 or result.fig2):
        if result.fig1:
            pyo.iplot(result.fig1)
        if result.fig2:
            pyo.iplot(result.fig2)

    elif isinstance(result, GetBusinessAnalysisResult):
        print(f"Bottom Line:\n{(result.bottom_line or '')}")

        print(f"Additional Insights:\n{result.additional_insights}")

        print("Follow-up Questions:")
        for q in result.follow_up_questions:
            print(f"- {q}")

In [None]:
# Write the three results as indented JSON files under tests/models/.
# The encoding is pinned to UTF-8: without it `open` uses the platform's locale
# encoding, which can fail on (or mis-encode) non-ASCII characters in the dumped JSON.
# `Task.result()` is used for the two tasks, so they must already have finished
# before this cell runs.
with open("tests/models/run_analysis_result.json", "w", encoding="utf-8") as f:
    f.write(analysis_result.model_dump_json(indent=4))
with open("tests/models/run_charts_result.json", "w", encoding="utf-8") as f:
    f.write(charts_task.result().model_dump_json(indent=4))
with open("tests/models/run_business_result.json", "w", encoding="utf-8") as f:
    f.write(business_task.result().model_dump_json(indent=4))

In [None]:
from utils.api import get_dictionaries
from utils.database_helpers import Database

# Database-backed flow: ask the Database helper for its tables, then load data for all
# of them (the table list is unpacked into positional arguments).
db_tables = Database.get_tables()
db_datasets = Database.get_data(*db_tables)
# Same dictionary call as used for the CSV dataset, now applied to the database datasets.
db_dictionaries = await get_dictionaries(db_datasets)

In [None]:
from utils.api import suggest_questions

# NOTE: this rebinds `suggested_questions`, replacing the value computed earlier for the
# CSV dataset with suggestions for the database datasets.
suggested_questions = await suggest_questions(db_datasets)

In [None]:
# Display the most recently computed suggestions as the cell output.
suggested_questions

In [None]:
from utils.api import run_database_analysis
from utils.schema import RunDatabaseAnalysisRequest

db_run_analysis = await run_database_analysis(
    RunDatabaseAnalysisRequest(
        datasets=db_datasets,
        dictionaries=db_dictionaries,
        question="How does loan default rate relate to type of loan?",
    )
)