In [1]:
# Base URL of the static resources (the logo shown below lives under this path).
SWISS_CHEESE_RESOURCES_URL = "https://raw.githubusercontent.com/maxkiva/swiss-cheese/main/resources"
# Base URL of the tax documents and the sample salary certificate used by this notebook.
TAX_DOC_BASE_URL = "https://raw.githubusercontent.com/maxkiva/swiss-cheese/main/docs"
# Gemini model name passed to generate_content calls that reference this constant.
GEN_AI_MODEL = "gemini-2.0-flash"
# Embedding model passed to GoogleGenerativeAIEmbeddings when vectorizing the tax guide.
EMBEDDINGS_MODEL_ID = "models/embedding-001"

<p align="center">
  <img src="https://raw.githubusercontent.com/maxkiva/swiss-cheese/main/resources/swiss_cheese.png" alt="Swiss Cheese Logo" width="250"/>
</p>

---

### Project Summary: Swiss Cheese – A Swiss Tax Assistant Powered by GenAI

**Purpose:**
This project presents **Swiss Cheese**, a GenAI-powered assistant designed to simplify the Swiss tax filing process. The system leverages generative AI to extract, interpret, and contextualize financial data—particularly from scanned *Lohnausweis* (salary statements)—and integrates official tax rules to compute provisional tax liabilities. It is intended as a modular and scalable assistant capable of helping residents navigate the complexities of Swiss taxation.

---

**Key Techniques and Methods Used:**

- **Document Parsing & Understanding:**
  - Processes images or text versions of individual salary certificates (*Lohnausweis*) and extracts structured fields using LLMs with prompt-based techniques.
  - Uses language models to interpret official tax regulation documents and extract applicable tax rates.

- **Regulation Retrieval with Semantic Search:**
  - Extracts relevant paragraphs from official tax declaration guides using LLMs.
  - Embeds these paragraphs to enable contextual retrieval using a retrieval-augmented generation (RAG) approach.

- **LLM-Powered Reasoning & Chat:**
  - Employs Google Gemini models via LangChain and LangGraph to support multi-turn dialogue, structured output formatting, and reasoning over the retrieved rules and extracted user data.

- **LangGraph Workflow:**
  - Constructs a modular LangGraph that separates functional roles such as:
    - User input processing
    - Document field extraction
    - Regulation retrieval
    - Tax liability computation
    - Chat interaction

---

**Outcome:**
Swiss Cheese demonstrates a working prototype of a conversational assistant that combines LLM capabilities, regulation parsing, and user input handling to deliver an explainable and user-friendly tax estimation process tailored to the Swiss context.



In [2]:
import json
import os
from io import StringIO
from typing import Annotated
from typing import Literal
from typing import Optional

import PIL.Image as Pim
import pandas as pd
import requests
from IPython.display import display, Markdown, HTML
from dotenv import load_dotenv
from google import genai
from google.api_core import retry
from google.genai import types
from json_repair import repair_json
from langchain.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from typing_extensions import TypedDict


### Google api key

Set up the Google API key, whether the notebook runs on Kaggle or locally.


In [3]:
# Set up the Google API key.
# On Kaggle the key is read from the notebook secrets; anywhere else (import or
# secret lookup fails) the environment is populated from a local .env file.
try:
    from kaggle_secrets import UserSecretsClient
    os.environ["GOOGLE_API_KEY"] = UserSecretsClient().get_secret("GOOGLE_API_KEY")
except Exception:
    # `Exception` rather than a bare `except:` so KeyboardInterrupt / SystemExit
    # are not swallowed while still keeping the best-effort local fallback.
    load_dotenv()

### Utilities

A predicate that recognises retriable API errors, and a function to download documentation

In [4]:
# utility functions
def is_retriable(e):
    """Retry predicate: True only for genai API errors with status code 429 or 503."""
    if not isinstance(e, genai.errors.APIError):
        return False
    return e.code in {429, 503}

def download_file_if_needed(url, file_path, timeout=60):
    """
    Downloads a file from the given URL if it does not already exist or is of zero length.
    If the download fails, it raises an exception.

    The body is streamed into a temporary ``<file_path>.part`` file which is renamed to
    ``file_path`` only after the download has completed, so an interrupted download never
    leaves a truncated, non-empty file that later calls would mistake for a valid cache.

    Args:
        url (str): The URL from which to download the file.
        file_path (str): The local path where the file should be stored.
        timeout (float | tuple): Timeout in seconds passed to ``requests.get`` so a
            stalled server cannot block the notebook forever.

    Raises:
        requests.exceptions.RequestException: If the download fails or response status is not 200
        IOError: If there are issues writing the file
    """
    # Check if the file already exists and is of non-zero length
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        return

    partial_path = f"{file_path}.part"

    # The context manager releases the streamed connection on every exit path.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:  # Ensure the request was successful
            raise requests.exceptions.RequestException(
                f"Failed to download the file. Status code: {response.status_code}"
            )

        try:
            with open(partial_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
            # Publish the file only once it is complete.
            os.replace(partial_path, file_path)
        except IOError as e:
            raise IOError(f"Failed to write file {file_path}: {str(e)}") from e
        finally:
            # After a successful os.replace the partial file no longer exists.
            if os.path.exists(partial_path):
                os.remove(partial_path)

### Demo mode

Define a custom input function to run the notebook in non-interactive demo mode.

In [5]:
# Set this to False to run in interactive mode (answers are then read via input()).
DEMO_MODE = True

# Preset answers returned, in order, by optional_input() when running in demo mode.
# The first entry answers the salary-certificate URL prompt; the final "q" is one of
# the quit commands recognised by human_node and ends the chat loop.
DEMO_QUESTION_LIST = (
    f"{TAX_DOC_BASE_URL}/Lohnausweis_2024_1_fake.png",
    "Calculate total tax with breakdown by gemeinde, kanton and bundes components, just provide the numbers. No explanations",
    "Explain how bundes tax was calculated showing numbers every step of the way",
    "How do I fill in income from investments",
    "Which fields do I populate with details from Lohnausweis",
    "q"
)

def sequential_items(string_list):
    """
    Generator that hands out the entries of ``string_list`` one at a time, in order.
    """
    yield from string_list


# Single module-level generator: every optional_input() call in demo mode consumes
# the next preset answer, so this cell must be re-run to replay the demo from the start.
demo_question_generator = sequential_items(DEMO_QUESTION_LIST)

def optional_input(text):
    """Return the next user answer.

    In demo mode the answer comes from the preset ``demo_question_generator``; otherwise
    the user is prompted with ``text`` via ``input``.

    When the preset answers are exhausted (e.g. the graph is invoked again without
    re-running the demo cell) the quit command ``"q"`` is returned instead of letting
    ``StopIteration`` escape into the calling graph node.
    """
    if DEMO_MODE:
        return next(demo_question_generator, "q")
    return input(text)


### Tax rules and documentation

Set up URLs for the tax regulations. Some of the documents have been pre-downloaded and placed in the project's GitHub repository for simplicity.

These include

- Canton Zürich tax declaration guide for the year 2024
- Document describing the Swiss taxation system in general terms
- Short overview of Canton Zürich taxation
- Municipal (Gemeinde) tax rates
- Federal (Bundes) tax law with relevant tax rates
- Cantonal tax law with relevant tax rates


In [6]:
# URL of the 2024 federal direct tax tariff PDF on estv.admin.ch.
# NOTE(review): this name is not referenced elsewhere in the visible part of the notebook;
# the same URL literal is repeated in tax_rates_sources.
Steuertarife_direkte_Bundessteuer_natürliche_Personen_2024 = "https://www.estv.admin.ch/dam/estv/de/dokumente/dbst/tarife/dbst-tairfe-58c-2024-de.pdf.download.pdf/dbst-tairfe-58c-2024-de.pdf"

# Same values as assigned in the first code cell (re-assigned here unchanged).
SWISS_CHEESE_RESOURCES_URL = "https://raw.githubusercontent.com/maxkiva/swiss-cheese/main/resources"
TAX_DOC_BASE_URL = "https://raw.githubusercontent.com/maxkiva/swiss-cheese/main/docs"

# Tax document filenames (resolved against TAX_DOC_BASE_URL where they are downloaded)
ZH_2024_TAX_MANUAL = "305_Wegleitung_ZH_2024.pdf"
SWISS_TAX_SYSTEM = "ch_steuersystem.pdf"
SHORT_INTRO_INTO_ZH_TAXES = "steuern_zh_kurz_de_20231130.pdf"
BUNDES_STEUER_GESETZ = "bundessteuer_642_11.pdf"
KANTON_STEUER_GESETZ = "kantonsteuer_631_1.pdf"


### Categorization of the questions

Use an LLM to determine whether a question posted in the chat relates to filling in the tax form or to tax calculation.

Questions about the tax form are answered using RAG over the embedded tax form guide; all other questions go to the separate "main_chat" LLM, which estimates taxes and provides relevant commentary.

In [7]:
# Prompt template for classifying a chat question.
# The single `{}` placeholder is filled with the question via str.format; the model is
# asked to answer with a JSON object whose "category" is 'TaxCalculation' or 'TaxDeclaration'.
QUESTION_TYPE_PROMPT =\
"""

Determine the following question category.

QUESTION: {}

The question may have only two categories
a) Related to tax calculation
b) Or related to tax declaration filling.

Provide answer based on the following Python class structure:


class QuestionCategoryEnum(str, Enum):
    TaxCalculation = 'TaxCalculation'
    TaxDeclaration = 'TaxDeclaration'

class QuestionCategory(TypedDict)
  category: QuestionCategoryEnum

"""

class QuestionCategoryAnalyzer:
    """Classifies a chat question as tax-calculation or tax-declaration related via the LLM."""

    # Categories the prompt allows the model to return.
    KNOWN_CATEGORIES = ("TaxCalculation", "TaxDeclaration")
    # Returned when the model reply cannot be interpreted. The chat treats every value
    # other than "TaxDeclaration" as a calculation question, so this mirrors the
    # caller's existing fall-through instead of crashing the conversation.
    DEFAULT_CATEGORY = "TaxCalculation"

    def __init__(self):
        self.client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

    @retry.Retry(predicate=is_retriable)
    def determine_category(self, question: str) -> str:
        """Ask the model for the category of ``question``.

        Args:
            question: The user's question as typed in the chat.

        Returns:
            One of ``KNOWN_CATEGORIES``; ``DEFAULT_CATEGORY`` if the reply is malformed.
        """
        prompt = QUESTION_TYPE_PROMPT.format(question)
        response = self.client.models.generate_content(
            model=GEN_AI_MODEL,
            contents=prompt,
            config=types.GenerateContentConfig(
                temperature=0,
                response_mime_type="application/json"
            )
        )
        return self._parse_category(response.text)

    @classmethod
    def _parse_category(cls, raw_text) -> str:
        """Extract the category from the raw model reply, tolerating malformed JSON."""
        if not raw_text:
            return cls.DEFAULT_CATEGORY
        try:
            parsed = json.loads(repair_json(raw_text))
        except (TypeError, ValueError):
            return cls.DEFAULT_CATEGORY
        # The model occasionally wraps the object in a list.
        if isinstance(parsed, list) and parsed:
            parsed = parsed[0]
        if isinstance(parsed, dict) and parsed.get("category") in cls.KNOWN_CATEGORIES:
            return parsed["category"]
        return cls.DEFAULT_CATEGORY


### Tax form guide embedding

Engineer a prompt for extracting the paragraphs from the tax form guide using an LLM. This works better than the PDF loader from LangChain.

The extracted paragraphs are embedded and stored in a Chroma vector store, which is then used for RAG.

In [8]:
# Prompt used to split the tax manual into paragraphs before vectorization.
# It is sent together with the uploaded guide PDF; the model is asked to answer with a
# JSON object of the form {"paragraphs": [{"title": ..., "content": ...}, ...]}.
WEGLEITUNG_PROMPT = \
"""
Attached is the guide for filling in tax declaration for the year 2024 in Kanton Zürich, Switzerland.

Generate list of paragraphs with their content.

The output must be based on the following Python class structure:

class Paragraph(TypedDict):
  title: str
  content: str

class Paragraphs(TypedDict):
  paragraphs: list[Paragraph]

"""



In [9]:
# File name of the Zürich 2024 tax declaration guide (same value as assigned in the
# document-URL cell above); downloaded from TAX_DOC_BASE_URL by TaxDeclarationSpecialist.
ZH_2024_TAX_MANUAL = "305_Wegleitung_ZH_2024.pdf"

class TaxDeclarationSpecialist:
    """Answers questions about filling in the Zürich tax declaration using RAG.

    ``set_up_vector_store`` uploads the tax form and the guide, lets the LLM split the
    guide into paragraphs and indexes them in a Chroma vector store.
    ``ask_question_about_tax_form`` retrieves the closest paragraphs for a question and
    asks the LLM to answer with the tax form PDF attached.
    """

    prompt_template = """You are a helpful and informative bot that answers questions related to Kanton Zürich ( Switzerland) tax declaration
    using text from the reference passage(s) included below.
    Passages originate from document called "Wegleitung zur Steuererklärung 2024"
    Passages would be in German and you will need to provide answer in English.

    When providing answers also refer to the tax declaration form in the attached PDF document.

    Be sure to respond in a complete sentence, being comprehensive, including all relevant background information.
    However, you are talking to a non-financial audience, so be sure to break down complicated concepts and
    strike a friendly and converstional tone. If the passage is irrelevant to the answer, you may ignore it.

    QUESTION: {}
    """
    def __init__(self):
        self.client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
        # Populated by set_up_vector_store()
        self.vector_store = None
        self.ref_manual = None
        self.ref_form = None

    @retry.Retry(predicate=is_retriable)
    def set_up_vector_store(self):
        """Download and upload the reference PDFs and build the paragraph vector store.

        Raises:
            ValueError: If the model reply contains no usable paragraphs.
        """
        doc_file = "STE_ZH_2024.pdf"
        download_file_if_needed(f"{TAX_DOC_BASE_URL}/{doc_file}", doc_file)
        self.ref_form = self.client.files.upload(file=doc_file)

        download_file_if_needed(f"{TAX_DOC_BASE_URL}/{ZH_2024_TAX_MANUAL}", ZH_2024_TAX_MANUAL)
        self.ref_manual = self.client.files.upload(file=ZH_2024_TAX_MANUAL)

        response = self.client.models.generate_content(
            model=GEN_AI_MODEL,
            contents=[self.ref_manual, WEGLEITUNG_PROMPT],
            config=types.GenerateContentConfig(
                temperature=0,
                response_mime_type="application/json"
            )
        )
        paras = json.loads(repair_json(response.text))
        # The prompt asks for {"paragraphs": [...]}, but tolerate a bare list as well.
        raw_paragraphs = paras.get("paragraphs", []) if isinstance(paras, dict) else paras

        # transform the returned JSON into a list of paragraphs ready for vectorization;
        # entries without content are skipped, a missing title becomes an empty title
        docs = [
            Document(f"*{para.get('title', '')}*\n{para['content']}")
            for para in raw_paragraphs
            if isinstance(para, dict) and para.get("content")
        ]
        if not docs:
            raise ValueError("No paragraphs could be extracted from the tax declaration guide")

        embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDINGS_MODEL_ID)
        # NOTE(review): the index is persisted to "chroma_index" but never reloaded here;
        # repeated runs may add the same paragraphs again - confirm and clear if needed.
        self.vector_store = Chroma.from_documents(docs, embeddings, persist_directory="chroma_index")

    def ask_question_about_tax_form(self, question: str) -> str:
        """Answer ``question`` from the three most similar guide paragraphs.

        Args:
            question: The user's question; newlines are flattened to spaces.

        Returns:
            The text of the model response.

        Raises:
            RuntimeError: If ``set_up_vector_store`` has not been called yet.
        """
        if self.vector_store is None:
            raise RuntimeError("set_up_vector_store() must be called before asking questions")

        query_oneline = question.replace("\n", " ")
        vector_search_result = self.vector_store.similarity_search(query_oneline, k=3)

        prompt = self.prompt_template.format(query_oneline)
        for passage in vector_search_result:
            passage_oneline = passage.page_content.replace("\n", " ")
            prompt += f"PASSAGE: {passage_oneline}\n"

        response = self.client.models.generate_content(
            model=GEN_AI_MODEL,
            contents=[self.ref_form, prompt]
        )
        return response.text

In [10]:
class TaxChatState(TypedDict):
    """State of the tax chatbot.
    Keeps
        * conversation history
        * municipal tax data
        * tax rules documents vectorstore
        * json data representing the salary certificate"""
    # message list combined through the add_messages reducer
    messages: Annotated[list, add_messages]

    # latest question typed by the user; cleared by the chatbot node once consumed
    user_input : str

    # latest answer produced by the chatbot node
    response: str

    # municipal tax rates
    municipal_tax_rates: Optional[str]

    # provincial and state tax rates
    kanton_and_bundes_tax_rates: Optional[str]

    # JSON representation of salary certificate
    lohnausweis: Optional[dict]

    #tax declaration manual, vectorized
    # NOTE(review): no node in the visible part of the notebook assigns this key
    manual_vectorstore: Optional[object]

    # chat object that answers the questions (forward reference, class defined later)
    main_chat : 'SwissCheeseTaxChat'

    # set to True by the human node when the user asks to leave
    quit: bool


### Prompt engineering for main chat

The prompt for main chat is built using the data extracted from tax regulations and salary certificate

In [11]:
# create the head of the prompt with all the details (with caveats) to compute the taxes
def get_main_chat_prompt(state: "TaxChatState") -> str:
    """Build the head of the main chat prompt.

    The prompt embeds the fields extracted from the salary certificate
    (``state["lohnausweis"]``), the municipal tax rates and the cantonal / federal tax
    rates, followed by the instructions on how the model must answer.

    Args:
        state: Chat state; must contain ``lohnausweis``, ``municipal_tax_rates`` and
            ``kanton_and_bundes_tax_rates``.

    Returns:
        The prompt text to which the conversation history and the new question are appended.
    """
    details = state["lohnausweis"]
    # Ziffer 11 must show the net salary field (f_11_nettolohn), not the 10.2 buy-in amount.
    return f"""
    You are friendly expert in Swiss taxes and specifically in taxes in Canton Zürich.

    You will be providing advice on all aspects of personal taxes to the person with following details
    extracted from his/her salary certificate (Lohnausweis) :

        Position C - AHV Numer  : {details['f_c_ahv_number']}
        Position C - Geburtsdatum : {details['f_c_birthday']}
        Position E - Start der Steuerperiode : {details['f_e_from']}
        Position E - Ende der Steuerperiode : {details['f_e_to']}
        Position H - Name und Adresse :
               {details['f_h_title']}
               {details['f_h_name']}
               {details['f_h_Street']}
               {details['f_h_postcode']} {details['f_h_city']}
        Ziffer 1 - Lohn : {details['f_1_lohn']}
        Ziffer 3 - Unregelmässige Leistungen  : {details['f_h_3_award']}
        Ziffer 8 - Bruttolohn total : {details['f_8_bruttolohn_total']}
        Ziffer 9 - Beiträge AHV : {details['f_9_beitraege_ahv']}
        Ziffer 10 - Berufliche Vorsorge
        Ziffer 10.1 - Ordentliche Beiträge : {details['f_10_1_ordentliche_beitraege']}
        Ziffer 10.2 - Beiträge für den Einkauf : {details['f_10_2_beitraege_fur_den_einkauf']}
        Ziffer 11 - Nettolohn : {details['f_11_nettolohn']}
        Ziffer 12 - Quellensteuerabzug : {details['f_12_quellensteuerabzug']}

        Assume following :
            * that the person in question does not belong to any religious confession
            * that the person is single
            * average krankenkasse contribution for the age

        Use following rates :

        {state['municipal_tax_rates']}

        {state['kanton_and_bundes_tax_rates']}

        You will be provided history of questions asked and answers provided by you (if exists) like shown in this example

        "
        PREVIOUS_QUESTION_1: How to compute taxes
        ANSWER_TO_PREVIOUS_QUESTION_1 :
            Taxes are not too complicated

        PREVIOUS_QUESTION_2: How do I compute my municipal tax
        ANSWER_TO_PREVIOUS_QUESTION_2 :
            You do it this way ...
        "

        The question to be answered shall be marked with text "NEW_QUESTION"

        When answering question provide short and to the point answers providing details exactly as asked.

        If asked to provide explanations , then and only then provide explanations and details.

        Do not provide any details of the calculation, commentary, suggestions and such like unless explicitly asked.

        You must be as businesslike, specific and short in your answers

        You must not show how numbers were calculated, unless asked explicitly.

        No chit chat or caveats or recommendations.

        You must not start response with things like :
            'Okay, I understand. Let's do ...'
            or
            'Okay, Here is ...'
            or
            'Let us...'
            or such like

        Be business like and to the point, do not recommend or suggest anything unless asked.

        Do not hallucinate, provide responses only on the basis of provided information.

        Provide response in English.
      """

class SwissCheeseTaxChat:
    """Main chat: routes each question either to the tax declaration specialist (RAG)
    or to the tax calculation prompt, and keeps the calculation conversation history."""

    def __init__(self):
        self.client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
        self.category_analyzer = QuestionCategoryAnalyzer()
        # list of (question, answer) tuples of the tax calculation conversation
        self.conversation_history = []
        # set by boot_strap() once the salary certificate and the tax rates are known
        self.prompt_head = ""
        self.tax_declaration_specialist = TaxDeclarationSpecialist()

    def boot_strap(self, state : TaxChatState):
        """Prepare the specialist's vector store and build the prompt head from ``state``."""
        display(Markdown("### Loading up tax declaration form and tax declaration guide"))
        self.tax_declaration_specialist.set_up_vector_store()

        display(Markdown("### Initializing main chat prompt"))
        self.prompt_head = get_main_chat_prompt(state)

        display(Markdown("### Chat boot strap completed"))

    @retry.Retry(predicate=is_retriable)
    def chat_question(self, question: str) -> str:
        """Answer ``question`` and return the response text.

        Tax declaration questions are forwarded to the tax declaration specialist and are
        not recorded in the history. All other questions are answered from the prompt
        head plus the numbered history of previous questions and answers.
        """
        cat = self.category_analyzer.determine_category(question)

        if cat == "TaxDeclaration":
            display(Markdown("### I will forward this question to tax declaration specialist. Stand by"))
            # deliberately do not track the conversation history with the tax declaration specialist
            # as it uses embedded documents to provide answers
            return self.tax_declaration_specialist.ask_question_about_tax_form(question)

        # prompt head, then the numbered conversation history, then the new question
        prompt_parts = [self.prompt_head]
        for q_count, (previous_question, previous_answer) in enumerate(self.conversation_history, start=1):
            prompt_parts.append(
                f"\n\nPREVIOUS_QUESTION_{q_count}: {previous_question}"
                f"\n\nANSWER_TO_PREVIOUS_QUESTION_{q_count}: {previous_answer}"
            )
        prompt_parts.append(f"\n\nNEW_QUESTION: {question}")
        prompt = "".join(prompt_parts)

        response = self.client.models.generate_content(
            model=GEN_AI_MODEL,
            contents=prompt
        )
        # response.text may be None when the model returns no text part
        answer = response.text or ""

        self.conversation_history.append((question, answer))

        return answer

    def determine_category(self, question: str) -> str:
        """Return the category of ``question``.

        Kept for backward compatibility; delegates to the shared QuestionCategoryAnalyzer
        (which already retries on retriable API errors) instead of duplicating its logic.
        """
        return self.category_analyzer.determine_category(question)


In [12]:
def bootstrap_main_chat(state : TaxChatState) -> TaxChatState:
    """Graph node: boot-strap the chat object stored under ``state["main_chat"]``
    with the current state and hand the state back unchanged."""
    main_chat = state["main_chat"]
    main_chat.boot_strap(state)
    return state


### Prompt engineering for extraction of personal details from a salary certificate

The salary certificate is expected to arrive as a scanned image. The LLM is used to extract structured data in JSON format.


In [13]:
# Begin with asking user to provide salary certificate ( Lohnausweis)
# LOHNAUSWEIS_PROMPT is sent to the model together with the scanned certificate image.
# It names the JSON id to use for each form field; the ids listed here are the keys
# that get_main_chat_prompt later reads from the extracted dictionary.


LOHNAUSWEIS_PROMPT = """
You are a friendly and highly experienced swiss tax expert specializing on the taxes in canton Zürich.

Extract the form information from the image .
The information is to be extracted in json format.
The image is the scan of Swiss Lohnausweiss.

Key elements (not complete):

Field C: AHV Number followed by the birthday
* json id: f_c_description:AHV Number followed by the birthday
* json id: f_c_ahv_number
* json id: f_c_birthday

Field E: Tax period from and to
* json id: f_e_from
* json id: f_e_to

Field H:
* json id: f_h_description: Name and address, Address consists of 4 lines, 1. Title, 2. Name, 3. Street, 4. Postcode and City
* json id: f_h_title
* json id: f_h_name
* json id: f_h_Street
* json id: f_h_postcode
* json id: f_h_city

Field 1: Lohn ( english: Salary )
* json id: f_1_description: Lohn ( english: Salary )
* json id: f_1_lohn

Field 3: Unregelmässige Leistungen ( english: Discretionary Award )
" json id: f_3_description: Unregelmässige Leistungen ( english: Discretionary Award )
* json id: f_h_3_award

Field 8: Bruttolohn total ( english: Total Salary ), sum of Field 1 and Field 3
* json id: f_8_description: Bruttolohn total ( english: Total Salary ), sum of Field 1 and Field 3
* json id: f_8_bruttolohn_total

Field 9: Beiträge AHV ( english: AHV Contribution )
* json id: f_9_description: Beiträge AHV ( english: AHV Contribution )
* json id: f_9_beitraege_ahv

Field 10: Berufliche Vorsorge ( english: Employment Insurance ), consists of sub fields 10.1 and 10.2
* json id: f_10_description: Berufliche Vorsorge ( english: Employment Insurance ), consists of sub fields 10.1 and 10.2
Subfield 10.1: Ordentliche Beiträge ( english: Regular Contributions )
Subfield 10.2: Beiträge fur den Einkauf ( english: Contributions for purchasing )
* json id: f_10_1_ordentliche_beitraege
* json id: f_10_2_beitraege_fur_den_einkauf

Field 11: Nettolohn ( english: Net Salary ) Field 8 minus Field 9 minus Field 10
* json id: f_11_description: Nettolohn ( english: Net Salary ) Field 8 minus Field 9 minus Field 10
* json id: f_11_nettolohn

Field 12: Quellensteuerabzug ( english: Source Tax Deduction )
* json id: f_12_description: Quellensteuerabzug ( english: Source Tax Deduction )
* json id: f_12_quellensteuerabzug


The output must be based on the following Python class structure:

class SalaryCertificate(TypedDict):
        f_c_description: str
        f_c_ahv_number: str
        f_c_birthday: str
        f_e_from: str
        f_e_to: str
        f_h_description: str
        f_h_title: str,
        f_h_name: str
        f_h_Street: str
        f_h_postcode: str
        f_h_city: str
        f_1_description: str
        f_1_lohn: str
        f_3_description: str
        f_h_3_award: str
        f_8_description: str
        f_8_bruttolohn_total: str
        f_9_description: str
        f_9_beitraege_ahv: str
        f_10_description: str
        f_10_1_ordentliche_beitraege: str,
        f_10_2_beitraege_fur_den_einkauf: str,
        f_11_description: str
        f_11_nettolohn: str
        f_12_description: str
        f_12_quellensteuerabzug: str

"""

def extract_salary_certificate_details(state: TaxChatState, lohnausweis_scan = None) -> TaxChatState:
    """Graph node: obtain the salary certificate scan and extract its fields with the LLM.

    Asks for the URL of the scan (a fake certificate is used when the answer is blank),
    downloads it, shows it, and stores the JSON extracted by the model under
    ``state["lohnausweis"]``.

    Args:
        state: Chat state to update.
        lohnausweis_scan: Unused; kept so existing callers keep working.

    Returns:
        The same ``state`` with ``lohnausweis`` populated.
    """
    # local import: only needed to embed the user-supplied URL safely into the preview HTML
    from html import escape

    display(Markdown("### Please provide url to scan of your salary certificate\n\n#### Leave blank for a fake certificate to be used."))
    url = optional_input("Please provide url to scan of your salary certificate")

    fake_salary_certificate = f"{TAX_DOC_BASE_URL}/Lohnausweis_2024_1_fake.png"

    url_stripped = url.strip()
    salary_certificate_url = url_stripped if url_stripped else fake_salary_certificate
    salary_certificate_file_name = "salary_certificate.png"

    # The local file name is the same for every URL, so a scan cached by an earlier run
    # would otherwise be reused for a different certificate. Always fetch afresh.
    if os.path.exists(salary_certificate_file_name):
        os.remove(salary_certificate_file_name)
    download_file_if_needed(
        salary_certificate_url,
        salary_certificate_file_name
    )

    #Show the downloaded certificate
    display(Markdown("### Loaded this salary certificate"))
    # the URL is user input: escape it before placing it into an HTML attribute
    display(HTML(f'<img src="{escape(salary_certificate_url, quote=True)}" style="width:40%;">'))

    display(Markdown("### Extracting details"))

    client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
    # the context manager closes the image file handle once the request has been sent
    with Pim.open(salary_certificate_file_name) as scan:
        response = client.models.generate_content(
            model=GEN_AI_MODEL,  # same model constant as the rest of the notebook
            contents=[LOHNAUSWEIS_PROMPT, scan],
            config=types.GenerateContentConfig(
                temperature=0,
                response_mime_type="application/json"
            )
        )

    certificate = json.loads(repair_json(response.text))
    display(Markdown("### Details extracted"))

    # single quotes inside the f-string keep this line valid on Python versions before 3.12
    display(Markdown(f"### Hello {certificate['f_h_name']}.\n\n### Warm welcome to Swiss Cheese tax advisor.\n\nStand by while I load some additional documentation."))

    state["lohnausweis"] = certificate

    return state


### Municipal tax rates

Municipal tax rates are downloaded from cantonal web site and prepared as a csv dump ready for inclusion into the prompt

In [14]:
# Fetch Kanton Zurich steuerfuss
def load_municipal_tax_rates(state: TaxChatState) -> TaxChatState:
    """Graph node: download the Canton Zürich municipal tax rate CSV and store a
    pipe-separated text dump of the selected columns in ``state["municipal_tax_rates"]``."""
    source_base_url = "https://www.web.statistik.zh.ch/ogd/data/steuerfuesse"
    csv_file_name = "kanton_zuerich_stf_aktuell.csv"

    # original CSV column -> name used in the prompt
    column_names = {
        "COMMUNITY" : "community",
        "STF_O_KIRCHE1": "rate_without_church",
        "STF_REF1": "rate_for_reformists",
        "STF_KATH1": "rate_for_katholics",
        "STF_CKRKATH1": "rate_for_christkatholics",
        "JUR_PERS": "rate_for_companies"
    }
    # columns kept for the prompt (the company rate is renamed but not kept)
    kept_columns = [
        "community",
        "rate_without_church",
        "rate_for_reformists",
        "rate_for_katholics",
        "rate_for_christkatholics",
    ]

    display(Markdown("### Loading municipal tax rates..."))
    download_file_if_needed(f"{source_base_url}/{csv_file_name}", csv_file_name)

    rates = pd.read_csv(csv_file_name).rename(columns=column_names)[kept_columns]

    # heading followed by the CSV dump, ready for inclusion into the prompt
    dump = StringIO()
    dump.write("\n\n*** Actual municipal tax rates for Kanton Zürich per municipality and religious confession (Aktuell Kanton Zürich Steuerfüsse) ***\n\n")
    rates.to_csv(dump, sep='|', index=False)
    state["municipal_tax_rates"] = dump.getvalue()

    return state

### Bundes and Kanton tax rates

Tax regulations and tariffs are downloaded and processed by LLM to extract tax rates in csv format ready for inclusion into the prompt

In [15]:
# extract tax rates
# Tax document filenames
ZH_2024_TAX_MANUAL = "305_Wegleitung_ZH_2024.pdf"
SWISS_TAX_SYSTEM = "ch_steuersystem.pdf"
SHORT_INTRO_INTO_ZH_TAXES = "steuern_zh_kurz_de_20231130.pdf"
BUNDES_STEUER_GESETZ = "bundessteuer_642_11.pdf"
KANTON_STEUER_GESETZ = "kantonsteuer_631_1.pdf"

TAX_DOC_BASE_URL = "https://raw.githubusercontent.com/maxkiva/swiss-cheese/main/docs"

# Documents from which the tax rates are extracted.
# Each entry holds a human readable description, the download URL and the local file
# name under which the document is stored (and from which it is uploaded to the LLM).
tax_rates_sources = {
    "dbst_tarife" : {
        "description" : "Direkt Bundes Steuer Tarife",
        "url" : "https://www.estv.admin.ch/dam/estv/de/dokumente/dbst/tarife/dbst-tairfe-58c-2024-de.pdf.download.pdf/dbst-tairfe-58c-2024-de.pdf",
        "file_name" : "dbst-tairfe-58c-2024-de.pdf"
    },
    "kantonsteuer_631_1" : {
        "description" : "Kanton Steuer Gesetz, 631.1",
        "url" : f"{TAX_DOC_BASE_URL}/{KANTON_STEUER_GESETZ}",
        "file_name" : KANTON_STEUER_GESETZ
    },
    "bundessteuer_642_11" : {
        "description" : "Bundes Steuer Gesetz, 642.11",
        "url" : f"{TAX_DOC_BASE_URL}/{BUNDES_STEUER_GESETZ}",
        # must be the federal law file: with the cantonal file name the download is
        # skipped (file already present) and the cantonal law is processed twice
        "file_name" : BUNDES_STEUER_GESETZ
    },

}

# Prompt sent together with each uploaded tax document (see extract_data_from_pdf).
# It asks for tables as pipe-separated CSV with a description above each table and for
# single rates on one line each, without any additional commentary.
TAX_RATES_PROMPT = \
"""
Extract all the tables and rates required for calculation of taxes.

Tables should be extracted in csv format with description above it.
CSV separator is pipe character '|'.

For example

**Table for Single Individuals**

| Taxable Income| Tax for 1 Year| For Each Additional CHF 100 Income |
| 18300 | 25.41 | - |
| 18500 | 26.95 | - |
| 19000 | 30.80 | - |
| 20000 | 38.50 | - |
| 21000 | 46.20 | - |
| 22000 | 53.90 | - |
| 23000 | 61.60 | - |
| 24000 | 69.30 | - |

Single rates to be extract with description and value on the single line.

For example

* Rate for married couples : 15% *

Provide only data in the format described without any additional wording or commentary
"""


@retry.Retry(predicate=is_retriable)
def extract_data_from_pdf(file_name: str) -> str:
    """Upload the PDF stored at ``file_name`` and return the text the model produces
    for TAX_RATES_PROMPT. Retried on retriable API errors."""
    gemini = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
    uploaded_pdf = gemini.files.upload(file=file_name)
    extraction = gemini.models.generate_content(
        model=GEN_AI_MODEL,
        contents=[uploaded_pdf, TAX_RATES_PROMPT],
    )
    return extraction.text

def load_province_and_state_tax_rates(state: TaxChatState) -> TaxChatState:
    """Graph node: download every document listed in ``tax_rates_sources``, let the LLM
    extract its rates and store the concatenated text, one headed section per document,
    in ``state["kanton_and_bundes_tax_rates"]``."""
    collected = StringIO()

    for source in tax_rates_sources.values():
        description = source.get('description')
        local_file = source.get("file_name")

        display(Markdown(f"### Loading {description} ..."))

        download_file_if_needed(source.get("url"), local_file)
        extracted_rates = extract_data_from_pdf(local_file)

        # section heading followed by the extracted tables and rates
        collected.write(f"\n\n\n*** {description} ***\n\n")
        collected.write(extracted_rates)

        display(Markdown(f"### {description} loaded."))

    state["kanton_and_bundes_tax_rates"] = collected.getvalue()

    display(Markdown("### Kanton and Bundes Tax rates loaded."))

    return state


### Human node

Receives input from a user and passes it to the chat bot

In [16]:
def human_node(state: TaxChatState) -> TaxChatState:
    """Display user prompt and receive the user's input.

    Sets ``state["quit"]`` when the user enters one of the quit commands (matched
    ignoring case and surrounding whitespace); otherwise stores the input under
    ``state["user_input"]`` and echoes it.
    """

    # only ask "further" once a previous answer exists
    if state.get("response"):
        display(Markdown("### How can I help you further ?"))

    user_input = optional_input("How can I help you further ? ")

    # If it looks like the user is trying to quit, flag the conversation
    # as over. Normalised so that e.g. "Q" or " quit " are recognised too.
    if user_input.strip().lower() in {"q", "quit", "exit", "goodbye"}:
        state["quit"] = True
        return state

    state["user_input"] = user_input

    display(Markdown(f"### Question: {user_input}"))

    return state

### Chat bot node

Passes the user's input to the main chat (LLM or RAG, depending on the question category) and displays the response

In [17]:
def chatbot_with_welcome_msg(state: TaxChatState) -> TaxChatState:
    """Graph node: consume the pending user input, ask the main chat and show its answer.

    The pending input is always cleared; when it was empty the state is returned
    without asking anything."""
    question = state["user_input"]
    state["user_input"] = ""

    if not question:
        return state

    state["response"] = state["main_chat"].chat_question(question)

    display(Markdown("### Response:"))
    display(Markdown(state["response"]))

    return state

### Conditional exit node

Quits the chat on user request.

In [18]:
def maybe_exit_human_node(state: TaxChatState) -> Literal["chatbot", "__end__"]:
    """Routing function: continue with the chatbot node unless the quit flag is set,
    in which case a farewell is shown and the graph ends."""
    if not state.get("quit", False):
        return "chatbot"

    display(Markdown(f"### Good bye. Until next time"))
    return END

### LangGraph graph

Build the graph and run it. In demo mode this automatically runs through a set of predefined questions and then quits.

In [19]:
# build the graph
graph_builder = StateGraph(TaxChatState)

# Set-up nodes, executed once and in this order (see the edges below):
# salary certificate -> municipal rates -> cantonal / federal rates -> chat boot-strap
graph_builder.add_node("extract_salary_certificate_details", extract_salary_certificate_details)
graph_builder.add_node("load_municipal_tax_rates", load_municipal_tax_rates)
graph_builder.add_node("load_province_and_state_tax_rates", load_province_and_state_tax_rates)
graph_builder.add_node("bootstrap_main_chat", bootstrap_main_chat)

# Conversation loop nodes
graph_builder.add_node("chatbot", chatbot_with_welcome_msg)
graph_builder.add_node("human", human_node)

graph_builder.add_edge(START, "extract_salary_certificate_details")
graph_builder.add_edge("extract_salary_certificate_details", "load_municipal_tax_rates")
graph_builder.add_edge("load_municipal_tax_rates", "load_province_and_state_tax_rates")
graph_builder.add_edge("load_province_and_state_tax_rates", "bootstrap_main_chat")
graph_builder.add_edge("bootstrap_main_chat", "chatbot")
# The chatbot will always go to the human next.
graph_builder.add_edge("chatbot", "human")
# After the human node, maybe_exit_human_node decides between "chatbot" and END.
graph_builder.add_conditional_edges("human", maybe_exit_human_node)

the_chat = graph_builder.compile()
#print(the_chat.get_graph().draw_ascii())

In [20]:
# Run the graph from a fresh chat with no pending question or response.
# Each conversation turn costs two graph steps (chatbot + human) on top of the
# start-up nodes, so a longer interactive session would exceed LangGraph's
# default recursion limit (25 per the RunnableConfig docs) and abort with a
# GraphRecursionError. Raise the limit explicitly to leave room for ~45 turns.
state = the_chat.invoke(
    {"main_chat": SwissCheeseTaxChat(), "user_input": "", "response": ""},
    config={"recursion_limit": 100},
)

### Please provide url to scan of your salary certificate

#### Leave blank for a fake certificate to be used.

### Loaded this salary certificate

### Extracting details

### Details extracted

### Hello Peter Paul Rubens.

### Warm welcome to Swiss Cheese tax advisor.

Stand by while I load some additional documentation.

### Loading municipal tax rates...

### Loading Direkt Bundes Steuer Tarife ...

### Direkt Bundes Steuer Tarife loaded.

### Loading Kanton Steuer Gesetz, 631.1 ...

### Kanton Steuer Gesetz, 631.1 loaded.

### Loading Bundes Steuer Gesetz, 642.11 ...

### Bundes Steuer Gesetz, 642.11 loaded.

### Kanton and Bundes Tax rates loaded.

### Loading up tax declaration form and tax declaration guide

### Initializing main chat prompt

### Chat boot strap completed

### Question: Calculate total tax with breakdown by gemeinde, kanton and bundes components, just provide the numbers. No explanations

### Response:

Okay. Here are the tax calculations based on the provided information:

**1. Calculate Taxable Income:**

*   **Gross Income:** CHF 82,000
*   **Deductions:**
    *   AHV Contributions: CHF 5,000
    *   Occupational Pension Fund (BVG) Contributions: CHF 4,000
    *   Krankenkasse (Health Insurance): Assume CHF 3,000 (average for the age)
    *   Single Deduction (estimated): CHF 2,500
    *   Professional Expenses (estimated): CHF 2,000
*   **Total Deductions:** CHF 16,500
*   **Taxable Income:** CHF 82,000 - CHF 16,500 = CHF 65,500

**2. Calculate Direct Federal Tax:**

*   Based on the table for single individuals, for a taxable income of CHF 65,500 the tax is CHF 835.06 + (500 * 2.97) = CHF 849.91

**3. Calculate Cantonal Tax (based on Verlorenstrasse 1, 8712 Stäfa):**

*   **Taxable Income:** CHF 65,500
*   Using the "Grundtarif" table:
    *   0% on first 6,900
    *   2% on next 4,900
    *   3% on next 4,800
    *   4% on next 7,900
    *   5% on next 9,600
    *   6% on next 11,000
    *   7% on remaining 20,400
*   Cantonal Tax = (0% * 6,900) + (2% * 4,900) + (3% * 4,800) + (4% * 7,900) + (5% * 9,600) + (6% * 11,000) + (7% * 20,400) = 0 + 98 + 144 + 316 + 480 + 660 + 1428 = CHF 3126

**4. Calculate Municipal Tax:**

*   Stäfa rate (without church): 78%
*   Municipal Tax = 78% * CHF 3126 = CHF 2438.28

**5. Total Taxes:**

*   Direct Federal Tax: CHF 849.91
*   Cantonal Tax: CHF 3126
*   Municipal Tax: CHF 2438.28

**Summary:**

*   **Direct Federal Tax:** CHF 849.91
*   **Cantonal Tax:** CHF 3126
*   **Municipal Tax:** CHF 2438.28
*   **Total Taxes:** CHF 6414.19


### How can I help you further ?

### Question: Explain how bundes tax was calculated showing numbers every step of the way

### Response:

Okay. Here is the calculation of Direct Federal Tax:

1.  **Taxable Income**: CHF 65,500
2.  **Find the base tax from the table**: For a taxable income of CHF 65,000, the tax is CHF 835.06
3.  **Calculate the additional tax**: The income exceeds CHF 65,000 by CHF 500. The rate for each additional CHF 100 is CHF 2.97. So, 5 \* CHF 2.97 = CHF 14.85
4.  **Total Direct Federal Tax**: CHF 835.06 + CHF 14.85 = CHF 849.91

### How can I help you further ?

### Question: How do I fill in income from investments

### I will forward this question to tax declaration specialist. Stand by

### Response:

To report investment income on your Kanton Zürich tax return for 2024, you'll need to fill out the "Wertschriftenverzeichnis" (securities schedule). This schedule requires you to provide detailed information from your bank statements or the e-tax statement, which you can import into tax declaration software. Make sure to include the assets of the taxpayer, spouse, and any minor children born in 2007 or later, as indicated in the instructions. The investment income should be reported on page 4, section 30.1 of the tax declaration form, and make sure to evaluate with the current exchange rate list or add missing values.

### How can I help you further ?

### Question: Which fields do I populate with details from Lohnausweis

### I will forward this question to tax declaration specialist. Stand by

### Response:

Okay, I can help you with that! Based on the provided text, here are the fields that you need to populate with details from Lohnausweis (salary statement):

*   **1.1 Haupterwerb (Main Income)**

    *   Person 1: 100
    *   Person 2: 101
*   **1.2 Nebenerwerb (Secondary Income)**

    *   Person 1: 102
    *   Person 2: 103

### How can I help you further ?

### Good bye. Until next time

In [22]:
# Show the salary-certificate fields held in the final graph state.
state["lohnausweis"]

{'f_c_description': 'AHV-NR. - No AVS - N. AVS',
 'f_c_ahv_number': '756.1111.2222.33',
 'f_c_birthday': '11.12.1990',
 'f_e_from': '01.01.2024',
 'f_e_to': '31.12.2024',
 'f_h_description': 'Name and address, Address consists of 4 lines, 1. Title, 2. Name, 3. Street, 4. Postcode and City',
 'f_h_title': 'Herr',
 'f_h_name': 'Peter Paul Rubens',
 'f_h_Street': 'Verlorenstrasse 1',
 'f_h_postcode': '8712',
 'f_h_city': 'Stäfa',
 'f_1_description': 'Lohn',
 'f_1_lohn': "80'000",
 'f_3_description': 'Unregelmässige Leistungen',
 'f_h_3_award': "2'000",
 'f_8_description': 'Bruttolohn total',
 'f_8_bruttolohn_total': "82'000",
 'f_9_description': 'Beiträge AHV',
 'f_9_beitraege_ahv': "5'000",
 'f_10_description': 'Berufliche Vorsorge',
 'f_10_1_ordentliche_beitraege': "4'000",
 'f_10_2_beitraege_fur_den_einkauf': '0',
 'f_11_description': 'Nettolohn',
 'f_11_nettolohn': "73'000",
 'f_12_description': 'Quellensteuerabzug',
 'f_12_quellensteuerabzug': "12'000"}

In [24]:
# Print the raw canton and federal tax-rate text stored in the final graph state.
print(state["kanton_and_bundes_tax_rates"])




*** Direkt Bundes Steuer Tarife ***

**Table for Single Individuals**

|Taxable Income|Tax for 1 Year|For Each Additional CHF 100 Income|
|---|---|---|
|18300|25.41|-|
|18500|26.95|-|
|19000|30.80|-|
|20000|38.50|-|
|21000|46.20|-|
|22000|53.90|-|
|23000|61.60|-|
|24000|69.30|-|
|25000|77.00|0.77|
|26000|84.70|-|
|27000|92.40|-|
|28000|100.10|-|
|28700|105.49|-|
|29000|107.80|-|
|30600|120.12|-|
|31800|129.36|-|
|32800|137.05|-|
|32900|137.93|-|
|33500|143.21|-|
|34000|147.61|-|
|35000|156.41|-|
|36000|165.21|-|
|37000|174.01|0.88|
|38000|182.81|-|
|39000|191.61|-|
|40000|200.41|-|
|41000|209.21|-|
|42900|225.90|-|
|43000|228.54|-|
|43500|241.74|-|
|44000|254.94|-|
|45000|281.34|-|
|46000|307.74|-|
|47000|334.14|-|
|48000|360.54|-|
|49000|386.94|2.64|
|50000|413.34|-|
|52700|484.62|-|
|52800|487.26|-|
|53000|492.54|-|
|54000|518.94|-|
|55000|545.34|-|
|56000|571.74|-|
|57200|603.40|-|
|57300|606.37|-|
|57500|612.31|-|
|58000|627.16|-|
|60500|701.41|-|
|60600|704.38|2.97|
|62000|745.