In [90]:
import torch

from dotenv import load_dotenv

import chromadb
from chromadb.utils import embedding_functions
from chromadb.config import Settings


from llama_index import GPTChromaIndex, LLMPredictor, LangchainEmbedding, ServiceContext, PromptHelper, TrafilaturaWebReader, SimpleDirectoryReader
from llama_index.prompts.prompts import QuestionAnswerPrompt
from llama_index import download_loader

from llama_index.vector_stores import ChromaVectorStore

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.chat_models import ChatOpenAI



In [16]:
# Notebook-wide configuration.
# CHROMA_COLLECTION_NAME: a Chroma collection name.
# EMBEDDING_PERSIST_FOLDER: directory handed to Chroma as its persist directory.
CHROMA_COLLECTION_NAME = "demo-collection"
EMBEDDING_PERSIST_FOLDER = "/tmp/embeddings"

In [4]:
# Load environment variables from a .env file via python-dotenv.
# NOTE(review): presumably this supplies the OpenAI credentials used by ChatOpenAI - confirm.
load_dotenv()

True

In [10]:
# Check whether PyTorch can see a CUDA device (displayed as the cell result).
torch.cuda.is_available()

True

In [145]:
# Question-answering prompt template. It carries two placeholders:
# {context_str} for the context text and {query_str} for the question.
# The pieces are joined with newlines; the trailing empty entry produces
# the final newline of the template.
QUESTION_ANSWER_PROMPT_TMPL = "\n".join(
    [
        "Context information is provided below. ",
        "---------------------",
        "{context_str}",
        "---------------------",
        "Given the context information and not prior knowledge, "
        "Please answer this question: ```{query_str}```",
        "",
    ]
)

In [146]:
# Wrap the template string in a QuestionAnswerPrompt; the query cells below
# pass this object as `text_qa_template`.
QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL)

In [171]:
# Model name shared by the llama_index embedding model and the Chroma
# embedding function created in the next cells.
EMBEDDINGS_MODEL_NAME = "all-MiniLM-L6-v2"

In [172]:
# Embedding model for llama_index: HuggingFaceEmbeddings wrapped in the
# LangchainEmbedding adapter.
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name=EMBEDDINGS_MODEL_NAME)
)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cuda


In [173]:
# Embedding function passed to the Chroma collection; it is built from the
# same EMBEDDINGS_MODEL_NAME as embed_model.
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=EMBEDDINGS_MODEL_NAME
)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cuda


In [174]:
# Chroma client configured with the "duckdb+parquet" implementation and
# EMBEDDING_PERSIST_FOLDER as its persist directory.
chroma_settings = Settings(
    chroma_db_impl="duckdb+parquet",
    persist_directory=EMBEDDING_PERSIST_FOLDER,
)
chroma_client = chromadb.Client(chroma_settings)

INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
INFO:chromadb:Running Chroma using direct local API.
INFO:chromadb.db.duckdb:No existing DB found in /tmp/embeddings, skipping load
INFO:chromadb.db.duckdb:No existing DB found in /tmp/embeddings, skipping load


## Set LLM Predictor

In [175]:
# LLM predictor backed by ChatOpenAI ("gpt-3.5-turbo", temperature 0).
llm_predictor = LLMPredictor(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
)

In [56]:
# Sizing values handed to PromptHelper in the next cell.
max_input_size = 2048  # maximum input size
num_output = 256  # number of output tokens
max_chunk_overlap = 20  # maximum chunk overlap

In [57]:
# Build the PromptHelper from the sizing values defined above.
prompt_helper = PromptHelper(
    max_input_size=max_input_size,
    num_output=num_output,
    max_chunk_overlap=max_chunk_overlap,
)

In [176]:
# Bundle the LLM predictor, embedding model and prompt helper into the
# ServiceContext used when the index is created.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embed_model,
    prompt_helper=prompt_helper,
)


### Generate index - collection of PDFs

In [177]:
from pathlib import Path

In [178]:
# Dataset selection: the folder scanned for PDFs and the Chroma collection
# name used for them.
# NOTE: the constant is spelled COLLENTION_NAME (sic); the cell that creates
# the collection references it under this spelling, so the name is kept.
FOLDER_PATH = "../sample_data/ato"
COLLENTION_NAME = "sample-ato-documents"

# Alternative dataset (standards documents): swap with the pair above to use it.
# FOLDER_PATH = '../sample_data/standards'
# COLLENTION_NAME = "sample-standards"

In [179]:
# Fetch the collection named COLLENTION_NAME, creating it if it does not
# exist yet, with the sentence-transformer embedding function attached.
chroma_collection = chroma_client.get_or_create_collection(
    COLLENTION_NAME,
    embedding_function=sentence_transformer_ef,
)

In [180]:

# Obtain the "PDFReader" loader through llama_index's download_loader;
# it is instantiated in the indexing loop below.
PDFReader = download_loader("PDFReader")

In [181]:
# Collect the .pdf files directly inside FOLDER_PATH (non-recursive).
# The list is sorted so the processing order is deterministic: the first
# path is the one that creates the index in the indexing cell, and glob
# itself gives no documented ordering guarantee.
root = Path(FOLDER_PATH)
paths = sorted(root.glob("*.pdf"))
print(paths)


[PosixPath('../sample_data/ato/ato-capital-gain-tax.pdf'), PosixPath('../sample_data/ato/ato-dividends.pdf')]


In [182]:

# Build a single Chroma-backed index over every PDF in `paths`.
# The first PDF creates the index; each later PDF is inserted into it.
index = None

# One loader instance is enough; it is reused for every file.
loader = PDFReader()

for path in paths:
    print(f"Loading {path}")
    documents = loader.load_data(file=path)

    if index is None:
        print("Creating a new index.")
        index = GPTChromaIndex.from_documents(
            documents,
            chroma_collection=chroma_collection,
            service_context=service_context,
        )
    else:
        print("Indesring into existing index")
        # Insert every document the loader returned. Inserting only
        # documents[0] would silently drop the rest whenever the loader
        # yields more than one document for a file.
        for document in documents:
            index.insert(document)

# Fail here with a clear message instead of letting later query cells hit
# an AttributeError on None when no PDF was found.
if index is None:
    raise RuntimeError("No PDF files were loaded, so no index was created.")

Loading ../sample_data/ato/ato-capital-gain-tax.pdf
Creating a new index.


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 36.09it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 32.81it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 66.65it/s]
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 116862 tokens


Loading ../sample_data/ato/ato-dividends.pdf
Indesring into existing index


Batches: 100%|██████████| 1/1 [00:00<00:00, 44.47it/s]
INFO:llama_index.token_counter.token_counter:> [insert] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [insert] Total embedding token usage: 19468 tokens


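### Example questions - standards

_Note: these questions target the standards documents, so the index must be built from `../sample_data/standards` (see the commented-out configuration in the index-generation section) rather than from the ATO folder._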

In [118]:
# Standards question, answered with the custom QA prompt.
r = index.query(
    "Under what conditions fatigue assessment is not required?",
    text_qa_template=QUESTION_ANSWER_PROMPT,
)

INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4334 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens


In [119]:
# Print the response returned by the query in the previous cell.
print(r)

Fatigue assessment is not required for a member, connection, or detail if the normal and shear design stress ranges (f*) satisfy the condition f* < φ × 27 MPa or if the number of stress cycles (nsc) satisfies the condition nsc < 2 ×.


In [124]:
# Show the extra_info attached to the first source node of the response.
r.source_nodes[0].node.extra_info

{'document_id': 'ca47d0b5-a61b-48f9-99f8-9638141bfe08'}

In [130]:
# Print the text of the first source node of the response.
print(r.source_nodes[0].node.get_text())

document_id: ca47d0b5-a61b-48f9-99f8-9638141bfe08

as determined by the analysis
∆cw =  m i d - s p a n  d e f l e c t i o n  o f  t h e  m e m b e r  r e s u l t i n g  f r o m  t h e  t r a n s v e r s e  l o a d i n g
together with only those end bending moments which produce a mid-span 
deflection in the same direction as the transverse load 
4.4.2.3    Moment amplification for a sway member 
For a sway member, the design bending moment ( M*) shall be calculated using either the 
method given in this Clause, or the method given in Appendix F. 
For this Clause, the design bending moment ( M*) shall be calculated as follows: 
M* = δm *
mM 
The moment amplification factor ( δm) shall be taken as the greater of— 
δb =  t h e  m o m e n t  a m p l i f i c a t i o n  f a c t o r  f o r  a  b r a c e d  m e m b e r  d e t e r m i n e d  i n  
accordance with Clause 4.4.2.2, and Licensed to Mrs Maria Lapchev on 10 September 2015. 1 user personal license only. Copying, copy/pasting, storage

In [135]:
# Standards question, answered with the custom QA prompt.
r = index.query(
    "In what cases to perform plastic vs elastic analysis?",
    text_qa_template=QUESTION_ANSWER_PROMPT,
)

INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4405 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 10 tokens


In [137]:
# Print the response returned by the query in the previous cell.
print(r)

The provided context is related to the calculation of the nominal shear capacity of a web in a structural member. If the web has a non-uniform shear stress distribution, such as in a member with unequal flanges, varying web thickness or holes not used for fasteners, a rational elastic analysis should be performed to determine the maximum and average design shear stresses in the web. The nominal shear capacity of the web shall then be calculated using the equation provided in Clause 5.11.3. However, if the maximum web panel depth to thickness ratio dp/tw satisfies the condition specified in Clause 5.11.5, plastic analysis should be performed instead to determine the nominal shear buckling capacity of the web.


### Example questions - ATO

In [97]:
# ATO question, answered with the custom QA prompt.
r = index.query(
    "Who is eligible for CGT discount?",
    text_qa_template=QUESTION_ANSWER_PROMPT,
)

INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4802 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 8 tokens


In [103]:
# Print the response returned by the query in the previous cell.
print(r)

The CGT discount applies to individuals who meet the 12-month ownership requirement by disposing of shares after 12 months or more of acquiring them, and those who received new interests in a demerged entity that relate to pre-CGT interests in the head entity and acquired their new interests without a CGT event happening to their original interests. Additionally, individuals may choose to make a capital loss if they hold a share or financial instrument relating to a company that went into liquidation or administration, and a liquidator or administrator of the company made a written declaration that they believed the shares were worthless or the financial instruments had no value or negligible value. Trust non-assessable payments may also be subject to CGT, and if an individual receives a non-assessable payment from a trust, CGT event E4 may occur, which may require cost base adjustments to units or trust interests. However, the eligibility for CGT discount in this context depends on th

In [183]:
# ATO question, answered with the custom QA prompt.
r = index.query(
    "Can I get CGT discount if I sell shares 11 month after buying them?",
    text_qa_template=QUESTION_ANSWER_PROMPT,
)

Batches: 100%|██████████| 1/1 [00:00<00:00, 89.12it/s]
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4227 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 16 tokens


In [184]:
# Print the response returned by the query in the previous cell.
print(r)

The new context provided is related to non-assessable payments made by trusts and their impact on CGT events. While it provides useful information, it doesn't directly answer the question of whether CGT discount can be claimed if shares are sold 11 months after buying them. Therefore, the original answer still stands: No, you need to hold the shares for at least 12 months to be eligible for the CGT discount. Trust non-assessable payments (CGT event E4) may have different rules and requirements, but they are not directly related to the question of whether CGT discount can be claimed if shares are sold 11 months after buying them.


In [185]:
# Print the text of the first source node of the response.
print(r.source_nodes[0].node.text)

months to be 
eligible for the CGT discount
.
Example: CGT discount eligibility for new interests
You received BHP Steel Ltd shares under the demerger on 22 July 2002.
These related to shares you acquired in BHP Billiton Ltd on 15 August 2001.
You meet the 12-month ownership requirement for the CGT discount if you
dispose of the shares after 15 August 2002 – that is, 12 months or more
after the date you acquired the BHP Billiton shares.
However, you calculate the 12 months from the date of demerger if you either:
did not choose the rollover and you received new interests in the demerged
134 of 253
entity that relate to pre-CGT interests in the head entity
acquired your new interests without a CGT event happening to your original
interests.
Example: CGT discount eligibility for pre-CGT shares
You received BHP Steel Ltd shares under the demerger on 22 July 2002.
The shares related to pre-CGT shares you owned in BHP Billiton Ltd and
you did not choose a rollover.
You meet the 12-month own

In [186]:
# ATO question, answered with the custom QA prompt.
r = index.query(
    "Can I get CGT discount for dividends?",
    text_qa_template=QUESTION_ANSWER_PROMPT,
)

Batches: 100%|██████████| 1/1 [00:00<00:00, 45.70it/s]
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4068 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens


In [187]:
# Print the response returned by the query in the previous cell.
print(r)

The additional context provided is related to non-assessable payments made by trusts and the potential implications for capital gains tax (CGT) events. However, it does not directly address the question of whether CGT discount can be claimed for dividends. Therefore, the original answer remains the same.


In [188]:
# Print the text of the first source node of the response.
print(r.source_nodes[0].node.text)

months to be 
eligible for the CGT discount
.
Example: CGT discount eligibility for new interests
You received BHP Steel Ltd shares under the demerger on 22 July 2002.
These related to shares you acquired in BHP Billiton Ltd on 15 August 2001.
You meet the 12-month ownership requirement for the CGT discount if you
dispose of the shares after 15 August 2002 – that is, 12 months or more
after the date you acquired the BHP Billiton shares.
However, you calculate the 12 months from the date of demerger if you either:
did not choose the rollover and you received new interests in the demerged
134 of 253
entity that relate to pre-CGT interests in the head entity
acquired your new interests without a CGT event happening to your original
interests.
Example: CGT discount eligibility for pre-CGT shares
You received BHP Steel Ltd shares under the demerger on 22 July 2002.
The shares related to pre-CGT shares you owned in BHP Billiton Ltd and
you did not choose a rollover.
You meet the 12-month own

In [189]:
# ATO question, answered with the custom QA prompt.
result = index.query(
    "What is a difference between franked and unfranked dividends?",
    text_qa_template=QUESTION_ANSWER_PROMPT,
)

Batches: 100%|██████████| 1/1 [00:00<00:00, 45.93it/s]
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4159 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 12 tokens


In [190]:
# Print the response returned by the query in the previous cell.
print(result)

The additional context provided is not relevant to the original question about the difference between franked and unfranked dividends. Therefore, the original answer still stands: Franked dividends have a franking credit attached to them, while unfranked dividends do not have a franking credit attached to them.


In [159]:
# Print the text of the first source node of the response.
print(result.source_nodes[0].node.text)

to dividends. A non-share dividend may be franked or unfranked. Any
amount of the dividend, whether franked or unfranked, or any amount of franking
credit carried by the dividend should be shown at the appropriate item on your tax
return as if it were a dividend paid on shares.
Dividends on non-equity shares
Under the imputation system, dividends paid on certain shares that are classified as
non-equity shares (for example, some redeemable preference shares) are treated
as unfrankable distributions for imputation purposes. As a consequence, these
dividends cannot be franked.
For more information, see our 
Guide to the debt and equity tests
.
Continue to
Dividend or distribution statement
Dividend or distribution statement
https://www.ato.gov.au/Individuals/Tax-return/2022/In-detail/Publications/You-
and-your-shares-2022/?page=7
Last modified: 26 May 2022
QC 68032
If an Australian company pays or credits you with a dividend or a non-share
dividend, the company must also send you a divide

In [160]:
# Number of source nodes attached to the response.
len(result.source_nodes)

1

In [193]:
# ATO question, answered with the custom QA prompt.
result = index.query(
    "What is a difference between dividends and distributions?",
    text_qa_template=QUESTION_ANSWER_PROMPT,
)

Batches: 100%|██████████| 1/1 [00:00<00:00, 45.16it/s]
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4474 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens


In [194]:
# Print the response returned by the query in the previous cell.
print(result)


Dividends and distributions are terms used interchangeably to refer to payments made to shareholders by a company. If you own shares in a company, you may receive a dividend or distribution. Dividends can be fully franked, meaning that the whole amount of the dividend carries a franking credit, or partially franked, meaning that the dividend has a franked amount and an unfranked amount. The dividend statement or distribution statement you receive from the company paying the franked dividend must state the amount of the franking credit and the amounts of the franked and unfranked parts of the dividend. On the other hand, if a company pays or credits you with an unfranked dividend, there is no franking credit attached to it. The imputation system applies to non-share dividends in the same way that it applies to dividends. A non-share dividend may be franked or unfranked, and any amount of the dividend, whether franked or unfranked, or any amount of franking credit carried by the dividend

In [196]:
# Print the text of the first source node of the response.
print(result.source_nodes[0].node.text)

You and your shares 2022
https://www.ato.gov.au/Individuals/Tax-return/2022/In-detail/Publications/You-
and-your-shares-2022/
Last modified: 26 May 2022
QC 68032
You and your shares 2022
Our You and your shares guide provides information on the income you need to
declare, deductions and credits you can claim and records you need to keep if you
hold shares or convertible notes as an investment (NAT 2632).
In this guide
How to get this publication
About this guide
Basic concepts
Paying dividends or distributions
How dividends are taxed
How non-share dividends are taxed
Dividend or distribution statement
Taxation implications
Effect on tax payable
Your franking tax offset
When you are not entitled to claim a franking tax offset
Allowable deductions from dividend income
Dividends paid or credited by non-resident companies
Dividends paid or credited to non-resident shareholders
Claiming franking credits attached to a partnership distribution
Claiming franking credits attached to a trust dis

In [206]:
# ATO question, answered with the custom QA prompt.
result = index.query(
    "WHat is exempt from capital gain tax?",
    text_qa_template=QUESTION_ANSWER_PROMPT,
)

Batches: 100%|██████████| 1/1 [00:00<00:00, 58.00it/s]
INFO:llama_index.token_counter.token_counter:> [query] Total LLM token usage: 4734 tokens
INFO:llama_index.token_counter.token_counter:> [query] Total embedding token usage: 9 tokens


In [207]:
# Print the response returned by the query in the previous cell.
print(result)

The market valuation for tax purposes is crucial when calculating capital gains tax. It should be objective, accurate, and supported by evidence. The tax authority provides a guide called "Market Valuation for Tax Purposes" that explains the principles and processes for establishing a market value for tax purposes, their expectations, and the most common valuations for tax. Certain assets may be exempt from capital gains tax, such as a property transferred under a relationship breakdown rollover or a main residence exemption. However, eligibility and the extent of exemption may depend on various factors such as the period of ownership and use of the property. It is important to seek professional advice if you are unsure how the information applies to your circumstances.


In [208]:
# Print the text of the first source node of the response.
print(result.source_nodes[0].node.text)

modified: 01 Jul 2022
QC 66066
If a property was the home of you or your spouse, check if it is exempt from CGT.
On this page
Relationship breakdown rollover and main residence exemption
Eligibility
Calculating a partial exemption
Nominating a main residence
Foreign residents
Property transferred from a company or trust
Relationship breakdown rollover and main residence
exemption
When you sell a property that transferred to you under the relationship breakdown
rollover, you may be eligible for the 
main residence exemption
 from capital gains tax
(CGT).
You need to consider how you and your former spouse used the property during
your combined period of ownership
Eligibility
Under the relationship breakdown rollover, there was no capital gain or loss for
CGT purposes when your former spouse's share of the property transferred to you.
CGT was deferred, or 'rolled over', until you dispose of the property.
If the property was the 
main residence
 of you or your former spouse, you can
gener