# Part 3 - Llama 2 + RAG

## Shruti Badrinarayanan, Poojitha Venkatram

In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# @title Install dependencies
!pip install llama-index transformers accelerate bitsandbytes pypdf llama-index-llms-huggingface llama-index-embeddings-huggingface torchaudio torchdata torchtext torchvision datasets

Collecting llama-index
  Downloading llama_index-0.10.34-py3-none-any.whl (6.9 kB)
Collecting accelerate
  Downloading accelerate-0.30.0-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.4/302.4 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pypdf
  Downloading pypdf-4.2.0-py3-none-any.whl (290 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting llama-index-llms-huggingface
  Downloading llama_index_llms_huggingface-0.1.5-py3-none-any.whl (10 kB)
Collecting llama-index-embeddings-huggingface
  Downloading llama_index_embeddings_huggingface-0.2.0-py3-none-any.whl (7.1 kB)
Collecting datasets
  Downloading dat

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# @title Load LLM
llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-2-13b-chat-hf",
    tokenizer_name="meta-llama/Llama-2-13b-chat-hf",
    query_wrapper_prompt=PromptTemplate("<|system|>\n\n<|user|>\n{query_str}\n<|assistant|>\n"),
    context_window=4096,
    max_new_tokens=512,
    model_kwargs={"quantization_config": quantization_config},
    tokenizer_kwargs={"use_fast": False},
    generate_kwargs={"temperature": 0.3, "top_k": 50, "top_p": 0.95},
    messages_to_prompt=messages_to_prompt,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# @title Service Context
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-large-en-v1.5")

In [None]:
# %pwd

'/app/Part 3 - llama'

In [None]:
# @title Setup query engine.
from llama_index.core.retrievers import VectorIndexRetriever

documents = SimpleDirectoryReader(input_dir="/content/drive/MyDrive/Lab 2/Part 3/Part 3 - llama/pdf/").load_data()
vector_index = VectorStoreIndex.from_documents(
    documents, service_context=service_context
)

query_engine = vector_index.as_query_engine(response_mode="simple_summarize")

In [None]:
# @title Setup Questions, Ground Truths, Rag Answer Lists.
questions = [
    "A company produces 500 tons of plastic packaging annually and is subject to an Extended Producer Responsibility (EPR) scheme with a recovery target of 80%. How many tons of plastic packaging should the company aim to recover each year to meet the EPR requirement?",
    "What is the report quarter and when did it end?",
    "What are the results for the reporting quarter? Please describe in the following order using bullet points - revenue, gross margin, opex, op margin, net income, and EPS. Include both GAAP and non-GAAP numbers. Please also include quarter-over-quarter changes.",
    "If a municipality introduces a pay-as-you-throw program as part of its EPR strategy, charging households $0.05 per pound of collected waste, what is the cost for a household that generates 30 pounds per week?",
    "What's the OP margin for the current quarter?",
    "Identify potential risks associated with EPR implementation, such as regulatory changes, market volatility, and technological disruptions. Develop a risk assessment framework that assigns probabilities and impacts to each identified risk factor. Propose risk mitigation strategies for minimizing the adverse effects of these risks on EPR programs and stakeholder interests.",
    "What were Gap Inc's net sales for the first quarter of fiscal 2023 and how did they compare to the previous year?",
    "How did merchandise margin change from the previous year, and what were the drivers of this change?",
    "Did Gap Inc. have an operating profit or loss, and if so, what was the amount?",
    "What were the key results from each of Gap Inc.'s brands (Old Navy, Gap, Banana Republic, and Athleta)?",
    "What is the company's outlook on net sales for the second quarter and the full fiscal year 2023?",
    "How much cash and cash equivalents did Gap, Inc. have at the end of the quarter?",
    "How does the company expect gross margin to change in Q2 and the full year compared to 2022?",
    "Did Gap Inc. pay a dividend in the first quarter, and what is their dividend strategy going forward?",
    "What are the key actions Gap Inc. is taking to execute its plan?"
]

ground_truths = [
    "EPR Recovery Target Calculation:\n To meet the recovery target of 80%, the company should aim to recover 80% of 500 tons, which is:\n Recovery Target=80%×500 tons = 0.80×500 tons=400 tons\n So, the company should aim to recover 400 tons of plastic packaging each year.",  # Q1
    "The reported quarter is the first financial quarter of 2023. The quarter ended on April 29, 2023",  # Q2
    "The Results for the reporting quarter are:\n 1. Revenue: GAAP revenue was $1,214 million (up 3% YoY and 2% QoQ), while non-GAAP revenue stood at $1,200 million (up 3% YoY and 2% QoQ).\n2. Gross Margin: GAAP gross margin was 37.1% (down 0.3% YoY and flat QoQ), non-GAAP was 36.6% (down 0.4% YoY and flat QoQ).\n 3. Operating Expenses (Opex): GAAP operating expenses were $714 million (up 2% YoY and 1% QoQ), non-GAAP expenses were $687 million (up 2% YoY and 1% QoQ).\n 4. Operating Margin: GAAP operating margin came in at 13.1% (down 0.2% YoY and flat QoQ), and non-GAAP was 12.5% (down 0.3% YoY and flat QoQ).\n 5. Net Income: GAAP net income was $18 million (down 62% YoY and 85% QoQ), non-GAAP net income was $3 million (down 67% YoY and 88% QoQ).\n 6. EPS: GAAP EPS was $0.05 per share (down 62% YoY and 85% QoQ), non-GAAP EPS was $0.01 per share (down 67% YoY and 88% QoQ)."    "A household generating 30 pounds of waste per week would incur a cost of $1.50 weekly under a pay-as-you-throw program charging $0.05 per pound .",  # Q4
    "Reported operating margin was -0.3%. Adjusted operating margin was 0.5%.",  # Q5
    "Potential Risks in EPR Implementation: \n * Regulatory Changes: High probability (0.7), high impact (0.8) \n * Market Volatility: Moderate probability (0.6), moderate impact (0.5) \n * Technological Disruptions: Low probability (0.4), low impact (0.3) \n Risk Mitigation Strategies: \n * Regulatory Changes: Proactively monitor changes, maintain open dialogue, design flexible programs. \n * Market Volatility: Offer diverse EPR portfolio, implement risk management. \n * Technological Disruptions: Prioritize R&D, create contingency plans. \n Key Takeaway: Risk assessment and proactive mitigation are crucial for successful EPR programs.",  # Q6
    "Gap Inc's net sales for Q1 fiscal 2023 were $3.28 billion, down 6% from last year which is inclusive of an estimated 1-point foreign exchange headwind and 2 percentage points of negative impact from the sale of Gap China.",  # Q7
    "Merchandise margin increased 600 basis points versus last year, or 610 basis points on an adjusted basis, due to lower air freight expense and improved promotional activity in the quarter, partially offset by inflationary cost headwinds.",  # Q8
    "Reported operating loss of $10 million. Adjusted operating income was $18 million.",  # Q9
    "The key results for Gap Inc.'s brands in the first quarter are the following:\n* Old Navy: $1.8 billion in revenue, sales down 1%.\n* Gap: $692 million in revenue, sales down 13%.\n* Banana Republic: $432 million in revenue, sales down 10%.\n* Athleta: $321 million in revenue, sales down 11%.",  # Q10
    "Second quarter net sales expected to decrease mid to high-single digits. Fiscal 2023 net sales expected to decrease low to mid-single digits.",  # Q11
    "Gap Inc. had $1.2 billion in cash and cash equivalents at the end of the quarter.",  # Q12
    "Gross margin is expected to expand in Q2 and the full year compared to the prior year.",  # Q13
    "Paid a dividend of $0.15 per share in the first quarter. The Board approved the same for the second quarter.",  # Q14
    "Key actions include driving critical change for consistent long-term results, focusing on customer experience, reducing inventory, and strengthening the balance sheet."  # Q15
]

rag_answers = []

In [None]:
# import torch
# torch.cuda.empty_cache()

In [None]:
# @title Question 1
response_object = query_engine.query(questions[0])
print("Q1: ", questions[0], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q1:  A company produces 500 tons of plastic packaging annually and is subject to an Extended Producer Responsibility (EPR) scheme with a recovery target of 80%. How many tons of plastic packaging should the company aim to recover each year to meet the EPR requirement? 



**`Final Response:`** To calculate the amount of plastic packaging the company should aim to recover each year, we need to use the following formula:

Recovery target (in tons) = Total production (in tons) x Recovery rate (as a percentage)

In this case, the total production is 500 tons, and the recovery target is 80%. Therefore, the recovery target in tons is:

Recovery target (in tons) = 500 tons x 80% = 400 tons

So, the company should aim to recover 400 tons of plastic packaging each year to meet the EPR requirement.

In [None]:
# @title Question 2
response_object = query_engine.query(questions[1])
print("Q2: ", questions[1], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q2:  What is the report quarter and when did it end? 



**`Final Response:`** The report quarter is the first quarter of fiscal year 2023, and it ended on April 29, 2023.

In [None]:
# @title Question 3
response_object = query_engine.query(questions[2])
print("Q3: ", questions[2], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q3:  What are the results for the reporting quarter? Please describe in the following order using bullet points - revenue, gross margin, opex, op margin, net income, and EPS. Include both GAAP and non-GAAP numbers. Please also include quarter-over-quarter changes. 



**`Final Response:`** Based on the provided context information, here are the results for the reporting quarter:

Revenue:

* GAAP: $1,214 million (down 10% QoQ)
* Non-GAAP: $1,200 million (down 10% QoQ)

Gross Margin:

* GAAP: $371 million (down 3% QoQ)
* Non-GAAP: $366 million (down 3% QoQ)

Operating Expenses:

* GAAP: $714 million (down 11% QoQ)
* Non-GAAP: $697 million (down 11% QoQ)

Operating Margin:

* GAAP: 37.1% (down 3.4% QoQ)
* Non-GAAP: 36.6% (down 3.4% QoQ)

Net Income:

* GAAP: $18 million (down 87% QoQ)
* Non-GAAP: $5 million (down 92% QoQ)

EPS:

* GAAP: $0.05 per share (down 87% QoQ)
* Non-GAAP: $0.01 per share (down 92% QoQ)

Note: All quarter-over-quarter changes are based on the provided context information and may not accurately reflect the actual performance of the company.

In [None]:
# @title Question 4
response_object = query_engine.query(questions[3])
print("Q4: ", questions[3], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q4:  If a municipality introduces a pay-as-you-throw program as part of its EPR strategy, charging households $0.05 per pound of collected waste, what is the cost for a household that generates 30 pounds per week? 



**`Final Response:`** The cost for a household that generates 30 pounds per week under a pay-as-you-throw program would be $1.50 per week (30 pounds x $0.05 per pound).

In [None]:
# @title Question 5
response_object = query_engine.query(questions[4])
print("Q5: ", questions[4], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q5:  What's the OP margin for the current quarter? 



**`Final Response:`** The given text does not mention the current quarter's OP margin. However, based on the information provided, we can calculate the OP margin for the first quarter of fiscal year 2023.

According to the text, the gross profit was $1,214 million, and the operating expenses were $1,224 million, resulting in an operating loss of $10 million. The operating margin is calculated as follows:

Operating margin = (Gross profit - Operating expenses) / Gross profit

Operating margin = ($1,214 million - $1,224 million) / $1,214 million

Operating margin = -0.3%

Therefore, the OP margin for the first quarter of fiscal year 2023 was -0.3%.

In [None]:
# @title Question 6
response_object = query_engine.query(questions[5])
print("Q6: ", questions[5], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q6:  Identify potential risks associated with EPR implementation, such as regulatory changes, market volatility, and technological disruptions. Develop a risk assessment framework that assigns probabilities and impacts to each identified risk factor. Propose risk mitigation strategies for minimizing the adverse effects of these risks on EPR programs and stakeholder interests. 



**`Final Response:`** Based on the context information provided, potential risks associated with EPR implementation include regulatory changes, market volatility, and technological disruptions. To assess these risks, a risk assessment framework can be developed that assigns probabilities and impacts to each identified risk factor. The following is a list of potential risk factors and their associated probabilities and impacts:

1. Regulatory changes:
	* Probability: 0.7 (high chance of occurrence)
	* Impact: 0.8 (high impact on EPR programs and stakeholder interests)
2. Market volatility:
	* Probability: 0.6 (moderate chance of occurrence)
	* Impact: 0.5 (moderate impact on EPR programs and stakeholder interests)
3. Technological disruptions:
	* Probability: 0.4 (low chance of occurrence)
	* Impact: 0.3 (low impact on EPR programs and stakeholder interests)

To mitigate the adverse effects of these risks on EPR programs and stakeholder interests, the following risk mitigation strategies can be proposed:

1. Regulatory changes:
	* Monitor regulatory changes and engage with regulatory bodies to ensure compliance.
	* Develop flexible and adaptable EPR programs that can respond to changing regulatory requirements.
2. Market volatility:
	* Diversify EPR program offerings to reduce dependence on any single revenue stream.
	* Develop risk management strategies to mitigate the impact of market volatility on EPR programs.
3. Technological disruptions:
	* Invest in research and development to stay ahead of technological changes and ensure the relevance of EPR programs.
	* Develop contingency plans to address potential technological disruptions and ensure the continuity of EPR programs.

By using this risk assessment framework and implementing these risk mitigation strategies, stakeholders can minimize the adverse effects of potential risks associated with EPR implementation and ensure the success of EPR programs.

In [None]:
# @title Question 7
response_object = query_engine.query(questions[6])
print("Q7: ", questions[6], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q7:  What were Gap Inc's net sales for the first quarter of fiscal 2023 and how did they compare to the previous year? 



**`Final Response:`** According to the context information, Gap Inc's net sales for the first quarter of fiscal 2023 were $3.28 billion, which is a 6% decrease compared to the same period last year. This is inclusive of an estimated 1-point foreign exchange headwind and 2 percentage points of negative impact from the sale of Gap China.

In [None]:
# @title Question 8
response_object = query_engine.query(questions[7])
print("Q8: ", questions[7], "\n")
display_response(response_object)
rag_answers.append(response_object.response)

Q8:  How did merchandise margin change from the previous year, and what were the drivers of this change? 



**`Final Response:`** The context information provided is a financial report for a company's first quarter of fiscal year 2023. The report includes information about the company's net sales, comparable sales, and merchandise margin. Based on the information provided, the merchandise margin increased by 600 basis points or 610 basis points on an adjusted basis compared to the previous year. The drivers of this change were lower air freight expense and improved promotional activity, partially offset by inflationary cost headwinds.

In [None]:
# @title Question 9
response_object = query_engine.query(questions[8])
print("Q9: ", questions[8], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q9:  Did Gap Inc. have an operating profit or loss, and if so, what was the amount? 



**`Final Response:`** According to the information provided, Gap Inc. had an operating loss of $10 million.

In [None]:
# @title Question 10
response_object = query_engine.query(questions[9])
print("Q10: ", questions[9], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q10:  What were the key results from each of Gap Inc.'s brands (Old Navy, Gap, Banana Republic, and Athleta)? 



**`Final Response:`** Here are the key results from each of Gap Inc.'s brands in the first quarter:

* Old Navy: Revenue was $1.8 billion, sales down 1%.
* Gap: $692 million, down 13%.
* Banana Republic: $432 million, down 10%.
* Athleta: $321 million, down 11%.

Please let me know if you have any other questions or if there's anything else I can help you with.

In [None]:
# @title Question 11
response_object = query_engine.query(questions[10] + "Answer in 2 lines.")
print("Q11: ", questions[10], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q11:  What is the company's outlook on net sales for the second quarter and the full fiscal year 2023? 



**`Final Response:`** The company's outlook for net sales in the second quarter is in the mid to high single digit range compared to last year's net sales of $3.86 billion.
For the full fiscal year 2023, the company anticipates net sales could decrease in the low to mid single digit range compared to last year's net sales of $15.6 billion.

In [None]:
# @title Question 12
response_object = query_engine.query(questions[11])
print("Q12: ", questions[11], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q12:  How much cash and cash equivalents did Gap, Inc. have at the end of the quarter? 



**`Final Response:`** According to the information provided, Gap, Inc. had $1,198 million in cash and cash equivalents at the end of the quarter.

In [None]:
# @title Question 13
response_object = query_engine.query(questions[12])
print("Q13: ", questions[12], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q13:  How does the company expect gross margin to change in Q2 and the full year compared to 2022? 



**`Final Response:`** The company expects gross margin expansion in Q2 and the full year compared to 2022.

In [None]:
# @title Question 14
response_object = query_engine.query(questions[13])
print("Q14: ", questions[13], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q14:  Did Gap Inc. pay a dividend in the first quarter, and what is their dividend strategy going forward? 



**`Final Response:`** Gap Inc. did pay a dividend in the first quarter of $0.15 per share, and the Board has approved the same for the second quarter. Therefore, the dividend strategy going forward is to continue paying a quarterly dividend of $0.15 per share.

In [None]:
# @title Question 15
response_object = query_engine.query(questions[14])
print("Q15: ", questions[14], "\n")
display_response(response_object)
# rag_answers.append(response_object.response)

Q15:  What are the key actions Gap Inc. is taking to execute its plan? 



**`Final Response:`** Gap Inc. is taking actions such as delivering improvements to the business, share gains at Old Navy and Gap Brand, adjusted operating margin expansion, reduction in inventory, and strength in its balance sheet.

In [None]:
# rag_answers.append(response_object.response)
# len(rag_answers)

# for i in range(len(rag_answers)):
#   print(rag_answers[i], "\n\n\n")

In [None]:
generated_questions = [
    [
        "If a company produces 500 tons of plastic annually and is part of an EPR scheme with an 80% recovery goal, how much plastic should it recover each year?",
        "For a company under an EPR scheme with an 80% target and an annual production of 500 tons of plastic, what is the required recovery amount?",
        "What is the annual recovery target in tons for a company producing 500 tons of plastic under an 80% EPR scheme?",
        "How many tons of plastic must a company recycle annually if it produces 500 tons and is subject to an 80% recovery EPR scheme?"
    ],
    [
        "What quarter is being reported on and when did it conclude?",
        "Identify the reporting quarter and its end date.",
        "Which fiscal quarter does the report cover and on what date did it end?",
        "Can you specify the reported quarter and its closing date?"
    ],
    [
        "List the financial results for the reported quarter in the order of revenue, gross margin, operating expenses, operating margin, net income, and EPS, including both GAAP and non-GAAP figures and their changes from the previous quarter.",
        "Provide the financial outcomes for the quarter, detailing revenue, gross margin, operating expenses, operating margin, net income, and EPS in both GAAP and non-GAAP terms, along with the changes from the last quarter.",
        "Summarize the quarter's financials in the sequence of revenue, gross margin, operating expenses, operating margin, net income, and EPS, showing both GAAP and non-GAAP values and the quarter-over-quarter variations.",
        "Detail the financial results for the quarter, focusing on revenue, gross margin, operating expenses, operating margin, net income, and EPS, including comparisons of GAAP and non-GAAP figures to the previous quarter."
    ],
    [
        "What would be the weekly cost for a household producing 30 pounds of waste under a pay-as-you-throw program charging $0.05 per pound?",
        "Calculate the weekly expense for a household disposing of 30 pounds of waste weekly with a fee of $0.05 per pound under a pay-as-you-throw scheme.",
        "Under a pay-as-you-throw system charging $0.05 per pound, how much would a household pay weekly for 30 pounds of waste?",
        "If a pay-as-you-throw program costs $0.05 per pound, what is the weekly cost for a household generating 30 pounds of waste?"
    ],
    [
        "What is the operating margin for the most recent quarter?",
        "Can you provide the operating margin figure for the latest quarter?",
        "What was the operating margin in the latest financial quarter?",
        "Determine the operating margin for the current quarter."
    ],
    [
        "Outline a risk assessment framework for EPR implementation, including probability and impact of risks like regulatory changes and market volatility, and suggest strategies to mitigate these risks.",
        "Create a risk management plan for EPR programs, assessing the likelihood and impact of risks such as regulatory shifts and technological disruptions, and recommend mitigation tactics.",
        "Develop a framework to assess and manage risks in EPR implementation, focusing on regulatory, market, and technological factors, and propose strategies to reduce their negative impacts.",
        "Construct a risk evaluation and mitigation strategy for implementing EPR, considering risks like regulatory changes and market instability, and offer solutions to minimize these risks."
    ],
    [
        "What were the net sales of Gap Inc. in the first quarter of fiscal 2023, and how did this compare to the same period last year?",
        "Report on Gap Inc.'s net sales for the first fiscal quarter of 2023 and their year-over-year comparison.",
        "How much did Gap Inc. make in net sales during the first quarter of fiscal 2023, and what was the difference from the previous year?",
        "Detail Gap Inc.'s net sales for Q1 of fiscal 2023 and compare them to the net sales from the first quarter of the previous year."
    ],
    [
        "How did the merchandise margin for Gap Inc. change compared to last year, and what influenced this change?",
        "What was the year-over-year change in merchandise margin for Gap Inc., and what were the main factors driving this change?",
        "Describe the change in Gap Inc.'s merchandise margin from the previous year and identify the key drivers of this change.",
        "Explain how Gap Inc.'s merchandise margin shifted from the previous year and the factors that contributed to this shift."
    ],
    [
        "Did Gap Inc. report an operating profit or loss for the period, and what was the amount?",
        "What was Gap Inc.'s operating result, profit or loss, for the period and the specific figure?",
        "For the reported period, did Gap Inc. have an operating profit or a loss, and what was the total?",
        "State whether Gap Inc. had an operating profit or loss during the period and provide the exact figure."
    ],
    [
        "What were the primary results for each of Gap Inc.'s brands including Old Navy, Gap, Banana Republic, and Athleta?",
        "Summarize the key financial outcomes for each of Gap Inc.'s brands: Old Navy, Gap, Banana Republic, and Athleta.",
        "Detail the significant results for Gap Inc.'s brands—Old Navy, Gap, Banana Republic, and Athleta.",
        "Report on the main financial results for each of Gap Inc.'s brands: Old Navy, Gap, Banana Republic, and Athleta."
    ],
    [
        "What are Gap Inc.'s sales projections for the second quarter and the entire fiscal year of 2023?",
        "What is Gap Inc.'s forecast for net sales in Q2 and for the full fiscal year 2023?",
        "How does Gap Inc. anticipate its net sales to perform in the second quarter and throughout fiscal year 2023?",
        "What sales expectations does Gap Inc. have for the second quarter and for the entirety of fiscal year 2023?"
    ],
    [
        "How much cash and equivalents did Gap Inc. possess at the end of the recent quarter?",
        "What was the total amount of cash and cash equivalents held by Gap Inc. at the quarter's end?",
        "At the close of the quarter, what was the balance of cash and cash equivalents for Gap Inc.?",
        "What amount of cash and cash equivalents did Gap Inc. report at the conclusion of the quarter?"
    ],
    [
        "How is Gap Inc. expecting its gross margin to vary in the second quarter and throughout the full year compared to 2022?",
        "What are Gap Inc.'s expectations for changes in gross margin for Q2 and the full year relative to 2022?",
        "In what way does Gap Inc. foresee its gross margin changing in the second quarter and over the full year compared to last year?",
        "What projections does Gap Inc. have regarding the change in gross margin for the second quarter and the entire year as compared to 2022?"
    ],
    [
        "Did Gap Inc. issue a dividend in the first quarter, and what are their future plans regarding dividends?",
        "Was there a dividend paid by Gap Inc. in the first quarter, and what is their strategy for dividends moving forward?",
        "In the first quarter, did Gap Inc. distribute dividends, and what is their dividend policy for the future?",
        "Has Gap Inc. paid out a dividend in the first quarter, and what does their dividend strategy look like going forward?"
    ],
    [
        "What strategic actions is Gap Inc. taking to implement its business plan?",
        "What are the key steps Gap Inc. is undertaking to fulfill its strategic plan?",
        "Identify the main actions being taken by Gap Inc. to execute its business strategy.",
        "What are the principal measures Gap Inc. is implementing to carry out its strategic objectives?"
    ]
]

In [None]:
import pandas as pd
# To dict
data = {
    "question": questions,
    "ground_truths": ground_truths,
    "rag_answer": rag_answers,
    "generated_questions": generated_questions
}

# Convert dict to dataset
df = pd.DataFrame(data)

In [None]:
# df.to_csv('/content/drive/MyDrive/Lab 2/Part 3/Part 3 - llama/output_llama.csv', index=False)

In [3]:
import pandas as pd
df = pd.read_csv('/content/drive/MyDrive/Lab 2/Part 3 - llama/output_llama.csv')
df

Unnamed: 0,question,ground_truths,rag_answer,generated_questions
0,A company produces 500 tons of plastic packagi...,EPR Recovery Target Calculation:\n To meet the...,To calculate the amount of plastic packaging t...,['If a company produces 500 tons of plastic an...
1,What is the report quarter and when did it end?,The reported quarter is the first financial qu...,The report quarter is the first quarter of fis...,['What quarter is being reported on and when d...
2,What are the results for the reporting quarter...,The Results for the reporting quarter are:\n1....,"\nBased on the provided context information, h...",['List the financial results for the reported ...
3,If a municipality introduces a pay-as-you-thro...,A household generating 30 pounds of waste per ...,The cost for a household that generates 30 pou...,['What would be the weekly cost for a househol...
4,What's the OP margin for the current quarter?,\nThe operating margin is calculated with the ...,\nThe given text does not mention the current ...,['What is the operating margin for the most re...
5,Identify potential risks associated with EPR i...,Potential Risks in EPR Implementation:\n* Regu...,"\nBased on the context information provided, p...",['Outline a risk assessment framework for EPR ...
6,What were Gap Inc's net sales for the first qu...,Gap Inc's net sales for Q1 fiscal 2023 were $3...,"\nAccording to the context information, Gap In...",['What were the net sales of Gap Inc. in the f...
7,How did merchandise margin change from the pre...,Merchandise margin increased 600 basis points ...,The context information provided is a financia...,['How did the merchandise margin for Gap Inc. ...
8,"Did Gap Inc. have an operating profit or loss,...",Reported operating loss of $10 million. Adjust...,"\nAccording to the information provided, Gap I...",['Did Gap Inc. report an operating profit or l...
9,What were the key results from each of Gap Inc...,The key results for Gap Inc.'s brands in the f...,\nHere are the key results from each of Gap In...,"[""What were the primary results for each of Ga..."


# Evaluation
## BLEU Score

In [None]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize

# Adding a new column for BLEU scores
df['bleu_score'] = 0.0

# Create a smoother
smoother = SmoothingFunction().method1

for index, row in df.iterrows():
    # Tokenize the ground truth and candidate answer
    reference = word_tokenize(row['ground_truths'].lower())
    candidate = word_tokenize(row['rag_answer'].lower())

    # Calculate the sentence BLEU score using a smoothing function
    score = sentence_bleu([reference], candidate, smoothing_function=smoother)  # Note that reference needs to be double-listed

    # Store the score in the DataFrame
    df.at[index, 'bleu_score'] = round(score, 5)

# Print the updated DataFrame with BLEU scores
df[['ground_truths', 'rag_answer', 'bleu_score']]

Unnamed: 0,ground_truths,rag_answer,bleu_score
0,EPR Recovery Target Calculation:\n To meet the...,To calculate the amount of plastic packaging t...,0.24959
1,The reported quarter is the first financial qu...,The report quarter is the first quarter of fis...,0.37392
2,The Results for the reporting quarter are:\n1....,"\nBased on the provided context information, h...",0.61209
3,A household generating 30 pounds of waste per ...,The cost for a household that generates 30 pou...,0.16975
4,\nThe operating margin is calculated with the ...,\nThe given text does not mention the current ...,0.35562
5,Potential Risks in EPR Implementation:\n* Regu...,"\nBased on the context information provided, p...",0.11137
6,Gap Inc's net sales for Q1 fiscal 2023 were $3...,"\nAccording to the context information, Gap In...",0.55584
7,Merchandise margin increased 600 basis points ...,The context information provided is a financia...,0.26979
8,Reported operating loss of $10 million. Adjust...,"\nAccording to the information provided, Gap I...",0.34824
9,The key results for Gap Inc.'s brands in the f...,\nHere are the key results from each of Gap In...,0.31334


In [None]:
import pandas as pd
from nltk.translate.bleu_score import corpus_bleu
from nltk.tokenize import word_tokenize

# Each ground truth can be a list of possible correct sentences, hence it is wrapped it in another list
references = [[word_tokenize(gt.lower())] for gt in df['ground_truths']]
candidates = [word_tokenize(answer.lower()) for answer in df['rag_answer']]

# Calculate the corpus BLEU score
score = corpus_bleu(references, candidates)

print("Corpus BLEU Score:", score)

Corpus BLEU Score: 0.3214909110550651


## RAG Metrics: Answer Relevance	Answer Correctness

In [None]:
import spacy
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import string

# Load spaCy model for NLP tasks
nlp = spacy.load("en_core_web_sm")

# Load model and tokenizer
model_name = 'BAAI/bge-large-en-v1.5'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

In [None]:
# Function to calculate the embedding of a given text
def get_embedding(text):
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    outputs = model(**inputs)
    # Reshape the tensor to remove the batch dimension and detach it from the computation graph
    return outputs.last_hidden_state.mean(dim=1).detach().numpy().squeeze()

# Function to calculate cosine similarity between two embeddings
def calculate_similarity(embedding1, embedding2):
    # Ensure the embeddings are 2D
    embedding1 = embedding1.reshape(1, -1)
    embedding2 = embedding2.reshape(1, -1)
    return cosine_similarity(embedding1, embedding2)[0][0]

# Function to calculate answer relevancy
def calculate_answer_relevancy(row):
    original_question_embedding = get_embedding(row['question'])

    # 'generated_questions' is a list of artificially generated questions from the answer (reverse engineered)
    artificial_questions_embeddings = [get_embedding(q) for q in row['generated_questions']]

    relevancies = [
        calculate_similarity(original_question_embedding, q_emb)
        for q_emb in artificial_questions_embeddings
    ]

    # Calculate the mean of these relevancies to get the answer relevancy score
    answer_relevancy = np.mean(relevancies)
    return answer_relevancy

In [None]:
# Now apply these functions to each row in your dataframe
df['Answer Relevance'] = df.apply(calculate_answer_relevancy, axis=1)
df['Answer Correctness'] = df.apply(lambda row: calculate_similarity(get_embedding(row['rag_answer']), get_embedding(row['ground_truths'])), axis=1)

# Display the updated DataFrame with the computed metrics
df[['Answer Relevance', 'Answer Correctness']]

Unnamed: 0,Answer Relevance,Answer Correctness
0,0.521876,0.967421
1,0.52627,0.958874
2,0.536365,0.969073
3,0.527066,0.928409
4,0.539291,0.952305
5,0.504796,0.965784
6,0.504203,0.981855
7,0.480876,0.854056
8,0.500985,0.807922
9,0.506595,0.980135


In [None]:
df['Answer Relevance'].mean()

0.51114964

In [None]:
df['Answer Correctness'].mean()

0.90960485

## ROUGE Scores

In [9]:
# !pip install rouge-score

In [7]:
from rouge_score import rouge_scorer

# Create a ROUGE scorer instance
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Calculate ROUGE scores for each pair of ground_truth and rag_answer
rouge_scores = [scorer.score(gt, ra) for gt, ra in zip(df['ground_truths'], df['rag_answer'])]

# Convert the list of scores to a DataFrame
rouge_scores_df = pd.DataFrame({
    "ROUGE-1": [scores['rouge1'].fmeasure for scores in rouge_scores],
    "ROUGE-2": [scores['rouge2'].fmeasure for scores in rouge_scores],
    "ROUGE-L": [scores['rougeL'].fmeasure for scores in rouge_scores]
})

rouge_scores_df.describe()  # Provide summary statistics of the scores

Unnamed: 0,ROUGE-1,ROUGE-2,ROUGE-L
count,15.0,15.0,15.0
mean,0.614586,0.458848,0.552899
std,0.171201,0.179117,0.179259
min,0.218182,0.075472,0.181818
25%,0.532806,0.370206,0.456577
50%,0.615385,0.454545,0.504348
75%,0.759615,0.569697,0.696338
max,0.818444,0.702703,0.806916


In [8]:
rouge_scores_df

Unnamed: 0,ROUGE-1,ROUGE-2,ROUGE-L
0,0.622222,0.43609,0.488889
1,0.8,0.606061,0.8
2,0.818444,0.684058,0.806916
3,0.745763,0.491228,0.610169
4,0.564103,0.454545,0.487179
5,0.509091,0.250653,0.425974
6,0.787879,0.701031,0.767677
7,0.556522,0.40708,0.504348
8,0.384615,0.333333,0.384615
9,0.594595,0.440367,0.558559
