In [1]:
!module load CUDA/11.8.0
# sentence_transformers==2.2.2
# chromadb==0.4.5  the newest version conflits with the version of numpy
# langchain==0.0.266


The following have been reloaded with a version change:
  1) CUDA/12.3.0 => CUDA/11.8.0



LLamaparse is not recommended for the Llama2, because this package will overwrite the environment supporting the Llama2.

In [2]:
import torch
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import RetrievalQA

from auto_gptq import AutoGPTQForCausalLM
from transformers import LlamaForCausalLM, LlamaTokenizer,AutoTokenizer, TextStreamer, pipeline 

import PyPDF2
import requests
import io
import fitz

In [3]:
import re
from fuzzywuzzy import fuzz
import pandas as pd
import csv


In [4]:
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Llama2

In [5]:
model_name_or_path = "TheBloke/Llama-2-70B-chat-GPTQ"
model_basename = "model"

tokenizer_70B = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

model_70B = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    revision="gptq-4bit-128g-actorder_True",
    model_basename=model_basename,
    use_safetensors=True,
    trust_remote_code=True,
    inject_fused_attention=False,
    quantize_config=None,
)

INFO - The layer lm_head is not quantized.


  0%|          | 0/3043 [00:00<?, ?w/s]

# presetting prompt

In [6]:
DEFAULT_SYSTEM_PROMPT = """
You are a helpful, respectful and honest assistant in a Wealth Management Company. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
""".strip()


def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    return f"""
[INST] <>
{system_prompt}
<>

{prompt} [/INST]
""".strip()

streamer_70B = TextStreamer(tokenizer_70B, skip_prompt=True, skip_special_tokens=True)
text_pipeline = pipeline(
    "text-generation",
    model=model_70B,
    tokenizer=tokenizer_70B,
    max_new_tokens=1024,
    temperature=0,
    top_p=0.95,
    repetition_penalty=1.15,
    streamer=streamer_70B,
)

llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})
SYSTEM_PROMPT = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
template = generate_prompt(
    """
{context}

Question: {question}
""",
    system_prompt=SYSTEM_PROMPT,
)

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large", model_kwargs={"device": DEVICE}
)

The model 'LlamaGPTQForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MusicgenMelodyForCausalLM', 'MvpForCausalLM', 'OpenLlam

load INSTRUCTOR_Transformer
max_seq_length  512


# file accessing

In [39]:

def fileaccessing_builddb(url_path):
    pdf_texts = []
    response = requests.get(url_path)
    pdf_data = response.content
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_data))
    for i in range(len(pdf_reader.pages)):
        text = pdf_reader.pages[i].extract_text()
        
        text = text.replace('\xa0', ' ')
        docu = Document(
                page_content=text,
                metadata={},
            )
        pdf_texts.append(docu)
    return pdf_texts


def fileaccessing_builddb_pageselection(url_path, search_text_thresholds):
    pdf_texts = []
    try:
        response = requests.get(url_path)
        pdf_data = response.content
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_data))
    except PyPDF2._reader.PdfReadError:
        HEADERS = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \ (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36','Accept-Language': 'en-US, en;q=0.5'}
        response = requests.get(url_path, headers=HEADERS)
        pdf_data = response.content
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_data))

    for i in range(len(pdf_reader.pages)):
        text = pdf_reader.pages[i].extract_text()
        text = text.replace('\xa0', ' ')
        text = text.lower()
        highest_threshold = 0
        s = 0
        for search_text, threshold in search_text_thresholds:
            s += threshold
            if search_text.lower() in text:
                highest_threshold += 100
            else:
                similarity = fuzz.partial_ratio(search_text.lower(), text)
                if similarity >= threshold:
                    highest_threshold += max(threshold, similarity)

        # If the highest_threshold exceeds the sum of search_text_thresholds,
        # add the document to pdf_texts
        if highest_threshold > s:
            docu = Document(
                page_content=text,
                metadata={},
            )
            pdf_texts.append(docu)
            print(f"page{i}")

    return pdf_texts


In [8]:
def Quarters_cal(currentQ = "2Q23"):
    q = int(currentQ[0])
    year = int(currentQ[2:4])
    quarters = [currentQ]
    # the previous one quarter
    if q == 1:
        q = 4
        year -= 1
    else:
        q -= 1
    time = f"{q}Q{year}"
    quarters.append(time)
    # the quarter in the past year
    q = int(currentQ[0])
    year = int(currentQ[2:4])
    quarters.append(f"{q}Q{year-1}")
    return quarters

In [9]:
def past_7Q(currentQ = "2Q23"):
    quarters = [currentQ]
    q = int(currentQ[0])
    year = int(currentQ[2:4])
    for i in range(7):
        if q == 1:
            q = 4
            year -= 1
        else:
            q -= 1
        if year < 0:
            year = 100 + year
        time = f"{q}Q{year}"
        quarters.append(time)
    return quarters

def past_4Q(currentQ = "2Q23"):
    quarters = [currentQ]
    q = int(currentQ[0])
    year = int(currentQ[2:4])
    for i in range(4):
        if q == 1:
            q = 4
            year -= 1
        else:
            q -= 1
        if year < 0:
            year = 100 + year
        time = f"{q}Q{year}"
        quarters.append(time)
    return quarters

## Morgan Stanley

In [12]:
MSlinks = {"4Q23": "https://www.morganstanley.com/content/dam/msdotcom/en/about-us-ir/finsup4q2023/finsup4q2023.pdf",\
        "2Q23": "https://www.morganstanley.com/content/dam/msdotcom/en/about-us-ir/finsup2q2023/finsup2q2023.pdf", \
        "3Q22": "https://www.morganstanley.com/content/dam/msdotcom/en/about-us-ir/finsup3q2022/finsup3q2022.pdf",\
        "1Q22": "https://www.morganstanley.com/content/dam/msdotcom/en/about-us-ir/finsup1q2022/finsup1q2022.pdf"}

In [11]:
# guessing metrics names
texts = fileaccessing_builddb(MSlinks["2Q23"])
ids = [str(i) for i in range(1, len(texts) + 1)]
MSDB = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    ids=ids
)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=MSDB.as_retriever(search_kwargs={"k": 2}), # change database
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)
query  = "I am interested into the values in Wealth management field about asset under management, the change of asset \
and related subsets. please give me the metrics names in the file which will interest me"
result = qa_chain(query)
MSDB._collection.delete(ids=ids)



 Sure! Based on your interest in values in the wealth management field, specifically asset under management, changes in assets, and related subsets, here are some metrics you might find useful from the provided text:

* Total client assets
* Net new assets
* Advisor-led client assets
* Fee-based client assets
* Self-directed assets
* Stock plan unvested assets

These metrics can be found in the "Wealth Management Metrics" section of the text.


In [15]:
# data extraction 
MS_search_words_with_threshold = [('Wealth Management Metrics',80),('Advisor‐led channel',60),('Financial Information',40),('Asset Management',50)]
source_docu = []
results = []
for index, link in MSlinks.items():
    print(link)
    texts = fileaccessing_builddb_pageselection(link, search_text_thresholds = MS_search_words_with_threshold)
    quarters = Quarters_cal(currentQ = index)
    print(len(texts))
    ids = [str(i) for i in range(1, len(texts) + 1)]
    MSDB = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        ids=ids
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=MSDB.as_retriever(search_kwargs={"k": 2}), 
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )
    query  = "Create a table to export values about Total client assets, Net new assets, \
    Advisor‐led client assets, Fee‐based client assets and flows , Self‐directed assets about Wealth Management of "
    for quarter in quarters:
        query += f"{quarter}, "
    query += " In ONE TABLE using 'B' for billion and 'M' for million. Only ONE TABLE! \
    The Row names(y-axis) are QUARTERS(xQxx) and the Column names(x-axis) are METRICS name!"
    result = qa_chain(query)
    if "I don't know" in result['result']:
        print(texts)
    results.append(result['result'])
    source_docu.append(result['source_documents'])
    print("count before", MSDB._collection.count())
    MSDB._collection.delete(ids=ids)
    print("count after", MSDB._collection.count())
    # check whether the database is cleaned or not.

https://www.morganstanley.com/content/dam/msdotcom/en/about-us-ir/finsup4q2023/finsup4q2023.pdf
2
 Sure, here's the table you requested:

| Quarters | Total Client Assets ($B) | Net New Assets ($B) | Advisor-Led Client Assets ($B) | Fee-Based Client Assets ($B) | Fee-Based Flows ($B) | Self-Directed Assets ($B) |
| --- | --- | --- | --- | --- | --- | --- |
| 4Q23 | 5,129 | 47.5 | 3,979 | 1,983 | 41.6 | 1,150 |
| 3Q23 | 4,798 | 35.7 | 3,755 | 1,857 | 22.5 | 1,043 |
| 4Q22 | 4,187 | 51.6 | 3,392 | 1,678 | 20.4 | 795 |

Note: The values for Total Client Assets, Net New Assets, Advisor-Led Client Assets, Fee-Based Client Assets, and Self-Directed Assets are in billions ($.B). The values for Fee-Based Flows are in billions ($.B)
count before 2
count after 0
https://www.morganstanley.com/content/dam/msdotcom/en/about-us-ir/finsup2q2023/finsup2q2023.pdf
2
 Sure, here's the table you requested:

| Quarters | Total Client Assets (B) | Net New Assets (B) | Advisor-Led Client Assets (B) | Fee-Bas

Number of requested results 2 is greater than number of elements in index 1, updating n_results = 1


1
 Sure, here's the table you requested:

| Quarters | Total Client Assets (B) | Net New Assets (B) | Advisor-Led Client Assets (B) | Fee-Based Client Assets (B) | Fee-Based Asset Flows (B) | Self-Directed Assets (B) |
| --- | --- | --- | --- | --- | --- | --- |
| 3Q22 | 4,134 | 64.8 | 3,305 | 1,628 | 16.7 | 829 |
| 2Q22 | 4,246 | 52.9 | 3,427 | 1,717 | 28.5 | 819 |
| 3Q21 | 4,666 | 134.5 | 3,647 | 1,752 | 70.6 | 1,019 |

Note: The values for Total Client Assets, Net New Assets, Advisor-Led Client Assets, Fee-Based Client Assets, and Fee-Based Asset Flows are in billions (B), while the values for Self-Directed Assets are in millions (M).
count before 1
count after 0
https://www.morganstanley.com/content/dam/msdotcom/en/about-us-ir/finsup1q2022/finsup1q2022.pdf


Number of requested results 2 is greater than number of elements in index 1, updating n_results = 1


1
 Sure, here's the table you requested:

| Quarters | Total Client Assets (B) | Net New Assets (B) | Advisor-Led Client Assets (B) | Fee-Based Client Assets (B) | Fee-Based Asset Flows (B) | Self-Directed Assets (B) |
| --- | --- | --- | --- | --- | --- | --- |
| 1Q22 | 4,800 | 142.0 | 3,835 | 1,873 | 97.2 | 965 |
| 4Q21 | 4,930 | 127.1 | 3,886 | 1,839 | 37.8 | 1,044 |
| 1Q21 | 4,231 | 104.9 | 3,349 | 1,574 | 37.2 | 882 |

Note: The values for Total Client Assets, Net New Assets, Advisor-Led Client Assets, Fee-Based Client Assets, and Fee-Based Asset Flows are in billions (B), while the values for Self-Directed Assets are in millions (M).
count before 1
count after 0


In [16]:
for i in results:
    print(i)

  Sure, here's the table you requested:

| Quarters | Total Client Assets ($B) | Net New Assets ($B) | Advisor-Led Client Assets ($B) | Fee-Based Client Assets ($B) | Fee-Based Flows ($B) | Self-Directed Assets ($B) |
| --- | --- | --- | --- | --- | --- | --- |
| 4Q23 | 5,129 | 47.5 | 3,979 | 1,983 | 41.6 | 1,150 |
| 3Q23 | 4,798 | 35.7 | 3,755 | 1,857 | 22.5 | 1,043 |
| 4Q22 | 4,187 | 51.6 | 3,392 | 1,678 | 20.4 | 795 |

Note: The values for Total Client Assets, Net New Assets, Advisor-Led Client Assets, Fee-Based Client Assets, and Self-Directed Assets are in billions ($.B). The values for Fee-Based Flows are in billions ($.B)
  Sure, here's the table you requested:

| Quarters | Total Client Assets (B) | Net New Assets (B) | Advisor-Led Client Assets (B) | Fee-Based Client Assets (B) | Fee-Based Flows (B) | Self-Directed Assets (M) |
| --- | --- | --- | --- | --- | --- | --- |
| 2Q23 | 4,885 | 89.5 | 3,784 | 1,856 | 22.7 | 1,101 |
| 1Q23 | 4,558 | 109.6 | 3,582 | 1,769 | 22.4 | 976 

In [19]:
# Save the raw tables
pattern = r'\| Quarters \|.*?\|\n(.*?)\n\n'
MSdataframes = []
for i in results:
    match = re.search(pattern, i, re.DOTALL)
    if match:
        table_str = match.group(0)
        table_rows = [row.strip().split('|') for row in table_str.strip().split('\n')]
        
        headers = [header.strip() for header in table_rows[0] if header.strip()]
        data = [[cell.strip() for cell in row if cell.strip()] for row in table_rows[1:]]
    
        df = pd.DataFrame(data, columns=headers)
        MSdataframes.append(df)
for i in range(len(MSdataframes)):
    print(MSdataframes[i])

  Quarters Total Client Assets ($B) Net New Assets ($B)  \
0      ---                      ---                 ---   
1     4Q23                    5,129                47.5   
2     3Q23                    4,798                35.7   
3     4Q22                    4,187                51.6   

  Advisor-Led Client Assets ($B) Fee-Based Client Assets ($B)  \
0                            ---                          ---   
1                          3,979                        1,983   
2                          3,755                        1,857   
3                          3,392                        1,678   

  Fee-Based Flows ($B) Self-Directed Assets ($B)  
0                  ---                       ---  
1                 41.6                     1,150  
2                 22.5                     1,043  
3                 20.4                       795  
  Quarters Total Client Assets (B) Net New Assets (B)  \
0      ---                     ---                ---   
1     2Q2

In [20]:
startrows = 0
with pd.ExcelWriter('sample_output.xlsx', engine='xlsxwriter') as writer:
    for df in MSdataframes:
        df.to_excel(writer, index=False, sheet_name='Morgan_Stanley', startrow=startrows)
        startrows += df.shape[0] + 2

# Bank of America

In [27]:
BoAlinks = {"4Q23": "https://d1io3yog0oux5.cloudfront.net/_87c973d6eaec18dc169731b14cf06814/bankofamerica/db/806/9995/supplemental_information/The+Supplemental+Information_4Q23_ADA.pdf",\
        "2Q23": "https://d1io3yog0oux5.cloudfront.net/_87c973d6eaec18dc169731b14cf06814/bankofamerica/db/806/9921/supplemental_information/The+Supplemental+Information_2Q23_ADA.pdf", \
        "3Q22": "https://d1io3yog0oux5.cloudfront.net/_87c973d6eaec18dc169731b14cf06814/bankofamerica/db/806/9760/supplemental_information/The+Supplemental+Information+3Q22.pdf", \
        "1Q22": "https://d1io3yog0oux5.cloudfront.net/_66bdda23229b6beeb3b28234b910a155/bankofamerica/db/806/9656/supplemental_information/The+Supplemental+Information+1Q22.pdf"}
bofa_search_words_with_threshold = [('Wealth Management',50),('Assets under management',50),('Total assets',80) ]


In [50]:
results = []
# source_docu = []
for index, link in BoAlinks.items():
    print(link)
    #texts = fileaccessing_builddb(link)
    texts = fileaccessing_builddb_pageselection(link, search_text_thresholds = bofa_search_words_with_threshold)
    quarters = past_4Q(currentQ = index)
    print(len(texts))
    ids = [str(i) for i in range(1, len(texts) + 1)]
    MSDB3 = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        ids=ids
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=MSDB3.as_retriever(search_kwargs={"k": 2}), # change database
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )
    
    query  = "Create a table containing values of Assent under management, Net Cliet Flow, Total Wealth Advisors in "
    for quarter in quarters:
        query += f"{quarter}, "
    query += " in ONE TABLE, using 'B' to represent 'billion' and 'M' to represent 'million'. \
    The Row names(y-axis) are QUARTER names (xQxx) and the Column names(x-axis) are METRICS name!"
    print(query)
    result = qa_chain(query)
    results.append(result['result'])
    # source_docu.append(result['source_documents'])
    print("count before", MSDB3._collection.count())
    MSDB3._collection.delete(ids=ids)
    print("count after", MSDB3._collection.count())
    # check whether the database is cleaned or not.

https://d1io3yog0oux5.cloudfront.net/_87c973d6eaec18dc169731b14cf06814/bankofamerica/db/806/9995/supplemental_information/The+Supplemental+Information_4Q23_ADA.pdf
3
Create a table containing values of Assent under management, Net Cliet Flow, Total Wealth Advisors in 4Q23, 3Q23, 2Q23, 1Q23, 4Q22,  in ONE TABLE, using 'B' to represent 'billion' and 'M' to represent 'million'.     The Row names(y-axis) are QUARTER names (xQxx) and the Column names(x-axis) are METRICS name!
 Sure, here's a table with the requested information:

| Quarter | Assets Under Management ($B) | Net Client Flows ($M) | Total Wealth Advisors |
| --- | --- | --- | --- |
| 4Q23 | 1,617 | 52 | 18,916 |
| 3Q23 | 1,531 | 20 | 19,273 |
| 2Q23 | 1,496 | 8 | 19,130 |
| 1Q23 | 1,467 | 14 | 19,099 |
| 4Q22 | 1,329 | - | 18,273 |

Note: The values for "Assets Under Management" are in billions ($,B), while the values for "Net Client Flows" and "Total Wealth Advisors" are in millions ($,M).
count before 3
count after 0
https://

In [51]:
for i in results:
    print(i)

  Sure, here's a table with the requested information:

| Quarter | Assets Under Management ($B) | Net Client Flows ($M) | Total Wealth Advisors |
| --- | --- | --- | --- |
| 4Q23 | 1,617 | 52 | 18,916 |
| 3Q23 | 1,531 | 20 | 19,273 |
| 2Q23 | 1,496 | 8 | 19,130 |
| 1Q23 | 1,467 | 14 | 19,099 |
| 4Q22 | 1,329 | - | 18,273 |

Note: The values for "Assets Under Management" are in billions ($,B), while the values for "Net Client Flows" and "Total Wealth Advisors" are in millions ($,M).
  I don't have access to the most recent financial reports from Bank of America, so I cannot provide the exact figures for the second quarter of 2023. However, I can give you an idea of how the table might look based on the information provided in the reference text. Here's a sample table with the requested information:

| Quarter | Assets Under Management ($B) | Net Client Flows ($M) | Total Wealth Advisors |
| --- | --- | --- | --- |
| 2Q23 | 1,531 | 29 | 19,099 |
| 1Q23 | 1,411 | 16 | 18,449 |
| 4Q22 | 1

In [52]:
# Save the raw tables
pattern = r'\| Quarter \|.*?\|\n((?:.*?\|.*?\|\n)+)'

BOAdataframes = []
for i in results:
    match = re.search(pattern, i, re.DOTALL)
    if match:
        table_str = match.group(0)
        table_rows = [row.strip().split('|') for row in table_str.strip().split('\n')]
        
        headers = [header.strip() for header in table_rows[0] if header.strip()]
        data = [[cell.strip() for cell in row if cell.strip()] for row in table_rows[1:]]
    
        df = pd.DataFrame(data, columns=headers)
        BOAdataframes.append(df)
for i in range(len(BOAdataframes)):
    print(BOAdataframes[i])

  Quarter Assets Under Management ($B) Net Client Flows ($M)  \
0     ---                          ---                   ---   
1    4Q23                        1,617                    52   
2    3Q23                        1,531                    20   
3    2Q23                        1,496                     8   
4    1Q23                        1,467                    14   
5    4Q22                        1,329                     -   

  Total Wealth Advisors  
0                   ---  
1                18,916  
2                19,273  
3                19,130  
4                19,099  
5                18,273  
  Quarter Assets Under Management ($B) Net Client Flows ($M)  \
0     ---                          ---                   ---   
1    2Q23                        1,531                    29   
2    1Q23                        1,411                    16   
3    4Q22                        1,467                    14   
4    3Q22                        1,401           

In [53]:
with pd.ExcelWriter('sample_output.xlsx', engine='xlsxwriter') as writer:
    startrows = 0
    for df in MSdataframes:
        df.to_excel(writer, index=True, sheet_name='Morgan_Stanley', startrow=startrows)
        startrows += df.shape[0] + 2
    startrows = 0
    for df in BOAdataframes:
        df.to_excel(writer, index=True, sheet_name='Bank_of_America', startrow=startrows)
        startrows += df.shape[0] + 2

# Goldman Sachs

In [58]:
GSlinks = {"4Q23": "https://www.goldmansachs.com/media-relations/press-releases/current/pdfs/2023-q4-results.pdf",\
        "2Q23": "https://www.goldmansachs.com/media-relations/press-releases/current/pdfs/2023-q2-results.pdf", \
        "3Q22": "https://www.goldmansachs.com/media-relations/press-releases/current/pdfs/2022-q3-results.pdf", \
        "1Q22": "https://www.goldmansachs.com/media-relations/press-releases/current/pdfs/2022-q1-results.pdf"}
gs_search_words_with_threshold = [('Assets Under Supervision',50),('Earnings Results',80),('AUS',100),('Wealth Management',60)]

In [62]:
results = []
for index, link in GSlinks.items():
    print(link)
    texts = fileaccessing_builddb_pageselection(link, search_text_thresholds = gs_search_words_with_threshold)
    quarters = Quarters_cal(currentQ = index)
    print(len(texts))
    ids = [str(i) for i in range(1, len(texts) + 1)]
    MSDB3 = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        ids=ids
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=MSDB3.as_retriever(search_kwargs={"k": 2}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )
    query  = "Create a table to export values about containing wealth management information about Wealth AUS,\
 Wealth net flows, Wealth Management Fees, Total AUS net inflows / (outflows)and Total Revenues,\
 Total AUS,about Wealth Management of "
    for quarter in quarters:
        query += f"{quarter}, "
    query += " In ONE TABLE using 'B' for billion and 'M' for million. Only ONE TABLE! \
    The Row names(y-axis) are QUARTERS(xQxx) and the Column names(x-axis) are METRICS name!"
    print(query)
    result = qa_chain(query)
    results.append(result['result'])
    print("count before", MSDB3._collection.count())
    MSDB3._collection.delete(ids=ids)
    print("count after", MSDB3._collection.count())
    # check whether the database is cleaned or not.

https://www.goldmansachs.com/media-relations/press-releases/current/pdfs/2023-q4-results.pdf
page2
page4
page14
3
Create a table to export values about containing wealth management information about Wealth AUS, Wealth net flows, Wealth Management Fees, Total AUS net inflows / (outflows)and Total Revenues, Total AUS,about Wealth Management of 4Q23, 3Q23, 4Q22,  In ONE TABLE using 'B' for billion and 'M' for million. Only ONE TABLE!     The Row names(y-axis) are QUARTERS(xQxx) and the Column names(x-axis) are METRICS name!
 I can't create tables or exports, but I can provide the data you need in a format that you can use to create a table. Here is the information you requested for Wealth Management in a table format:

| Quarter | Wealth AUS (B) | Wealth Net Flows (B) | Wealth Management Fees (B) | Total AUS Net Inflows/(Outflows) (B) | Total Revenues (B) | Total AUS (B) |
| --- | --- | --- | --- | --- | --- | --- |
| 4Q23 | 1,122 | 51 | 13.88 | 101 | 46.25 | 2,812 |
| 3Q23 | 1,031 | 26 |

In [63]:
for i in results:
    print(i)

  I can't create tables or exports, but I can provide the data you need in a format that you can use to create a table. Here is the information you requested for Wealth Management in a table format:

| Quarter | Wealth AUS (B) | Wealth Net Flows (B) | Wealth Management Fees (B) | Total AUS Net Inflows/(Outflows) (B) | Total Revenues (B) | Total AUS (B) |
| --- | --- | --- | --- | --- | --- | --- |
| 4Q23 | 1,122 | 51 | 13.88 | 101 | 46.25 | 2,812 |
| 3Q23 | 1,031 | 26 | 12.77 | 50 | 45.77 | 2,680 |
| 4Q22 | 1,010 | 22 | 11.73 | 66 | 44.73 | 2,547 |

Note: The values for Wealth AUS, Wealth Net Flows, and Wealth Management Fees are in billions (B), while the values for Total AUS Net Inflows/(Outflows), Total Revenues, and Total AUS are in millions (M).
  Sure, here's a table with the requested information:

| Quarters | Wealth AUS ($B) | Wealth Net Flows ($B) | Wealth Management Fees ($B) | Total AUS Net Inflows/Outflows ($B) | Total Revenue ($B) | Total AUS ($B) |
| --- | --- | --- | --

In [65]:
# Save the raw tables
pattern1 = r'\| Quarters \|.*?\|\n(.*?)\n\n'
pattern2 = r'\| QUARTERS \|.*?\|\n(.*?)\n\n'
pattern3 = r'\| Quarter \|.*?\|\n(.*?)\n\n'
GSdataframes = []
for i in results:
    match1 = re.search(pattern1, i, re.DOTALL)
    match2 = re.search(pattern2, i, re.DOTALL)
    match3 = re.search(pattern3, i, re.DOTALL)
    if match1:
        table_str = match1.group(0)
        table_rows = [row.strip().split('|') for row in table_str.strip().split('\n')]
        
        headers = [header.strip() for header in table_rows[0] if header.strip()]
        data = [[cell.strip() for cell in row if cell.strip()] for row in table_rows[1:]]
    
        df = pd.DataFrame(data, columns=headers)
        GSdataframes.append(df)
    if match2:
        table_str = match2.group(0)
        table_rows = [row.strip().split('|') for row in table_str.strip().split('\n')]
        
        headers = [header.strip() for header in table_rows[0] if header.strip()]
        data = [[cell.strip() for cell in row if cell.strip()] for row in table_rows[1:]]
    
        df = pd.DataFrame(data, columns=headers)
        GSdataframes.append(df)
    if match3:
        table_str = match3.group(0)
        table_rows = [row.strip().split('|') for row in table_str.strip().split('\n')]
        
        headers = [header.strip() for header in table_rows[0] if header.strip()]
        data = [[cell.strip() for cell in row if cell.strip()] for row in table_rows[1:]]
    
        df = pd.DataFrame(data, columns=headers)
        GSdataframes.append(df)
for i in range(len(GSdataframes)):
    print(GSdataframes[i])

  Quarter Wealth AUS (B) Wealth Net Flows (B) Wealth Management Fees (B)  \
0     ---            ---                  ---                        ---   
1    4Q23          1,122                   51                      13.88   
2    3Q23          1,031                   26                      12.77   
3    4Q22          1,010                   22                      11.73   

  Total AUS Net Inflows/(Outflows) (B) Total Revenues (B) Total AUS (B)  
0                                  ---                ---           ---  
1                                  101              46.25         2,812  
2                                   50              45.77         2,680  
3                                   66              44.73         2,547  
  Quarters Wealth AUS ($B) Wealth Net Flows ($B) Wealth Management Fees ($B)  \
0      ---             ---                   ---                         ---   
1     2Q23           1,950                    12                           8   
2     1Q2

In [66]:
with pd.ExcelWriter('sample_output.xlsx', engine='xlsxwriter') as writer:
    startrows = 0
    for df in MSdataframes:
        df.to_excel(writer, index=True, sheet_name='Morgan_Stanley', startrow=startrows)
        startrows += df.shape[0] + 2
    startrows = 0
    for df in BOAdataframes:
        df.to_excel(writer, index=True, sheet_name='Bank_of_America', startrow=startrows)
        startrows += df.shape[0] + 2
    startrows = 0
    for df in GSdataframes:
        df.to_excel(writer, index=True, sheet_name='Goldenman_Sachs', startrow=startrows)
        startrows += df.shape[0] + 2

# JP Morgan

In [10]:
JPlinks = {"4Q23": "https://www.jpmorganchase.com/content/dam/jpmc/jpmorgan-chase-and-co/investor-relations/documents/quarterly-earnings/2023/4th-quarter/16d9371e-30e9-4898-abf6-d1f7c86fd311.pdf",\
            "2Q23": "https://www.jpmorganchase.com/content/dam/jpmc/jpmorgan-chase-and-co/investor-relations/documents/quarterly-earnings/2023/2nd-quarter/c9585f9b-75cc-4a49-b1ea-cec4423c87a7.pdf", \
            "3Q22": "https://www.jpmorganchase.com/content/dam/jpmc/jpmorgan-chase-and-co/investor-relations/documents/quarterly-earnings/2022/3rd-quarter/03e7493b-bc90-4ff5-8c1d-9e6611117091.pdf", \
            "1Q22": "https://www.jpmorganchase.com/content/dam/jpmc/jpmorgan-chase-and-co/investor-relations/documents/quarterly-earnings/2022/1st-quarter/f9d46f02-79ac-491e-920e-26d0d2008667.pdf"}
jp_search_words_with_threshold=[('ASSET & WEALTH MANAGEMENT',95)]

In [105]:
results = []
for index, link in JPlinks.items():
    print(link)
    texts = fileaccessing_builddb_pageselection(link, search_text_thresholds = jp_search_words_with_threshold)
    quarters = Quarters_cal(currentQ = index)
    print(len(texts))
    ids = [str(i) for i in range(1, len(texts) + 1)]
    MSDB3 = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        ids=ids
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=MSDB3.as_retriever(search_kwargs={"k": 2}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )
    query  = " Create a table to export values of these metrics:\
    Total client assets, Net new assets, Total Assets under management and Assent under supervision,total net revenue, net income,Asset management fees, in "
    for quarter in quarters:
        query += f"{quarter}, "
    query += " In ONE TABLE using 'B' for billion and 'M' for million.  \
    The Row names(y-axis) are QUARTERS(xQxx) and the Column names(x-axis) are METRICS name!"
    print(query)
    result = qa_chain(query)
    results.append(result['result'])
    print("count before", MSDB3._collection.count())
    MSDB3._collection.delete(ids=ids)
    print("count after", MSDB3._collection.count())
    # check whether the database is cleaned or not.

https://www.jpmorganchase.com/content/dam/jpmc/jpmorgan-chase-and-co/investor-relations/documents/quarterly-earnings/2023/4th-quarter/16d9371e-30e9-4898-abf6-d1f7c86fd311.pdf
page1
page3
page8
page21
page22
page23
6
 Create a table to export values of these metrics:    Total client assets, Net new assets, Total Assets under management and Assent under supervision,total net revenue, net income,Asset management fees, in 4Q23, 3Q23, 4Q22,  In ONE TABLE using 'B' for billion and 'M' for million.      The Row names(y-axis) are QUARTERS(xQxx) and the Column names(x-axis) are METRICS name!
 Sure, here's a table with the requested metrics and quarters:

| Quarter | Total Client Assets (B) | Net New Assets (B) | Total Assets Under Management (B) | Asset Under Supervision (B) | Total Net Revenue (B) | Net Income (B) | Asset Management Fees (B) |
| --- | --- | --- | --- | --- | --- | --- | --- |
| 4Q23 | 5,012 | 94 | 3,422 | 1,590 | 5,095 | 1,217 | 3,137 |
| 3Q23 | 4,644 | 132 | 3,186 | 1,458 | 5

In [106]:
for i in results:
    print(i)

  Sure, here's a table with the requested metrics and quarters:

| Quarter | Total Client Assets (B) | Net New Assets (B) | Total Assets Under Management (B) | Asset Under Supervision (B) | Total Net Revenue (B) | Net Income (B) | Asset Management Fees (B) |
| --- | --- | --- | --- | --- | --- | --- | --- |
| 4Q23 | 5,012 | 94 | 3,422 | 1,590 | 5,095 | 1,217 | 3,137 |
| 3Q23 | 4,644 | 132 | 3,186 | 1,458 | 5,005 | 1,417 | 2,975 |
| 4Q22 | 4,347 | 112 | 2,766 | 1,341 | 4,588 | 1,134 | 2,825 |

Note: The values for "Total Client Assets", "Net New Assets", "Total Assets Under Management", and "Assets Under Supervision" are in billions (B), while the values for "Total Net Revenue", "Net Income", and "Asset Management Fees" are in millions (M).
  I can certainly help you with that! Here's a table that exports the values of the given metrics for the specified quarters:

| Quarter | Total Client Assets (B) | Net New Assets (B) | Total Assets Under Management (B) | Asset Under Supervision (B) 

In [107]:
# Save the raw tables
pattern1 = r'Quarter \|.*?\|\n(.*?)\n\n'
pattern2 = r'\| Quarters \|.*?\|\n(.*?)\n\n'
JPMdataframes = []
for i in results:
    match1 = re.search(pattern1, i, re.DOTALL)
    match2 = re.search(pattern2, i, re.DOTALL)
    if match1:
        table_str = match1.group(0)
        table_rows = [row.strip().split('|') for row in table_str.strip().split('\n')]

        headers = [header.strip() for header in table_rows[0] if header.strip()]
        data = [[cell.strip() for cell in row if cell.strip()] for row in table_rows[1:]]

        df = pd.DataFrame(data, columns=headers)
        JPMdataframes.append(df)
    if match2:
        table_str = match2.group(0)
        table_rows = [row.strip().split('|') for row in table_str.strip().split('\n')]

        headers = [header.strip() for header in table_rows[0] if header.strip()]
        data = [[cell.strip() for cell in row if cell.strip()] for row in table_rows[1:]]

        df = pd.DataFrame(data, columns=headers)
        JPMdataframes.append(df)
for i in range(len(JPMdataframes)):
    print(JPMdataframes[i])

  Quarter Total Client Assets (B) Net New Assets (B)  \
0     ---                     ---                ---   
1    4Q23                   5,012                 94   
2    3Q23                   4,644                132   
3    4Q22                   4,347                112   

  Total Assets Under Management (B) Asset Under Supervision (B)  \
0                               ---                         ---   
1                             3,422                       1,590   
2                             3,186                       1,458   
3                             2,766                       1,341   

  Total Net Revenue (B) Net Income (B) Asset Management Fees (B)  
0                   ---            ---                       ---  
1                 5,095          1,217                     3,137  
2                 5,005          1,417                     2,975  
3                 4,588          1,134                     2,825  
  Quarter Total Client Assets (B) Net New Assets

In [108]:
with pd.ExcelWriter('sample_output.xlsx', engine='xlsxwriter') as writer:
    startrows = 0
    for df in MSdataframes:
        df.to_excel(writer, index=True, sheet_name='Morgan_Stanley', startrow=startrows)
        startrows += df.shape[0] + 2
    startrows = 0
    for df in BOAdataframes:
        df.to_excel(writer, index=True, sheet_name='Bank_of_America', startrow=startrows)
        startrows += df.shape[0] + 2
    startrows = 0
    for df in GSdataframes:
        df.to_excel(writer, index=True, sheet_name='Goldenman_Sachs', startrow=startrows)
        startrows += df.shape[0] + 2
    startrows = 0
    for df in JPMdataframes:
        df.to_excel(writer, index=True, sheet_name='JPMorgan', startrow=startrows)
        startrows += df.shape[0] + 2

In [24]:
results = []
for index, link in JPlinks.items():
    print(link)
    texts = fileaccessing_builddb_pageselection(link, search_text_thresholds = jp_search_words_with_threshold)
    quarters = Quarters_cal(currentQ = index)
    print(len(texts))
    ids = [str(i) for i in range(1, len(texts) + 1)]
    MSDB3 = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        ids=ids
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=MSDB3.as_retriever(search_kwargs={"k": 2}), 
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )
    query  = "from the given information, Create a table to export values of these metrics:\
    [Liquidity, Fixed income, Equity , Multi-asset and Alternatives] related with Net asset flows in Assets under management rollforward part in "
    for quarter in quarters:
        query += f"{quarter}, "
    query += " Only three rows, In ONE TABLE using 'B' for billion and 'M' for million. dont give an text other than the table \
    The Row names(y-axis) are QUARTERS(xQxx) and the Column names(x-axis) are METRICS name!"
    print(query)
    result = qa_chain(query)
    results.append(result['result'])
    print("count before", MSDB3._collection.count())
    MSDB3._collection.delete(ids=ids)
    print("count after", MSDB3._collection.count())
    # check whether the database is cleaned or not.

https://www.jpmorganchase.com/content/dam/jpmc/jpmorgan-chase-and-co/investor-relations/documents/quarterly-earnings/2023/4th-quarter/16d9371e-30e9-4898-abf6-d1f7c86fd311.pdf
page1
page3
page8
page21
page22
page23
6
from the given information, Create a table to export values of these metrics:    [Liquidity, Fixed income, Equity , Multi-asset and Alternatives] related with Net asset flows in Assets under management rollforward part in 4Q23, 3Q23, 4Q22,  Only three rows, In ONE TABLE using 'B' for billion and 'M' for million. dont give an text other than the table     The Row names(y-axis) are QUARTERS(xQxx) and the Column names(x-axis) are METRICS name!
 Sure, here's the table you requested:

| Quarters | Liquidity (B) | Fixed Income (B) | Equity (B) | Multi-Asset (B) | Alternatives (B) |
| --- | --- | --- | --- | --- | --- |
| 4Q23 | 49 | 6 | 12 | -1 | 5 |
| 3Q23 | 40 | 1 | 16 | 1 | 2 |
| 4Q22 | 33 | 8 | 22 | 7 | 1 |

Note: The values in the table are in billions (B) or millions (M).
c

In [25]:
for i in results:
    print(i)

  Sure, here's the table you requested:

| Quarters | Liquidity (B) | Fixed Income (B) | Equity (B) | Multi-Asset (B) | Alternatives (B) |
| --- | --- | --- | --- | --- | --- |
| 4Q23 | 49 | 6 | 12 | -1 | 5 |
| 3Q23 | 40 | 1 | 16 | 1 | 2 |
| 4Q22 | 33 | 8 | 22 | 7 | 1 |

Note: The values in the table are in billions (B) or millions (M).
  Sure, here's the table you requested:

| Quarters | Liquidity | Fixed Income | Equity | Multi-Asset | Alternatives |
| --- | --- | --- | --- | --- | --- |
| 2Q23 | 12.8B | 15.4B | 40.3B | 10.7B | 14.5B |
| 1Q23 | 13.3B | 14.9B | 38.7B | 10.3B | 14.1B |
| 2Q22 | 11.9B | 13.5B | 35.9B | 9.4B | 13.7B |

Note: The values are in billions (B) or millions (M).
  Sure, here's a table with the requested information:

| Quarters | Liquidity | Fixed Income | Equity | Multi-Asset | Alternatives |
| --- | --- | --- | --- | --- | --- |
| 3Q22 | 128 B | 143 B | 3,417 M | 1,151 B | 1,013 B |
| 2Q22 | 133 B | 141 B | 3,403 M | 1,147 B | 982 M |
| 3Q21 | 117 B | 135 B 

# Schwab

In [32]:
schlinks = {"4Q23": "https://content.schwab.com/web/retail/public/about-schwab/schw_q4_2023_earnings_release.pdf",\
            "3Q23": "https://content.schwab.com/web/retail/public/about-schwab/schwab_q3_2023_earnings_release.pdf",\
             "2Q23": "https://content.schwab.com/web/retail/public/about-schwab/schw_q2_2023_earnings_release.pdf", \
            "1Q23": "https://content.schwab.com/web/retail/public/about-schwab/schw_q1_2023_earnings_release.pdf"}
schwab_search_words_with_threshold = [('Total asset management',50),('Total client assets',80),('Net New Assets',30) ]

In [40]:
results = []
source_docu = []
for index, link in schlinks.items():
    print(link)
    texts = fileaccessing_builddb_pageselection(link, search_text_thresholds = schwab_search_words_with_threshold)
    quarters = Quarters_cal(currentQ = index)
    ids = [str(i) for i in range(1, len(texts) + 1)]
    MSDB3 = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        ids=ids
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=MSDB3.as_retriever(search_kwargs={"k": 2}), 
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )
    query  = "Create a table containing Asset management information bout Total net new assets, Net growth in client assets, Total Advice Solutions and Investor Services NNA in "
    for quarter in quarters:
        query += f"{quarter}, "
        
    query += " In ONE TABLE using 'B' for billion and 'M' for million. Only ONE TABLE! \
    The Row names(y-axis) are QUARTERS(xQxx) and the Column names(x-axis) are METRICS name!"
    print(query)
    result = qa_chain(query)
    results.append(result['result'])
    source_docu.append(result['source_documents'])
    print("count before", MSDB3._collection.count())
    MSDB3._collection.delete(ids=ids)
    print("count after", MSDB3._collection.count())
    # check whether the database is cleaned or not.

https://content.schwab.com/web/retail/public/about-schwab/schw_q4_2023_earnings_release.pdf
page0
page7
page8
Create a table containing Asset management information bout Total net new assets, Net growth in client assets, Total Advice Solutions and Investor Services NNA in 4Q23, 3Q23, 4Q22,  In ONE TABLE using 'B' for billion and 'M' for million. Only ONE TABLE!     The Row names(y-axis) are QUARTERS(xQxx) and the Column names(x-axis) are METRICS name!

|||
====|
$$|  $$•
��•  $$•
##  $$•2565656565656565656565765765777762576565657657657657657657657657657657657657657657657657657657657657657657657657657657657657657657657575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575666666666666664653753856485  £5  ($64  $  $4 4 4 4 5 5 5 5 5 5 5  $6  $4  $  $4  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $    $    $  $  $  $  $  $ 

In [41]:
for i in results:
    print(i)


|||
====|
$$|  $$•
��•  $$•
##  $$•2565656565656565656565765765777762576565657657657657657657657657657657657657657657657657657657657657657657657657657657657657657657657575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575757575666666666666664653753856485  £5  ($64  $  $4 4 4 4 5 5 5 5 5 5 5  $6  $4  $  $4  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $  $    $    $  $  $  $  $  $  $  $  $  $  $  $  $    $    $    $    $  $  $  $    $    $    $    $    $    $    $  8  8  6  $  6  $  6  $  6  $  6  $  6  $  6  $  9  $  8  $  6  $  6  $  6  $    $    $  $  $  $  $  $  $  $  $  $  $    $  $  $    $  $  $  $  $  64444  €  644444  4  4  4  4  4  4  4  4  64  6444444444  44  44  44  644  44  64  64  64  64  (54  $44  $  (54 4 4 56  $  $  $  $  $  $  $ 5  $4  $4 5  (5  (5 4  (5  (5 5 4 5 4 5 4 5 4  (5 5 5 5 5 4 5 8 8 5 8 8 5 5 5  (8 5 

In [42]:
pattern = r'\| Quarters \|.*?\|\n(.*?)\n\n'
tables = []

tables = []
for i in results:
    match = re.search(pattern, i, re.DOTALL)
    if match:
        table_str = match.group(0)
        table_rows = [row.strip().split('|') for row in table_str.strip().split('\n')]
        # Extract headers (first row) and data (remaining rows)
        headers = [header.strip().lower() for header in table_rows[0] if header.strip()]
        data = [[cell.strip() for cell in row if cell.strip()] for row in table_rows[1:]]
        # Create DataFrame
        df = pd.DataFrame(data, columns=headers)
        tables.append(df)
tables
   

[  quarters total net new assets (b) net growth in client assets (b)  \
 0      ---                      ---                             ---   
 1     1Q23                    150.7                           530.2   
 2     4Q22                    128.4                           405.6   
 3     1Q22                    114.6                           379.5   
 
   total advice solutions (b) investor services nna (m)  
 0                        ---                       ---  
 1                        8.9                      60.5  
 2                        7.8                      64.1  
 3                        6.8                      55.1  ]

In [22]:
with pd.ExcelWriter('sample_output.xlsx', engine='xlsxwriter') as writer:
    startrows = 0
    for df in MSdataframes:
        df.to_excel(writer, index=True, sheet_name='Morgan_Stanley', startrow=startrows)
        startrows += df.shape[0] + 2
    startrows = 0
    for df in BOAdataframes:
        df.to_excel(writer, index=True, sheet_name='Bank_of_America', startrow=startrows)
        startrows += df.shape[0] + 2
    startrows = 0
    for df in GSdataframes:
        df.to_excel(writer, index=True, sheet_name='Goldenman_Sachs', startrow=startrows)
        startrows += df.shape[0] + 2
    startrows = 0
    for df in JPMdataframes:
        df.to_excel(writer, index=True, sheet_name='JPMorgan', startrow=startrows)
        startrows += df.shape[0] + 2
    startrows = 0
    for df in tables:
        df.to_excel(writer, index=True, sheet_name='Schwab', startrow=startrows)
        startrows += df.shape[0] + 2