In [1]:
import pandas as pd

In [2]:
df = pd.read_json("data/sec_items/sec_items_AAPL.json")
df.Item.value_counts()

Item
ITEM 1     27
ITEM 7     27
ITEM 14    27
ITEM 13    27
ITEM 12    27
ITEM 11    27
ITEM 10    27
ITEM 9     27
ITEM 8     27
ITEM 6     27
ITEM 5     27
ITEM 3     27
ITEM 2     27
ITEM 4     25
ITEM 7A    24
ITEM 15    22
ITEM 9A    21
ITEM 9B    19
ITEM 1A    18
ITEM 1B    18
Name: count, dtype: int64

# Setup Llama

In [3]:
import dotenv
dotenv.load_dotenv("./.env")

True

In [4]:
import requests
import os

In [5]:
def ask_llama(context: str, prompt: str) -> str:
    DEEP_INFRA_TOKEN = os.getenv("DEEP_INFRA_TOKEN")
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {DEEP_INFRA_TOKEN}"}
    responses = []
    for context_chunk in context[::4080]:
        
        data = {"stream": False, "input": f"{context_chunk} [INST]{prompt}[/INST]"}
        resp = requests.post("https://api.deepinfra.com/v1/inference/meta-llama/Llama-2-13b-chat-hf", json=data, headers=headers).json()
    
        llama_response:str = resp["results"][0]['generated_text']
        responses.append(llama_response)
    return responses

# Summarize Individual

In [6]:
df_aapl_item_7 = df[df.Item == "ITEM 7"]
df_aapl_item_7a = df[df.Item == "ITEM 7A"]

In [7]:
df_aapl_item_7a.head()

Unnamed: 0,Company,Year,Item,Text
9,AAPL,2008,ITEM 7A,Quantitative and Qualitative Disclosures About...
29,AAPL,2006,ITEM 7A,Quantitative and Qualitative Disclosures About...
49,AAPL,2019,ITEM 7A,Quantitative and Qualitative Disclosures About...
67,AAPL,1999,ITEM 7A,DISCLOSURES ABOUT MARKET RISK\n\nINTEREST RATE...
84,AAPL,2012,ITEM 7A,Quantitative and Qualitative Disclosures About...


In [9]:
txt = df_aapl_item_7.iloc[0].Text + df_aapl_item_7a.iloc[0].Text
str(txt)

'Management\x92s Discussion and Analysis of Financial Condition and Results of Operations \n This section and other parts of this Form 10-K contain forward-looking statements that involve risks and uncertainties. Forward-looking statements can\nalso be identified by words such as \x93anticipates,\x94 \x93expects,\x94 \x93believes,\x94 \x93plans,\x94 \x93predicts,\x94 and similar terms. Forward-looking statements are not guarantees of future performance and the\nCompany\x92s actual results may differ significantly from the results discussed in the forward-looking statements. Factors that might cause such differences include, but are not limited to, those discussed in the subsection entitled \x93Risk\nFactors\x94 above, which are incorporated herein by reference. The following discussion should be read in conjunction with the consolidated financial statements and notes thereto included in Item\xa08 of this Form 10-K. All information presented\nherein is based on the Company\x92s fiscal c

In [10]:
prompt = "Identify key information that is relevant to risks the company faces. Answer only in bullet points of what important insight can be taken away. If there is no insight, respond with by saying 'nothing'"
response = ask_llama(txt, prompt)

In [11]:
prompts = response
filtered_insights = []
for i, prompt_response in enumerate(prompts):
    print(f"Prompt {i}")
    # print(prompt_response)
    bullet_index = prompt_response.index(":")
    bullets = prompt_response[bullet_index:].split("* ")
    for bullet in bullets[1:]:
        filtered_insights.append(bullet)

Prompt 0
Prompt 1
Prompt 2
Prompt 3
Prompt 4
Prompt 5
Prompt 6
Prompt 7
Prompt 8
Prompt 9
Prompt 10
Prompt 11
Prompt 12
Prompt 13
Prompt 14
Prompt 15
Prompt 16
Prompt 17
Prompt 18


In [13]:
len(filtered_insights)

142

In [14]:
final_insights = ask_llama(filtered_insights, "Give me the top risk factors for the company listed in a bullet point list using the '*' character. Only give me the list and no other text. It can be any number of bullets but limit the number.")

In [15]:
final_insights

["  Sure! Here are the top risk factors for the company, listed in a bullet point list using the '*' character:\n\n• Competition from established players and new entrants\n• Rapidly changing technology and customer preferences\n• Dependence on a few key products and services\n• Regulatory and legal risks\n• Cybersecurity threats and data privacy concerns\n• Global economic and political uncertainty\n• Supply chain disruptions and manufacturing risks\n• Customer concentration and credit risks\n• Reputation and brand damage\n• Environmental and social responsibility risks."]

In [17]:
bullet_list = [b.strip() for b in final_insights[0].split("\u2022")[1:]]
bullet_list

['Competition from established players and new entrants',
 'Rapidly changing technology and customer preferences',
 'Dependence on a few key products and services',
 'Regulatory and legal risks',
 'Cybersecurity threats and data privacy concerns',
 'Global economic and political uncertainty',
 'Supply chain disruptions and manufacturing risks',
 'Customer concentration and credit risks',
 'Reputation and brand damage',
 'Environmental and social responsibility risks.']

# Llama

In [19]:
# Use a pipeline as a high-level helper
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name="bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)




: 