In [25]:
import json
import os
import re
import warnings

from langchain.chains.summarize import load_summarize_chain
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.llms import AzureOpenAI, OpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import (
    AIMessagePromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.pydantic_v1 import BaseModel, Field, validator



In [15]:
# Source filing to summarise; the path is relative to the notebook's directory.
FILING_PDF_PATH = "../data/tslaletter.htm - Generated by SEC Publisher for SEC Filing.pdf"

# Load the filing into a list of documents.
loader = UnstructuredFileLoader(FILING_PDF_PATH)
docs = loader.load()

def make_doc_clean(doc_text):
    """Collapse repeated newlines and repeated spaces in a document string.

    Runs of one or more newline characters become a single newline, then
    runs of one or more space characters become a single space. Other
    whitespace (e.g. tabs) and lines that contain only spaces are left as-is.

    Args:
        doc_text: The raw text to normalise.

    Returns:
        The text with newline runs and space runs squeezed.
    """
    # Applied in order: newlines first, then spaces.
    squeeze_rules = (
        ("\n+", "\n"),
        (" +", " "),
    )
    cleaned = doc_text
    for pattern, replacement in squeeze_rules:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
# Normalise the whitespace of the first loaded document for use as prompt input.
first_doc = docs[0]
doc_text = make_doc_clean(first_doc.page_content)

In [20]:
# Azure OpenAI connection settings, read from the environment so that no
# credential is stored in the notebook.
#
# SECURITY: an API key used to be hardcoded in this cell. Treat that key as
# leaked: rotate it in Azure and supply the replacement through the
# AZURE_OPENAI_API_KEY environment variable.
#
# AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_VERSION are optional overrides;
# when unset, the values this notebook was originally written against are used.
AZURE_ENDPOINT = os.environ.get(
    "AZURE_OPENAI_ENDPOINT", "https://futu-002-caeast-001.openai.azure.com/"
)
AZURE_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
AZURE_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION", "2023-07-01-preview")

if not AZURE_API_KEY:
    # Warn instead of raising so the cell still runs; the missing key will
    # surface as an error once a model is constructed or called.
    warnings.warn(
        "AZURE_OPENAI_API_KEY is not set; Azure OpenAI requests will not be authenticated.",
        stacklevel=2,
    )


# Keyword arguments shared by both Azure OpenAI wrappers below.
# temperature=0.0 is used for both clients.
_azure_common_kwargs = dict(
    openai_api_base=AZURE_ENDPOINT,
    openai_api_version=AZURE_API_VERSION,
    deployment_name="gpt-4",
    openai_api_key=AZURE_API_KEY,
    openai_api_type="azure",
    temperature=0.0,
)

# Completion-style client.
# NOTE(review): this points at the same "gpt-4" deployment as the chat client;
# confirm the completion-style wrapper is supported for that deployment.
azure_model = AzureOpenAI(**_azure_common_kwargs)

# Chat-style client.
azure_chat_model = AzureChatOpenAI(**_azure_common_kwargs)



In [38]:
# System prompt for the "cod" prompt pair: asks the model for five rounds of
# increasingly entity-dense summaries and a JSON list as the answer.
# The first guideline previously gave two different length targets
# ("~120 words" and "~80 words"); both now say ~120 words so the instruction
# is self-consistent.
cod_system_prompt_template = """
You will generate increasingly concise, entity-dense summaries of the above Article.

Repeat the following 2 steps 5 times.

Step 1. Identify 1-3 informative Entities (";" delimited) from the Article which are missing from the previously generated summary. 
Step 2. Write a new, denser summary of identical length which covers every entity and detail from the previous summary plus the Missing Entities.

A Missing Entity is:
- Relevant: to the main story.
- Specific: descriptive yet concise (5 words or fewer).
- Novel: not in the previous summary.
- Faithful: present in the Article.
- Anywhere: located anywhere in the article.

Guidelines:
- The first summary should be long (4-7 sentences, ~120 words) yet highly non-specific, containing little information beyond the entities marked as missing. Use overly verbose language and fillers (e.g., "this article discusses") to reach ~120 words.
- Make every word count: re-write the previous summary to improve flow and make space for additional entities.
- Make space with fusion, compression, and removal of uninformative phrases like "the article discusses".
- The summaries should become highly dense and concise yet self-contained, e.g., easily understood without the Article.
- Missing entities can appear anywhere in the new summary.
- Never drop entities from the previous summary. If space cannot be made, add fewer new entities.

Answer in JSON. The JSON should be a list (length 5) of dictionaries whose keys are "Missing_Entities" and "Denser_Summary".
"""
# Optional instructions that are currently left out of the system prompt:
#   "Remember, use the exact same number of words for each summary."
#   "Answer in Chinese."

# User message for the "cod" prompt pair; {input_text} is filled with the article.
cod_user_template = """Article: {input_text}
Dense summary: """

# System prompt for the "jarvix" prompt pair: instructs the model to turn an
# announcement document into a structured, SEO-titled financial report with
# "Event Detail", an explanation of the announcement type, and "Conclusion".
# The text is passed to the model as-is, so its whitespace is significant.
my_system_prompt_template = """
Please disregard any previous instructions or tips. Act as a highly skilled SEO expert and a top-notch copywriter with a strong command of the English language. Pretend that your English content creation is so exceptional that it consistently outperforms other websites. Your task is to transform a complex announcement document into a professionally structured financial information report that is easy to understand.

Let's break down the process into the following steps and think step by step:

Step 1: Utilize the 5W1H principle to extract essential event information. Include all relevant details about 'Who,' including their career position, if applicable.
Step 2: Craft a concise SEO title, written in bold letters, based on the findings from Step 1. 
Step 3: Develop four to six well-structured paragraphs for your complete text, commencing with "Event Detail" and concluding with "Conclusion." In the "Event Detail" paragraph, delve into the title's details, including key figures and numerical data. In the "Conclusion" paragraph, analyze both the positive and negative impacts of this event on the company's stock price or future prospects for investors. Provide supporting evidence for each impact.
Step 4: Between the "Event Detail" and "Conclusion" paragraphs, include a section where you explain the function of the specific announcement type, in accordance with the document's form. Elaborate on the meaning, purpose, release regulations, and legal requirements related to this type of announcement, helping investors comprehend the filing process in a straightforward and comprehensible manner.
Step 5: Organize the aforementioned title and content into a coherent article. Extract the most relevant portions from the original text and rephrase the content to ensure your article is 100% unique and free from plagiarism. 

Your article should have a length ranging from 800 to 1200 tokens. All content should be composed in English with a human writing style. Additionally, rectify any grammatical issues and convert sentences to the active voice.
"""

# User message for the "jarvix" prompt pair; {input_text} is filled with the
# document text to rewrite.
my_user_template = """The text that needs to be rewritten is this: {input_text}"""



In [40]:
# Registry of available prompt pairs, keyed by prompt style.
prompts = {
    "cod": {
        "system": cod_system_prompt_template,
        "user": cod_user_template,
    },
    "jarvix": {
        "system": my_system_prompt_template,
        "user": my_user_template,
    },
}

# Prompt pair to run in this cell.
input_type = "jarvix"
selected_prompts = prompts[input_type]

# Build the two-message chat prompt (system instructions + user document).
system_template = SystemMessagePromptTemplate.from_template(selected_prompts["system"])
user_message_template = HumanMessagePromptTemplate.from_template(selected_prompts["user"])
chat_prompt = ChatPromptTemplate.from_messages([system_template, user_message_template])

# Fill the prompt with the whitespace-normalised text of the first document.
article_text = make_doc_clean(docs[0].page_content)
messages = chat_prompt.format_prompt(input_text=article_text).to_messages()

# Send the messages to the chat model and show the returned message object.
ai_response = azure_chat_model(messages)
print(ai_response)

# When input_type is "cod", the reply is expected to be a JSON list of dicts;
# it can be inspected round by round like this:
#     rsp = json.loads(ai_response.content)
#     for i, step in enumerate(rsp):
#         print(f"Summary round {i+1}")
#         for k, v in step.items():
#             print(f"{k}: {v}")

content="**SEO Title: Tesla's Board Independence Questioned as J.B. Straubel Nominated for Directorship**\n\n**Event Detail**\n\nOn April 24, 2023, a group of shareholders, including SOC Investment Group, Investor Advocates for Social Justice, and Nordea Asset Management, among others, issued a letter urging fellow Tesla shareholders to vote against the election of J.B. Straubel to Tesla's Board of Directors. Straubel, a co-founder and former Chief Technology Officer of Tesla, has been nominated to replace independent director Hiro Mizuno. The shareholders argue that Straubel's close ties to the company and CEO Elon Musk compromise the board's independence, which is already under scrutiny due to the presence of Musk's personal friends and family members.\n\n**Understanding the Announcement**\n\nThis announcement is a Notice of Exempt Solicitation, a document filed with the Securities and Exchange Commission (SEC) under Rule 14a-6(1) of the Securities Exchange Act of 1934. This rule all

In [41]:
# Print only the text of the reply so its newlines and Markdown markers are
# shown literally rather than as an escaped repr.
print(ai_response.content)

**SEO Title: Tesla's Board Independence Questioned as J.B. Straubel Nominated for Directorship**

**Event Detail**

On April 24, 2023, a group of shareholders, including SOC Investment Group, Investor Advocates for Social Justice, and Nordea Asset Management, among others, issued a letter urging fellow Tesla shareholders to vote against the election of J.B. Straubel to Tesla's Board of Directors. Straubel, a co-founder and former Chief Technology Officer of Tesla, has been nominated to replace independent director Hiro Mizuno. The shareholders argue that Straubel's close ties to the company and CEO Elon Musk compromise the board's independence, which is already under scrutiny due to the presence of Musk's personal friends and family members.

**Understanding the Announcement**

This announcement is a Notice of Exempt Solicitation, a document filed with the Securities and Exchange Commission (SEC) under Rule 14a-6(1) of the Securities Exchange Act of 1934. This rule allows shareholders 