## Insight from the various information

Many salespeople want to know more about their counterpart (customer) accounts. They also want to understand business trends and insights so that they can deliver more relevant solutions to their customers. 

But in many cases, customers - especially greenfield customers - don't want to reveal their pain points and short-term/long-term goals to a vendor's salespeople. 

Our goal is to analyze their business status from their financial statements, and their pain points from news scraped from the internet.


In [None]:
# !pip install google-cloud-aiplatform langchain chroma
# !pip install html5lib
# !pip install openai
# !pip install opendartreader
# !pip install numexpr
# !pip install dart-fss
# !pip install ipywidgets
# !pip install google-api-python-client


In [None]:
import os
import OpenDartReader
from OpenDartReader.dart_list import *

# Open DART API key, read from the environment (None when the variable is unset).
DART_API_KEY=os.getenv("DART_API_KEY")
# OpenDartReader client; used later in this file via dartReader.company(...).
dartReader = OpenDartReader(DART_API_KEY)
# Company table; later filtered on its 'corp_name' column.
# NOTE(review): corp_codes presumably comes from the star import above - confirm.
df_companylist = corp_codes(DART_API_KEY)

In [None]:
import dart_fss as dart
import os

# Configure the Open DART API key
api_key=os.getenv('DART_API_KEY')
dart.set_api_key(api_key=api_key)
# Corporation list object; later cells call find_by_corp_code() on it.
dart_corp_list=dart.get_corp_list()

In [None]:
import vertexai
from langchain.chat_models import ChatVertexAI
from langchain.llms import VertexAI

# Google Cloud project id, read from the PROJECT_ID environment variable.
PROJECT_ID = os.getenv("PROJECT_ID")  # @param {type:"string"}
vertexai.init(project=PROJECT_ID, location="us-central1")

# Vertex AI text model; temperature=0 with the remaining sampling parameters set explicitly.
llm_vertex = VertexAI(
    model_name="text-bison@latest",
    max_output_tokens=2048,
    temperature=0,
    top_p=0.8,
    top_k=40,
)

# llm_vertexchat = ChatVertexAI()


In [None]:
# from langchain.chat_models import ChatOpenAI

# llm_openai = ChatOpenAI(temperature=0)


In [None]:
# Select the model used by the prompt helpers, the math chain and the agent below.
llm = llm_vertex

In [None]:
from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper

# Google search wrapper; top5_results calls search.results(...) on it.
search = GoogleSearchAPIWrapper()
# Registry of Tool objects: later cells append to it and it is passed to initialize_agent.
tools = []

def top5_results(query):
    """Search Google for *query* and return the first five results.

    Every single-quote character is removed from the query before it is
    sent (presumably because the agent may wrap its input in quotes).
    The return value is whatever ``search.results`` produces.
    """
    cleaned_query = query.replace("'", "")
    results = search.results(query=cleaned_query, num_results=5)
    return results

# Expose top5_results to the agent under the tool name "search".
search_tool = Tool(
    name="search",
    description="search snippets through google internet search engine with result format json - title, link, snippet - list and 5 results. useful for finding related information.",
    func=top5_results,
)

# Register the tool in the shared tools list.
tools.append(search_tool)


In [None]:
from bs4 import BeautifulSoup
import requests


def getHtmlContentFromUrl(url_link):
    """Download a page and return the text of all its <p> elements.

    Single quotes are removed from *url_link* before the request is made.
    The request uses a 10 second timeout; the response body is parsed with
    the 'lxml' parser and the paragraph texts are concatenated with no
    separator between them.
    """
    cleaned_url = url_link.replace("'", "")
    page = requests.get(cleaned_url, timeout=10)
    soup = BeautifulSoup(page.text, 'lxml')
    # Detach every <script> element before collecting paragraph text.
    for script_tag in soup.find_all('script'):
        script_tag.extract()
    return ''.join(paragraph.text for paragraph in soup.find_all('p'))

# Expose getHtmlContentFromUrl to the agent under the tool name "crawl".
crawl_tool = Tool(
  name="crawl",
  description="crawl html content from url. useful for crawling news articles with url which is provided by search tool.",
  func=getHtmlContentFromUrl,
)

# Register the tool in the shared tools list.
tools.append(crawl_tool)

In [None]:
def searchCorpCodeFromCompanyName(company_name):
    """Return the company-table rows whose name starts with *company_name*.

    The raw input is printed first, then single quotes are stripped and
    the rows of ``df_companylist`` whose 'corp_name' begins with the
    cleaned name are rendered as a string without the index column.
    """
    print("SEARCH CORP NAME:" + company_name)
    cleaned_name = company_name.replace("'", "")
    name_matches = df_companylist['corp_name'].str.startswith(cleaned_name)
    matching_rows = df_companylist[name_matches]
    return matching_rows.to_string(index=False)

# def searchDocumentsFromDart(documentId):
#   return dart.list(documentId, start='1999-01-01', kind='A') 

# Smoke test: print the lookup result for one sample company name.
print(searchCorpCodeFromCompanyName("CJ대한통운"))

# Expose the lookup to the agent under the tool name "search_corp_code".
search_corp_code_tool = Tool(
    name="search_corp_code",
    description="search corp code from company name. useful for searching corp code from company name.",
    func=searchCorpCodeFromCompanyName,
)

# Register the tool in the shared tools list.
tools.append(search_corp_code_tool)

In [None]:
def getCompanyOverallInformationWithIndustryCode(corp_code):
    """Return the company record that ``dartReader.company`` gives for *corp_code*.

    Single quotes are removed from *corp_code* before the lookup.
    """
    cleaned_code = corp_code.replace("'", "")
    company_info = dartReader.company(cleaned_code)
    return company_info

# Smoke test: print the company record for one sample corp code.
print(getCompanyOverallInformationWithIndustryCode("00113410"))

# Expose the lookup to the agent as "company_overall_information_with_industry_code".
company_overall_information_tool = Tool(
    name="company_overall_information_with_industry_code",
    description="get company overall information and related industry code from corp code. useful for getting company overall information from corp code.",
    func=getCompanyOverallInformationWithIndustryCode,
)

# Register the tool in the shared tools list.
tools.append(company_overall_information_tool)

In [None]:
# Prompt templates; each one has a single {context} placeholder that the
# make* helpers below fill with a financial statement.

# Current-state assessment prompt.
# NOTE(review): "dfine" in the text below looks like a typo for "define".
prompt_template_cfo_role_current_state = """As the CFO role, please check the current financial statement and dfine the current state of the company.

context:
{context}"""
# Business goals / actions prompt; asks for "Category | Business Goal | Action" rows.
prompt_template_cfo_role_business_action = """As the CFO role, please make the Business Goals and Actions to be taken next year for the given financial statement. Define larger categories for Business Goals. Provide the response in the format of Category | Business Goal | Action.

context:
{context}"""
# Financial summary prompt.
# NOTE(review): "Summary the given" probably should read "Summarize the given".
prompt_template_cfo_role_finance_summary = """Summary the given financial statement and provide the summary with growth rate and direction of the company. Check the overall score of this company

Context :
{context}
"""
# Scoring prompt; asks for an overall score from 0 to 100.
prompt_template_cfo_role_finance_score = """As a M&A investor in a investment bank, Check the overall financial statement and provide whether the company is in good shape or not. Provide the overall score of this company from 0 to 100

Context :
{context}
"""

def makeCurrentStateAsCfo(financial_statement):
    """Fill the current-state prompt with *financial_statement* and return the LLM's reply."""
    prompt = prompt_template_cfo_role_current_state.format(context=financial_statement)
    return llm.predict(prompt)

def makeBusinessGoalAsCfo(financial_statement):
    """Fill the business-goal prompt with *financial_statement* and return the LLM's reply."""
    prompt = prompt_template_cfo_role_business_action.format(context=financial_statement)
    return llm.predict(prompt)

def makeFinancialSummary(financial_statement):
    """Fill the summary prompt with *financial_statement* and return the LLM's reply."""
    prompt = prompt_template_cfo_role_finance_summary.format(context=financial_statement)
    return llm.predict(prompt)

def makeFinancialScore(financial_statement):
    """Fill the scoring prompt with *financial_statement* and return the LLM's reply."""
    prompt = prompt_template_cfo_role_finance_score.format(context=financial_statement)
    return llm.predict(prompt)

In [None]:
# Look up industry classification codes
import pandas as pd

# Gzipped CSV of the classification table, fetched from GitHub.
url = 'https://github.com/FinanceData/KSIC/raw/master/KSIC_10.csv.gz'

# dtype='str' reads every column as a string; later code compares
# 'Industy_code' against string input.
df_ksic = pd.read_csv(url, dtype='str')
#df_ksic[df_ksic['Industy_code'] == '493']

In [None]:
def getIndustryName(industy_code):
    """Return the industry name stored in ``df_ksic`` for *industy_code*.

    Single quotes are stripped from the code first. When no row has that
    'Industy_code', a guidance message is returned instead; otherwise the
    'Industy_name' of the first matching row is returned.
    """
    cleaned_code = industy_code.replace("'", "")
    matching_rows = df_ksic[df_ksic['Industy_code'] == cleaned_code]
    if checkDataFrameHas0Record(matching_rows):
        return 'no industry name. before to use this tool, use company_overall_information_with_industry_code tool and get industy_code field.'
    first_name = matching_rows['Industy_name'].values[0]
    return first_name

def checkDataFrameHas0Record(df):
    """Return True when *df* has zero rows (``df.shape[0] == 0``), else False."""
    row_count = df.shape[0]
    return bool(row_count == 0)

#print(getIndustryName('493'))

# Expose getIndustryName to the agent under the tool name "industry_name_retrevial".
industry_name_retrevial_tool = Tool(
    name="industry_name_retrevial",
    description="get industry name from industry code. before to use this tool, you should know the 'industry_code'. useful for getting industry name from industry code.",
    func=getIndustryName,
)

# Register the tool in the shared tools list.
tools.append(industry_name_retrevial_tool)

In [None]:
def getCorpListByIndustry(industry_name):
    """Return the corporations that dart_fss lists under *industry_name*.

    Args:
        industry_name: Sector name to look up. Single quotes are removed
            first, matching the other tool functions in this file.

    Returns:
        One line per matched corporation (``str(corp)`` for each entry;
        the tool description advertises a "[corp_code]corp_name" shape -
        presumably that is how dart_fss renders a corporation, confirm),
        or a guidance message when nothing matches.
    """
    industry_name = industry_name.replace("'", "")
    corp_list = dart.get_corp_list().find_by_sector(industry_name)
    # Treat both None and an empty result as "no match".
    if not corp_list:
        return "There is no matched corporation. Please use industry_name_retrevial tool before to use it."
    # Hand the matches back explicitly; a bare `return` would give the agent None.
    return "\n".join(str(corp) for corp in corp_list)

# Expose getCorpListByIndustry to the agent as "corp_list_by_industry_name_retrevial_tool".
corp_list_by_industry_retrevial_tool = Tool(
    name="corp_list_by_industry_name_retrevial_tool",
    description="get corporation list-[corp_cord]corp_name- by industry_name which can be retrieved by industry_name_retrevial tool.",
    func=getCorpListByIndustry
)

# Register the tool in the shared tools list.
tools.append(corp_list_by_industry_retrevial_tool)

In [None]:
# Module-level placeholder.
# NOTE(review): prepareFinancialXBRLInformation assigns `xbrl` without a
# `global` statement, so it binds a local and this value stays None.
xbrl = None

def prepareFinancialXBRLInformation(corp_code):
    """Return the first available XBRL document for *corp_code*, or False.

    Single quotes are stripped from the code, the corporation is looked up
    in ``dart_corp_list`` and its filings from 2019-03-01 onward are
    scanned in the order returned. The first filing whose ``xbrl`` is not
    None wins; False is returned when none has one.
    """
    cleaned_code = corp_code.replace("'", "")
    corporation = dart_corp_list.find_by_corp_code(cleaned_code)
    filings = corporation.search_filings(bgn_de='20190301')
    for filing in filings:
        filing_xbrl = filing.xbrl
        if filing_xbrl is not None:
            return filing_xbrl
    return False

def getCashFlowStatement(corp_code_with_digit_8):
    """Return the cash-flow tables for a corporation as one text block.

    Args:
        corp_code_with_digit_8: Eight-character corp code. Single quotes
            and surrounding whitespace are removed before validation.

    Returns:
        Each cash-flow table rendered with ``to_string()`` and followed by
        a newline, concatenated in order; or a guidance / error message
        when the code is not eight characters long, the corporation is
        unknown, or no XBRL document is available.
    """
    corp_code = corp_code_with_digit_8.replace("'", "").strip()
    if len(corp_code) != 8:
        # Point the agent at the tool's registered name ("search_corp_code"),
        # not at the Python function name, which it cannot call.
        return "Before to use this tool, retrieve the company code(corp_code) from the search_corp_code tool instead of using stock_code - 6 digit."
    target_corp = dart_corp_list.find_by_corp_code(corp_code)
    if target_corp is None:
        return "No Company Found. You should check the company code(corp_code) from the search_corp_code tool instead of stock_code(ex:005963)."
    # Resolve the XBRL document once: the helper searches filings, so a
    # second call would repeat that work.
    xbrl = prepareFinancialXBRLInformation(corp_code)
    # The helper signals "nothing found" with False; None is also rejected.
    if xbrl is None or xbrl is False:
        return "No Financial Statement Found"
    return "".join(
        cashflow.to_DataFrame(show_class=False).to_string() + "\n"
        for cashflow in xbrl.get_cash_flows()
    )

# Expose getCashFlowStatement to the agent under the tool name "cashflow_statement".
cashflow_statement_tool_with_corp_code = Tool(
    name="cashflow_statement",
    description="get cashflow statement from corp code. useful for getting cashflow statement from corp_code.",
    func=getCashFlowStatement,
)

# Register the tool in the shared tools list.
tools.append(cashflow_statement_tool_with_corp_code)


In [None]:
from langchain.chains import LLMMathChain

# Math chain that the calculator function delegates to; verbose output is enabled.
# NOTE(review): newer LangChain releases may prefer LLMMathChain.from_llm(...) - confirm.
llm_math_chain = LLMMathChain(llm=llm, verbose=True)

def calculator(calculatorInputString):
    """Evaluate a math question with ``llm_math_chain`` and return its output.

    Args:
        calculatorInputString: The expression or question passed to the chain.

    Returns:
        Whatever ``llm_math_chain.run`` produces for the input.
    """
    # The result must be returned: a tool function that returns nothing
    # gives the agent None as its observation.
    return llm_math_chain.run(calculatorInputString)

# Expose calculator to the agent under the tool name "calculator".
calculator_tool = Tool(
    name="calculator",
    description="calculator tool. useful for calculating mathematical expressions.",
    func=calculator,
)

# Register the tool in the shared tools list.
tools.append(calculator_tool)

In [None]:
from pydantic import BaseModel, Field
from langchain.agents import AgentType, initialize_agent

# Build a zero-shot ReAct agent over every tool registered above, with verbose output.
agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)

# Sample query (Korean): "I would like to know the overall cash flow of
# companies in the same industry group as CJ Logistics."
agent.run("CJ대한통운 과 같은 산업군 에 속한 회사의 전반적인 현금흐름을 알고 싶습니다. ")
