Basado en Nick Zincone
https://developers.refinitiv.com/en/article-catalog/article/using-ai-modeling-to-interpret-10-Q-filings
https://pypi.org/project/sec-api/

In [4]:
#!pip3 install transformers
#!pip3 install torch
#!conda install -c pytorch torchtext
#!conda install pytorch torchvision -c pytorch
#!pip3 install sec-api

In [9]:
import eikon as ek  # the Eikon Python wrapper package
import numpy as np  # NumPy
import pandas as pd  # pandas
import cufflinks as cf  # Cufflinks
import configparser as cp
import datetime as dt
cf.set_config_file(offline=True)  # set the plotting mode to offline

In [10]:
# NLP package used to aid in text manipulation
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize

# Machine Learning modules used to prepare and measure text
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
import torch

# HTML text processing
from bs4 import BeautifulSoup

# Helper modules
import matplotlib.pyplot as plt
from tqdm.notebook import trange # Progress bar
import pandas as pd

from sec_api import QueryApi

pd.set_option('display.max_colwidth', 60)

In [11]:
cfg = cp.ConfigParser()
cfg.read('../refinitiv.cfg')
ek.set_app_key(cfg['eikon']['app_id'])

In [12]:
cfg = cp.ConfigParser()
cfg.read('../secapi.cfg')
sec = cfg['sec']['app_id']


In [30]:
today = dt.date.today()
start = today - dt.timedelta(days=365.2*5)
print(today, start)

2023-04-20 2018-04-20


In [31]:
fields = ['TR.PortfolioConstituentName','TR.PortfolioWeight']
data, err = ek.get_data(['Portfolio(RETO2023_B)'],fields)
t = data['Instrument'][3]

In [32]:
#help(ek.get_symbology)

In [33]:
tick = ek.get_symbology(t, from_symbol_type="RIC", to_symbol_type="ticker")['ticker'][0]
tick

'TSLA'

In [34]:
# Query the Filings service using the Refinitiv Data Library for Python.
#
# Retrieve SEC filings for the specific company - specify the text we want to retrieve
# is defined within the "Management Discussion" section.

queryApi = QueryApi(api_key=sec)

query = {
  "query": { "query_string": {
      "query": "ticker:TSLA AND filedAt:{2022-01-01 TO 2023-12-31} AND formType:\"10-Q\""
    } },
  "from": "0",
  "size": "10",
  "sort": [{ "filedAt": { "order": "desc" } }]
}

filings = queryApi.get_filings(query)

print(filings)

{'total': {'value': 3, 'relation': 'eq'}, 'query': {'from': 0, 'size': 10}, 'filings': [{'id': '9b09ba89105cd748ffc3dc4958a13f37', 'accessionNo': '0000950170-22-019867', 'cik': '1318605', 'ticker': 'TSLA', 'companyName': 'Tesla, Inc.', 'companyNameLong': 'Tesla, Inc. (Filer)', 'formType': '10-Q', 'description': 'Form 10-Q - Quarterly report [Sections 13 or 15(d)]', 'filedAt': '2022-10-24T06:08:50-04:00', 'linkToTxt': 'https://www.sec.gov/Archives/edgar/data/1318605/000095017022019867/0000950170-22-019867.txt', 'linkToHtml': 'https://www.sec.gov/Archives/edgar/data/1318605/000095017022019867/0000950170-22-019867-index.htm', 'linkToXbrl': '', 'linkToFilingDetails': 'https://www.sec.gov/Archives/edgar/data/1318605/000095017022019867/tsla-20220930.htm', 'entities': [{'companyName': 'Tesla, Inc. (Filer)', 'cik': '1318605', 'irsNo': '912197729', 'stateOfIncorporation': 'DE', 'fiscalYearEnd': '1231', 'type': '10-Q', 'act': '34', 'fileNo': '001-34756', 'filmNo': '221324774', 'sic': '3711 Motor

In [35]:
import json
data = json.dumps(filings)
#data

In [36]:
filings['filings']

[{'id': '9b09ba89105cd748ffc3dc4958a13f37',
  'accessionNo': '0000950170-22-019867',
  'cik': '1318605',
  'ticker': 'TSLA',
  'companyName': 'Tesla, Inc.',
  'companyNameLong': 'Tesla, Inc. (Filer)',
  'formType': '10-Q',
  'description': 'Form 10-Q - Quarterly report [Sections 13 or 15(d)]',
  'filedAt': '2022-10-24T06:08:50-04:00',
  'linkToTxt': 'https://www.sec.gov/Archives/edgar/data/1318605/000095017022019867/0000950170-22-019867.txt',
  'linkToHtml': 'https://www.sec.gov/Archives/edgar/data/1318605/000095017022019867/0000950170-22-019867-index.htm',
  'linkToXbrl': '',
  'linkToFilingDetails': 'https://www.sec.gov/Archives/edgar/data/1318605/000095017022019867/tsla-20220930.htm',
  'entities': [{'companyName': 'Tesla, Inc. (Filer)',
    'cik': '1318605',
    'irsNo': '912197729',
    'stateOfIncorporation': 'DE',
    'fiscalYearEnd': '1231',
    'type': '10-Q',
    'act': '34',
    'fileNo': '001-34756',
    'filmNo': '221324774',
    'sic': '3711 Motor Vehicles &amp; Passenger

In [37]:
df = pd.json_normalize(filings['filings'])
df

Unnamed: 0,id,accessionNo,cik,ticker,companyName,companyNameLong,formType,description,filedAt,linkToTxt,linkToHtml,linkToXbrl,linkToFilingDetails,entities,documentFormatFiles,dataFiles,seriesAndClassesContractsInformation,periodOfReport
0,9b09ba89105cd748ffc3dc4958a13f37,0000950170-22-019867,1318605,TSLA,"Tesla, Inc.","Tesla, Inc. (Filer)",10-Q,Form 10-Q - Quarterly report [Sections 13 or 15(d)],2022-10-24T06:08:50-04:00,https://www.sec.gov/Archives/edgar/data/1318605/00009501...,https://www.sec.gov/Archives/edgar/data/1318605/00009501...,,https://www.sec.gov/Archives/edgar/data/1318605/00009501...,"[{'companyName': 'Tesla, Inc. (Filer)', 'cik': '1318605'...","[{'sequence': '1', 'description': '10-Q', 'documentUrl':...","[{'sequence': '5', 'description': 'XBRL TAXONOMY EXTENSI...",[],2022-09-30
1,5354328741f8078f3771da51e98cc882,0000950170-22-012936,1318605,TSLA,"Tesla, Inc.","Tesla, Inc. (Filer)",10-Q,Form 10-Q - Quarterly report [Sections 13 or 15(d)],2022-07-25T06:07:25-04:00,https://www.sec.gov/Archives/edgar/data/1318605/00009501...,https://www.sec.gov/Archives/edgar/data/1318605/00009501...,,https://www.sec.gov/Archives/edgar/data/1318605/00009501...,"[{'companyName': 'Tesla, Inc. (Filer)', 'cik': '1318605'...","[{'sequence': '1', 'description': '10-Q', 'documentUrl':...","[{'sequence': '5', 'description': 'XBRL TAXONOMY EXTENSI...",[],2022-06-30
2,0a71e99b88e45a0972da2ed8cfb32d0b,0000950170-22-006034,1318605,TSLA,"Tesla, Inc.","Tesla, Inc. (Filer)",10-Q,Form 10-Q - Quarterly report [Sections 13 or 15(d)],2022-04-22T21:36:59-04:00,https://www.sec.gov/Archives/edgar/data/1318605/00009501...,https://www.sec.gov/Archives/edgar/data/1318605/00009501...,,https://www.sec.gov/Archives/edgar/data/1318605/00009501...,"[{'companyName': 'Tesla, Inc. (Filer)', 'cik': '1318605'...","[{'sequence': '1', 'description': '10-Q', 'documentUrl':...","[{'sequence': '5', 'description': 'XBRL TAXONOMY EXTENSI...",[],2022-03-31


In [28]:
from sec_api import ExtractorApi

extractorApi = ExtractorApi(sec)

#
# 10-Q example
#
# Tesla 10-Q filing
filing_url_10q = "https://www.sec.gov/Archives/edgar/data/1318605/000095017022012936/tsla-ex32_1.htm"

# get the original HTML of section 7 "Management’s Discussion and Analysis of Financial Condition and Results of Operations"
section_html = extractorApi.get_section(filing_url_10q, "part1item2", "html")
beautifulSoupText = BeautifulSoup(section_html, "html.parser").text.replace(u'\xa0', ' ').replace('\r', ' ')
beautifulSoupText


'ITEM 2.MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL CONDITION AND RESULTS OF OPERATIONS The following discussion and analysis should be read in conjunction with the consolidated financial statements and the related notes included elsewhere in this Quarterly Report on Form 10-Q. Overview Our mission is to accelerate the world’s transition to sustainable energy. We design, develop, manufacture, lease and sell high-performance fully electric vehicles, solar energy generation systems and energy storage products. We also offer maintenance, installation, operation, financial and other services related to our products. Additionally, we are increasingly focused on products and services based on artificial intelligence, robotics and automation. In 2022, we have produced 563,987 vehicles and delivered 564,743 vehicles through the second quarter, despite ongoing supply chain challenges and factory shutdowns. We are currently focused on increasing vehicle production and capacity, improving a

In [47]:
# Declare our final results table
results = pd.DataFrame()
text = []
dates = []

# Pull out the filings text for each report
for i in range(0,len(df)):
    extractorApi = ExtractorApi(sec)
    filing_url_10q = df.linkToHtml.values.tolist()[i]

    # get the original HTML of section 7 "Management’s Discussion and Analysis of Financial Condition and Results of Operations"
    section_html = extractorApi.get_section(filing_url_10q, "part1item2", "html")
    beautifulSoupText = BeautifulSoup(section_html, "html.parser").text.replace(u'\xa0', ' ').replace('\r', ' ')
    # Clean the data and capture it for later processing
    text.append(beautifulSoupText)
    dates.append(df.filedAt.values.tolist()[i])
    

In [60]:
results['text'] = text
results['FilingDate'] = dates

In [61]:
results

Unnamed: 0,text,dates,FilingDate
0,ITEM 2.MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL...,2022-10-24T06:08:50-04:00,2022-10-24T06:08:50-04:00
1,ITEM 2.MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL...,2022-07-25T06:07:25-04:00,2022-07-25T06:07:25-04:00
2,ITEM 2.MANAGEMENT’S DISCUSSION AND ANALYSIS OF FINANCIAL...,2022-04-22T21:36:59-04:00,2022-04-22T21:36:59-04:00


In [53]:
# Load models

In [97]:
# Load the models
finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-fls',num_labels=3)

In [98]:
# Download the Pre-trained transformer used to process our raw text
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-fls')


In [4]:
prediction = nlp("The future for next years sales will increase by 10 %.", top_k=3)
prediction

NameError: name 'nlp' is not defined

In [99]:
# Sentiment - Download the Pre-trained transformer used to process our raw text
sent_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")

In [100]:
# Sentiment - Download the FinBert model used to process our transformed data
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")


In [39]:
# Capture closing prices

In [69]:
# Container to hold the Closing Prices based on the filing date
prices = []

# Walk through the collection of filings and pull out the reported filing date
num_rows = len(results)

for i in trange(num_rows):
    date = results.iloc[i]['FilingDate']
    response, err = ek.get_data(tick, ['TR.PriceClose.date', "TR.PriceClose"], parameters = {'SDate': date, 'EDate': date})
    
    if not response.empty:
        prices.append(response.iloc[0]['Price Close'])
    else:
        print(f'Unable to retrieve any data price for {tick}. May be an issue with permissions')
        
if prices:
    results['close'] = prices

  0%|          | 0/3 [00:00<?, ?it/s]

KeyError: 'Price Close'

In [70]:
response

Unnamed: 0,None,TR.PRICECLOSE.DATE,TR.PRICECLOSE
0,TSLA,,


In [71]:
date

'2022-10-24T06:08:50-04:00'

In [72]:
response, err = ek.get_data(tick, ['TR.PriceClose.date', "TR.PriceClose"], parameters = {'SDate': '2022-10-24T06:08:50-04:00', 'EDate': '2022-10-24T06:08:50-04:00'})