In [35]:
import json
from pprint import pprint
from typing import List, Dict
import pandas as pd
import requests
import time
import re
from tqdm import tqdm

## Get All transcripts List

In [58]:
def get_all_transcripts_list(size=30, api_key=None, ticker="SPGI"):
    url = "https://seeking-alpha.p.rapidapi.com/transcripts/v2/list"

    querystring = {"id":ticker,"size":size,"number":"1"}

    headers = {
    "X-RapidAPI-Key": "",
    "X-RapidAPI-Host": "seeking-alpha.p.rapidapi.com"
    }

    response = requests.request("GET", url, headers=headers, params=querystring)

    #print(response.text)
    result = response.text
    data = json.loads(result)
    return data['data']


def filter_type(data: List[Dict])-> List[Dict]:
    data = [dict_ for dict_ in data if dict_['type'] == 'transcript']
    return data


def get_detail_transcript(transcripts_id, api_key=None):
    
    url = "https://seeking-alpha.p.rapidapi.com/transcripts/v2/get-details"

    querystring = {"id":str(transcripts_id)}

    headers = {
    "X-RapidAPI-Key": "",
    "X-RapidAPI-Host": "seeking-alpha.p.rapidapi.com"
    }
    #time.sleep(5)
    response = requests.request("GET", url, headers=headers, params=querystring)
    dict_ = json.loads(response.text)
    return dict_

# helper functions
def write_json(file_path, data):
    with open(file_path,'w',encoding='utf-8') as f:
        json.dump(data,f,indent=4)

def read_json(file_path):
    with open(file_path,encoding='utf-8') as f:
        data = json.load(f.read())
        return data

def striphtml(data):
    p = re.compile(r'<.*?>')
    return p.sub('', data)

def get_content(details_dict):
    #details_dict = json.loads(details_dict)
    content = details_dict.get('data').get('attributes').get('content')
    content = striphtml(content)
    return content

In [59]:
# get_detail_transcript(4549900)

In [60]:
data = get_all_transcripts_list()

In [61]:
data

[{'id': '4549900',
  'type': 'transcript',
  'attributes': {'publishOn': '2022-10-27T12:20:12-04:00',
   'isLockedPro': False,
   'commentCount': 0,
   'gettyImageUrl': None,
   'videoPreviewUrl': None,
   'themes': {},
   'title': 'S&P Global Inc. (SPGI) Q3 2022 Earnings Call Transcript',
   'isPaywalled': False},
  'relationships': {'author': {'data': {'id': '44211', 'type': 'author'}},
   'sentiments': {'data': []},
   'primaryTickers': {'data': [{'id': '1183', 'type': 'tag'}]},
   'secondaryTickers': {'data': []},
   'otherTags': {'data': [{'id': '49', 'type': 'tag'}]}},
  'links': {'self': '/article/4549900-s-and-p-global-inc-spgi-q3-2022-earnings-call-transcript'}},
 {'id': '4528700',
  'type': 'transcript',
  'attributes': {'publishOn': '2022-08-02T13:35:24-04:00',
   'isLockedPro': False,
   'commentCount': 0,
   'gettyImageUrl': None,
   'videoPreviewUrl': None,
   'themes': {},
   'title': "S&P Global Inc.'s (SPGI) CEO Doug Peterson on Q2 2022 Results - Earnings Call Transcri

In [64]:
def get_list_id(data: List[Dict])-> List:
    list_id = [dict_['id'] for dict_ in data]
    return list_id

def get_list_title(data: List[Dict])-> List:
    list_title = [dict_.get('attributes').get('title') for dict_ in data]
    return list_title

def get_list_date(data: List[Dict])-> List:
    list_date = [dict_.get('attributes').get('publishOn') for dict_ in data]
    return list_date

def get_content_list(data: List[Dict])-> List:
    list_id = get_list_id(data)
    list_content = []
    for id_ in list_id:
        dict_content = get_detail_transcript(id_)
        content = get_content(dict_content)
        list_content.append(content)
        
    return list_content

def integrate(data: List[Dict])-> List[Dict]:
    list_of_dict = []
    
    list_id = get_list_id(data)
    list_title = get_list_title(data)
    list_date = get_list_date(data)
    list_content = get_content_list(data)
    
    for id_, title_, date_, content_ in tqdm(zip(list_id, list_title, list_date, list_content)):
        dict_ = {}
        dict_['id'] = id_
        dict_['title'] = title_
        dict_['list_date'] = date_
        dict_['list_content'] = content_
        
        list_of_dict.append(dict_)
        
    return list_of_dict

In [65]:
integrate(data)

30it [00:00, 58497.96it/s]


[{'id': '4549900',
  'title': 'S&P Global Inc. (SPGI) Q3 2022 Earnings Call Transcript',
  'list_date': '2022-10-27T12:20:12-04:00',
  'list_content': "S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnings Conference Call October 27, 2022 8:30 AM ET Company Participants Mark Grant - Senior Vice President, Investor Relations Douglas Peterson - President and CEO Ewout Steenbergen - Executive Vice President and CFO Conference Call Participants Owen Lau - Oppenheimer Ashish Sabadra - RBC Capital Markets Alex Kramm - UBS  Toni Kaplan - Morgan Stanley Jeffrey Silber - BMO Capital Markets  George Tong - Goldman Sachs Craig Huber - Huber Research Partners Manav Patnaik - Barclays  Jeffrey Meuler - Robert W. Baird  Faiza Alwy - Deutsche Bank  Hans Hoffman - Jefferies  Russell Quelch - Redburn  Shlomo Rosenbaum - Stifel Andrew Steinerman - JPMorgan Mark Grant Good morning, and thank you for joining today's S&amp;P Global Third Quarter 2022 Earnings Call. Presenting on today's call are Doug Peterson, 

In [87]:

for dict_ in data:
    id_ = dict_.get('id')
    title_ = dict_.get('attributes').get('title')
    date = dict_.get('attributes').get('publishOn')
    content_dict = get_detail_transcript(id_)
    content = get_content(content_dict)
    
    break
    
    

In [88]:
list_of_dict[20]

{'id_': '4415601',
 'title': 'IHS Markit Ltd. 2021 Q1 - Results - Earnings Call Presentation',
 'date': '2021-03-23T13:10:50-04:00',
 'content': '<p>S&amp;P Global Inc. (<span class="ticker-hover-wrapper">NYSE:<a href="https://seekingalpha.com/symbol/SPGI" title="S&amp;P Global Inc.">SPGI</a></span>) Q3 2022 Earnings Conference Call October 27, 2022 8:30 AM ET</p> <p><strong>Company Participants</strong></p> <p>Mark Grant - Senior Vice President, Investor Relations</p> <p>Douglas Peterson - President and CEO</p> <p>Ewout Steenbergen - Executive Vice President and CFO</p> <p><strong>Conference Call Participants</strong></p> <p>Owen Lau - Oppenheimer</p> <p>Ashish Sabadra - RBC Capital Markets</p> <p>Alex Kramm - UBS </p> <p>Toni Kaplan - Morgan Stanley</p> <p>Jeffrey Silber - BMO Capital Markets </p> <p>George Tong - Goldman Sachs</p> <p>Craig Huber - Huber Research Partners</p> <p>Manav Patnaik - Barclays </p> <p>Jeffrey Meuler - Robert W. Baird </p> <p>Faiza Alwy - Deutsche Bank </p> 

In [89]:
list_of_dict[4]

{'id_': '4506159',
 'title': 'S&P Global Inc. 2022 Q1 - Results - Earnings Call Presentation',
 'date': '2022-05-03T09:25:52-04:00',
 'content': '<p>S&amp;P Global Inc. (<span class="ticker-hover-wrapper">NYSE:<a href="https://seekingalpha.com/symbol/SPGI" title="S&amp;P Global Inc.">SPGI</a></span>) Q3 2022 Earnings Conference Call October 27, 2022 8:30 AM ET</p> <p><strong>Company Participants</strong></p> <p>Mark Grant - Senior Vice President, Investor Relations</p> <p>Douglas Peterson - President and CEO</p> <p>Ewout Steenbergen - Executive Vice President and CFO</p> <p><strong>Conference Call Participants</strong></p> <p>Owen Lau - Oppenheimer</p> <p>Ashish Sabadra - RBC Capital Markets</p> <p>Alex Kramm - UBS </p> <p>Toni Kaplan - Morgan Stanley</p> <p>Jeffrey Silber - BMO Capital Markets </p> <p>George Tong - Goldman Sachs</p> <p>Craig Huber - Huber Research Partners</p> <p>Manav Patnaik - Barclays </p> <p>Jeffrey Meuler - Robert W. Baird </p> <p>Faiza Alwy - Deutsche Bank </p> 

In [78]:
df

Unnamed: 0,id_,title,date,content
0,4549900,S&P Global Inc. (SPGI) Q3 2022 Earnings Call T...,2022-10-27T12:20:12-04:00,S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...
1,4528700,S&P Global Inc.'s (SPGI) CEO Doug Peterson on ...,2022-08-02T13:35:24-04:00,S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...
2,4528544,S&P Global Inc. 2022 Q2 - Results - Earnings C...,2022-08-02T09:46:26-04:00,S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...
3,4506245,S&P Global Inc. (SPGI) CEO Doug Peterson on Q1...,2022-05-03T12:31:04-04:00,S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...
4,4506159,S&P Global Inc. 2022 Q1 - Results - Earnings C...,2022-05-03T09:25:52-04:00,S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...
5,4485212,S&P Global Inc. (SPGI) CEO Doug Peterson on Q4...,2022-02-08T13:34:04-05:00,S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...
6,4480822,IHS Markit Ltd. 2022 Q4 - Results - Earnings C...,2022-01-21T11:40:45-05:00,S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...
7,4474946,S&P Global (SPGI) Presents At Goldman Sachs 20...,2021-12-13T13:09:36-05:00,S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...
8,4471393,S&P Global (SPGI) Presents At RBC Global Techn...,2021-11-23T07:22:24-05:00,S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...
9,4462434,S&P Global Inc. 2021 Q3 - Results - Earnings C...,2021-10-27T12:36:55-04:00,S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...


In [81]:
df[df['id_'] == '4376906']['content']

29    S&amp;P Global Inc. (NYSE:SPGI) Q3 2022 Earnin...
Name: content, dtype: object

In [None]:
### ticker -> list_of_id -> transcript_id -> get_detail
{
    ticker: [
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
    ],
    
    ticker: [
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
    ],
    
    ticker: [
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
        {id: 5454, title: 'aoinovmnb', content:"iwojnih bviwbv", data: '2022-01-01'},
    ],
}

In [90]:

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from time import sleep
import re
import requests
from bs4 import BeautifulSoup

In [91]:
service = Service(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
driver.implicitly_wait(5)
driver.get("https://seekingalpha.com/symbol/SPGI/earnings/transcripts")

[WDM] - Downloading: 100%|████████████████████████████████████████████████████████| 6.79M/6.79M [00:02<00:00, 3.53MB/s]


In [5]:
import requests
from bs4 import BeautifulSoup

In [2]:
r = requests.get('https://seekingalpha.com/symbol/AAPL')

In [6]:
soup = BeautifulSoup(r.text)

In [8]:
soup.find_all()

 <meta charset="utf-8"/>,
 <title data-static-tag-from="prerender">Apple Inc. (AAPL) Stock Price Today, Quote &amp; News | Seeking Alpha</title>,
 <link href="https://static.seekingalpha.com" rel="preconnect"/>,
 <link href="https://static1.seekingalpha.com" rel="preconnect"/>,
 <link href="https://static2.seekingalpha.com" rel="preconnect"/>,
 <link href="https://static3.seekingalpha.com" rel="preconnect"/>,
 <link crossorigin="anonymous" href="https://www.googletagmanager.com" rel="preconnect"/>,
 <link crossorigin="anonymous" href="https://www.google-analytics.com" rel="preconnect"/>,
 <meta content="width=device-width,initial-scale=1,viewport-fit=cover" name="viewport"/>,
 <meta content="ie=edge" http-equiv="X-UA-Compatible"/>,
 <meta content="Seeking Alpha" name="application-name"/>,
 <meta content="True" name="HandheldFriendly"/>,
 <meta content="yes" name="apple-mobile-web-app-capable"/>,
 <meta content="SeekingAlpha" name="apple-mobile-web-app-title"/>,
 <meta content="black" n