# Earnings Call Company ID to Ticker

We don't know which company each company ID in the earnings call transcripts refers to, so we need a map from company ID to ticker. We use GPT-4 for zero-shot classification to build the map, then manually check the tickers that were assigned to more than one company.

In [None]:
import pandas as pd
import os
from dotenv import load_dotenv
import json
import openai

from companies import small_banks, medium_banks, large_banks

from tqdm.auto import tqdm

In [2]:
# Bring the entries of the local .env file into the environment.
load_dotenv()

# Hand the key (None when the variable is unset) to the openai module.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [3]:
# Directory containing the JSON files
json_folder_path = 'data/text/earning_call_transcripts'

# One {"company_id", "company_name"} record per transcript file
transcripts = []

# Loop through all files in the folder
for filename in os.listdir(json_folder_path):
    # Skip anything that is not a transcript JSON file
    if not filename.endswith(".json"):
        continue

    file_path = os.path.join(json_folder_path, filename)

    # Read as UTF-8 explicitly instead of relying on the platform default encoding
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    company_id = data.get("companyid", None)
    company_name = data.get("companyname", None)

    # Only the identifying fields are needed for the ID -> ticker map, so the
    # transcript text under "components" is deliberately not assembled here.
    transcripts.append({
        "company_id": company_id,
        "company_name": company_name,
    })

# Create DataFrame; naming the columns keeps them present even when no file was found
df_transcript = pd.DataFrame(transcripts, columns=["company_id", "company_name"])

In [4]:
# Keep a single row per (company_id, company_name) pair.
df_transcript = df_transcript.drop_duplicates(subset=["company_id", "company_name"])

In [5]:
# Rough "company name: trading symbol" reference list; it is interpolated verbatim
# into the GPT prompt built in get_ticker_from_company_name.
# NOTE: this is a regular (non-raw) string, so every literal "\n" below becomes a
# newline in addition to the real line break -- entries end up separated by a
# blank line in the final text.
# NOTE(review): some entries have an empty symbol (TCF Financial Corp/DE,
# MFC Bancorp Ltd, VIST Financial Corp, Cadence Financial Corp), and
# Sterling Bancorp/NY and Sterling Bancorp/DE both map to STL -- confirm these
# are intentional.
name_map_str = """
Company_name: Trading Symbol\n
JPMorgan Chase & Co: JPM\n
City National Corp/CA: CYN\n
Comerica Inc: CMA\n
First Commonwealth Financial Corp: FCF\n
Bank of America Corp: BAC\n
Wells Fargo & Co: WFC\n
PNC Financial Services Group Inc/The: PNC\n
Citigroup Inc: C\n
Truist Financial Corp: TFC\n
Sterling Bancorp/NY: STL\n
SunTrust Banks Inc: STI\n
Synovus Financial Corp: SNV\n
TCF Financial Corp/DE: \n
Arrow Financial Corp: AROW\n
BOK Financial Corp: BOKF\n
Citizens Republic Bancorp Inc: CRBC\n
Community Financial System Inc: CBU\n
Cullen/Frost Bankers Inc: CFR\n
City Holding Co: CHCO\n
FNB Corp/PA: FNB\n
FirstMerit Corp: FMER\n
First Financial Bancorp: FFBC\n
Farmers Capital Bank Corp: FFKT\n
First of Long Island Corp/The: FLIC\n
First Midwest Bancorp Inc/IL: FMBI\n
First Merchants Corp: FRME\n
First Horizon Corp: FHN\n
Fulton Financial Corp: FULT\n
First United Corp: FUNC\n
Huntington Bancshares Inc/OH: HBAN\n
MBT Financial Corp: MBTF\n
NBT Bancorp Inc: NBTB\n
National Penn Bancshares Inc: NPBC\n
OFG Bancorp: OFG\n
Old National Bancorp/IN: ONB\n
Park National Corp: PRK\n
Savannah Bancorp Inc/The: SAVB\n
Simmons First National Corp: SFNC\n
KeyCorp: KEY\n
US Bancorp: USB\n
WSFS Financial Corp: WSFS\n
Suffolk Bancorp: SCNB\n
Susquehanna Bancshares Inc: SUSQ\n
Trustmark Corp: TRMK\n
AmeriServ Financial Inc: ASRV\n
UMB Financial Corp: UMBF\n
Valley National Bancorp: VLY\n
Westamerica BanCorp: WABC\n
Webster Financial Corp: WBS\n
WaFd Inc: WAFD\n
WesBanco Inc: WSBC\n
Whitney Holding Corp/LA: WTNY\n
MFC Bancorp Ltd: \n
First Bancshares Inc/The: FBMS\n
SmartFinancial Inc: SMBK\n
BancFirst Corp: BANF\n
MidSouth Bancorp Inc: MSL\n
PacWest Bancorp: PACW\n
CenterState Bank Corp: CSFL\n
Pinnacle Financial Partners Inc: PNFP\n
NewtekOne Inc: NEWT\n
Encore Bancshares Inc: EBTX\n
VIST Financial Corp: \n
Peoples Bancorp Inc/OH: PEBO\n
Patriot National Bancorp Inc: PNBK\n
Meridian Bancorp Inc: EBSB\n
American National Bankshares Inc: AMNB\n
Ames National Corp: ATLO\n
FNCB Bancorp Inc: FNCB\n
Alerus Financial Corp: ALRS\n
Horizon Bancorp Inc/IN: HBNC\n
LCNB Corp: LCNB\n
National Bankshares Inc: NKSH\n
Dime Community Bancshares Inc: DCOM\n
Landmark Bancorp Inc/Manhattan KS: LARK\n
CommunityOne Bancorp: COB\n
Xenith Bankshares Inc: XBKS\n
Alliance Bankshares Corp: ABVA\n
DNB Financial Corp: DNBF\n
Alliance Financial Corp/NY: ALNC\n
Riverview Financial Corp: RIVE\n
MB Financial Inc: MBFI\n
Northwest Bancshares Inc: NWBI\n
Capital One Financial Corp: COF\n
Sandy Spring Bancorp Inc: SASR\n
NB&T Financial Group Inc: NBTF\n
First Security Group Inc/TN: FSGI\n
Banc of California Inc: BANC\n
CIT Group Inc: CIT\n
Centrue Financial Corp: CFCB\n
Hope Bancorp Inc: HOPE\n
First Chester County Corp: FCEC\n
Hanmi Financial Corp: HAFC\n
Fidelity D&D Bancorp Inc: FDBC\n
LNB Bancorp Inc: LNBB\n
United Security Bancshares/Fresno CA: UBFO\n
Bear State Financial Inc: BSF\n
Bancorp Rhode Island Inc: BARI\n
First Bancorp Inc/The: FNLC\n
Emclaire Financial Corp: EMCF\n
Peoples Financial Services Corp: PFIS\n
CU Bancorp/Los Angeles CA: CUNB\n
Shore Bancshares Inc: SHBI\n
Pacific Continental Corp: PCBK\n
Middleburg Financial Corp: MBRG\n
Camden National Corp: CAC\n
Intervest Bancshares Corp: IBCA\n
Old Point Financial Corp: OPOF\n
Blue Ridge Bankshares Inc: BRBS\n
BNC Bancorp: BNCN\n
First Niagara Financial Group Inc: FNFG\n
Central Jersey Bancorp/Long Branch NJ: CJBK\n
Sterling Bancorp/DE: STL\n
Summit Financial Group Inc: SMMF\n
Cadence Financial Corp: \n
First Community Corp/SC: FCCO\n
Cardinal Financial Corp: CFNL\n
Virginia National Bankshares Corp: VABK\n
Evans Bancorp Inc: EVBN\n
CoBiz Financial Inc: COBZ\n
Citizens First Corp: CZFC\n
Bridge Capital Holdings: BBNK\n
Severn Bancorp Inc: SVBI\n
OptimumBank Holdings Inc: OPHC\n
Monarch Financial Holdings Inc: MNRK\n
Hawthorn Bancshares Inc: HWBK\n
NewAlliance Bancshares Inc: NAL\n
Financial Institutions Inc: FISI\n
Pacific Mercantile Bancorp: PMBC\n
Southern First Bancshares Inc: SFST\n
First Clover Leaf Financial Corp: FCLF\n
Regions Financial Corp: RF\n
Bancorp Inc/The: TBBK\n
Limestone Bancorp Inc: LMST\n
MainStreet Bancshares Inc: MNSB\n
Capital Bank Financial Corp: CBF\n
Nicolet Bankshares Inc: NIC\n
BAIYU Holdings Inc: BYU\n
National Bank Holdings Corp: NBHC\n
Opus Bank: OPB\n
Talmer Bancorp Inc: TLMR\n
FCB Financial Holdings Inc: FCB\n
Green Bancorp Inc: GNBC\n
Avenue Financial Holdings Inc: AVNU\n
National Commerce Corp: NCOM\n
Carolina Trust Bancshares Inc: CART\n
Cadence BanCorp: CADE\n
Scully Royalty Ltd: SRL\n
Stellar Bancorp Inc: STEL\n
Amalgamated Bank: AMAL\n
Amerant Bancorp Inc: AMTB\n
Meridian Corp: MRBK\n
Zions Bancorp NA: ZION\n
Carter Bankshares Inc: CARE"""

In [6]:
name_map_str

'\nCompany_name: Trading Symbol\n\nJPMorgan Chase & Co: JPM\n\nCity National Corp/CA: CYN\n\nComerica Inc: CMA\n\nFirst Commonwealth Financial Corp: FCF\n\nBank of America Corp: BAC\n\nWells Fargo & Co: WFC\n\nPNC Financial Services Group Inc/The: PNC\n\nCitigroup Inc: C\n\nTruist Financial Corp: TFC\n\nSterling Bancorp/NY: STL\n\nSunTrust Banks Inc: STI\n\nSynovus Financial Corp: SNV\n\nTCF Financial Corp/DE: \n\nArrow Financial Corp: AROW\n\nBOK Financial Corp: BOKF\n\nCitizens Republic Bancorp Inc: CRBC\n\nCommunity Financial System Inc: CBU\n\nCullen/Frost Bankers Inc: CFR\n\nCity Holding Co: CHCO\n\nFNB Corp/PA: FNB\n\nFirstMerit Corp: FMER\n\nFirst Financial Bancorp: FFBC\n\nFarmers Capital Bank Corp: FFKT\n\nFirst of Long Island Corp/The: FLIC\n\nFirst Midwest Bancorp Inc/IL: FMBI\n\nFirst Merchants Corp: FRME\n\nFirst Horizon Corp: FHN\n\nFulton Financial Corp: FULT\n\nFirst United Corp: FUNC\n\nHuntington Bancshares Inc/OH: HBAN\n\nMBT Financial Corp: MBTF\n\nNBT Bancorp Inc: 

In [7]:
# Every ticker we track, across the three bank-size lists.
available_tickers = small_banks + medium_banks + large_banks
# Comma-separated (rather than space-separated) so the list reads cleanly in the prompt.
available_tickers_str = ", ".join(available_tickers)

# Client object used by the lookup function for its chat-completion calls.
client = openai.OpenAI()

def get_ticker_from_company_name(company_name):
    """Ask GPT-4 for the ticker symbol that matches ``company_name``.

    The prompt embeds the module-level ``available_tickers_str`` and
    ``name_map_str`` as context and instructs the model to answer with the
    symbol only, or ``'ERROR'`` when it does not know.

    Args:
        company_name: Company name as it appears in the transcript metadata.

    Returns:
        The model's reply with surrounding whitespace stripped. ``'ERROR'`` is
        returned without calling the API when ``company_name`` is missing
        (``None``, NaN or any other non-string) or blank, and also when the
        model returns no text.
    """
    # A missing name would otherwise be interpolated as "None"/"nan" and the
    # model would be asked to guess a ticker for it.
    if not isinstance(company_name, str) or not company_name.strip():
        return "ERROR"

    prompt = f"""Given the list of available tickers and a rough map of the name and tickers, what is the stock ticker symbol for {company_name}?
                Available tickers: {available_tickers_str}. Rough Name Map: {name_map_str}. If you dont know say 'ERROR'. Return only the symbol."""

    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a financial assistant. Respond with only the ticker symbol."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.2,
        max_tokens=20
    )

    # The message content is optional in the API response; treat a missing or
    # empty reply the same way as an explicit 'ERROR' answer.
    answer = (response.choices[0].message.content or "").strip()
    return answer or "ERROR"

In [8]:
df_transcript

Unnamed: 0,company_id,company_name
0,272004,First Financial Bancorp.
1,274031,Fulton Financial Corporation
2,294585,"The PNC Financial Services Group, Inc."
3,336241,Capital One Financial Corporation
4,272220,First Merchants Corporation
...,...,...
944,1038352,"Banc of California, Inc."
1204,359483,VIST Financial Corp.
2110,345724,"Northwest Bancshares, Inc."
2629,376210,"Commerce Bancorp, LLC"


In [9]:
# One GPT lookup per row of df_transcript, collected in row order.
ticker_symbols = [
    get_ticker_from_company_name(name)
    for name in tqdm(df_transcript['company_name'], total=df_transcript.shape[0])
]

  0%|          | 0/97 [00:00<?, ?it/s]

In [11]:
# ticker_symbols was built in row order, so assigning the list positionally lines up with the rows
df_transcript['tic'] = ticker_symbols

In [12]:
len(df_transcript["tic"].unique())

89

In [13]:
df_transcript.head(10)

Unnamed: 0,company_id,company_name,tic
0,272004,First Financial Bancorp.,FFBC
1,274031,Fulton Financial Corporation,FULT
2,294585,"The PNC Financial Services Group, Inc.",PNC
3,336241,Capital One Financial Corporation,COF
4,272220,First Merchants Corporation,FRME
5,430453,"The Bancorp, Inc.",TBBK
6,658776,JPMorgan Chase & Co.,JPM
7,289652,NBT Bancorp Inc.,NBTB
9,528094,"NewtekOne, Inc.",NEWT
10,315476,"Zions Bancorporation, National Association",ZION


In [16]:
# How many companies received each ticker, most frequent first (top 20).
companies_per_ticker = (
    df_transcript.groupby("tic")
    .size()
    .reset_index(name='counts')
)
companies_per_ticker.sort_values(by="counts", ascending=False).head(20)

Unnamed: 0,tic,counts
23,ERROR,5
27,FCF,2
4,BANC,2
84,WFC,2
18,CRBC,2
58,ONB,1
65,RF,1
64,PNFP,1
63,PNC,1
62,PMBC,1


In [None]:
# One-off export of the raw GPT output. NOTE(review): presumably left commented
# out so re-running the notebook does not overwrite the saved GPT_map.csv -- confirm.
# df_transcript.to_csv('GPT_map.csv', index=False)

In [23]:
# Reload the saved GPT output, replacing the in-memory frame.
# NOTE(review): the file appears to have been written with index=False, so rows get
# a fresh 0..n-1 index here, which is the index the tables displayed below show.
df_transcript = pd.read_csv('GPT_map.csv')

In [24]:
df_transcript[df_transcript["tic"] == "ERROR"]

Unnamed: 0,company_id,company_name,tic
41,395969,"MetroCorp Bancshares, Inc.",ERROR
43,247106,"AMCORE Financial, Inc.",ERROR
69,190906,MUFG Americas Holdings Corporation,ERROR
89,289941,Integra Bank Corporation,ERROR
90,21985,National City Corp.,ERROR


In [26]:
# Apply manual corrections to a copy so the raw GPT labels in df_transcript stay available for inspection
df_transcript_fix = df_transcript.copy()

In [42]:
# Companies GPT answered 'ERROR' for. Their tickers are not in our list of tickers
# from the excel sheet, so they are filled in by hand. Rows are selected by
# company_id rather than by row label so each fix still reaches the intended
# company if GPT_map.csv is regenerated with a different row order.
manual_error_fixes = {
    395969: "MCBI",  # MetroCorp Bancshares, Inc.
    247106: "AFCL",  # AMCORE Financial, Inc.
    190906: "MUFJ",  # MUFG Americas Holdings Corporation
    289941: "ITRG",  # Integra Bank Corporation
    21985: "NCC",  # National City Corp.
}
for fixed_company_id, fixed_ticker in manual_error_fixes.items():
    df_transcript_fix.loc[df_transcript_fix["company_id"] == fixed_company_id, "tic"] = fixed_ticker

In [34]:
df_transcript[df_transcript["tic"] == "FCF"] 

Unnamed: 0,company_id,company_name,tic
58,290231,"First California Financial Group, Inc.",FCF
63,323780,First Commonwealth Financial Corporation,FCF


In [43]:
# First California Financial Group (company_id 290231) was labelled FCF, the same
# ticker as First Commonwealth Financial Corporation; give it its own symbol.
# Selected by company_id so the fix does not depend on row order.
df_transcript_fix.loc[df_transcript_fix["company_id"] == 290231, "tic"] = "FCAL"  # delisted old

In [36]:
# Both rows carry the same company name under two different company_ids, so sharing BANC needs no fix
df_transcript[df_transcript["tic"] == "BANC"]  # Already correct

Unnamed: 0,company_id,company_name,tic
32,2674024,"Banc of California, Inc.",BANC
92,1038352,"Banc of California, Inc.",BANC


In [37]:
# No fix needed: both rows are left sharing WFC
df_transcript[df_transcript["tic"] == "WFC"]  # row 79 is the parent of row 35, but essentially the same company and ticker

Unnamed: 0,company_id,company_name,tic
35,292891,Wells Fargo & Company,WFC
79,176342,Wells Fargo Corporation,WFC


In [38]:
df_transcript[df_transcript["tic"] == "CRBC"]

Unnamed: 0,company_id,company_name,tic
84,260974,"Citizens Republic Bancorp, Inc.",CRBC
95,376210,"Commerce Bancorp, LLC",CRBC


In [39]:
# "Commerce Bancorp, LLC" (company_id 376210) was labelled CRBC, the same ticker as
# Citizens Republic Bancorp, Inc. NOTE(review): CBH is a best guess -- confirm.
# Selected by company_id so the fix does not depend on row order.
df_transcript_fix.loc[df_transcript_fix["company_id"] == 376210, "tic"] = "CBH"  # I think its this one

In [44]:
# Re-count companies per ticker after the manual fixes, most frequent first (top 20).
fixed_companies_per_ticker = (
    df_transcript_fix.groupby("tic")
    .size()
    .reset_index(name='counts')
)
fixed_companies_per_ticker.sort_values(by="counts", ascending=False).head(20)

Unnamed: 0,tic,counts
5,BANC,2
90,WFC,2
0,AFCL,1
60,NEWT,1
69,PNC,1
68,PMBC,1
67,PEBO,1
66,PCBK,1
65,OPB,1
64,ONB,1


In [45]:
from companies import small_banks, medium_banks, large_banks

# Combined ticker list across the three bank-size lists (same members as
# available_tickers, concatenated in a different order)
all_banks = large_banks + medium_banks + small_banks

In [47]:
# Distinct tickers in the fixed map that do not appear in all_banks
in_all_banks = df_transcript_fix["tic"].isin(all_banks)
non_tickers = df_transcript_fix.loc[~in_all_banks, "tic"].unique()

In [48]:
# Rows (including bank names) whose ticker is one of non_tickers
has_unlisted_ticker = df_transcript_fix["tic"].isin(non_tickers)
non_tickers_df = df_transcript_fix[has_unlisted_ticker]

In [49]:
non_tickers_df  # Basically, stuff we manually added

Unnamed: 0,company_id,company_name,tic
41,395969,"MetroCorp Bancshares, Inc.",MCBI
43,247106,"AMCORE Financial, Inc.",AFCL
58,290231,"First California Financial Group, Inc.",FCAL
69,190906,MUFG Americas Holdings Corporation,MUFJ
89,289941,Integra Bank Corporation,ITRG
90,21985,National City Corp.,NCC
95,376210,"Commerce Bancorp, LLC",CBH


In [50]:
# Persist the manually corrected map; index=False keeps the row labels out of the file
df_transcript_fix.to_csv('GPT_map_fixed.csv', index=False)

In [51]:
# Lookup table from company_id to ticker, built row by row from df_transcript_fix;
# if a company_id occurs more than once, the last row wins.
ticker_dict = {
    cid: symbol
    for cid, symbol in zip(df_transcript_fix["company_id"], df_transcript_fix["tic"])
}

In [52]:
ticker_dict

{272004: 'FFBC',
 274031: 'FULT',
 294585: 'PNC',
 336241: 'COF',
 272220: 'FRME',
 430453: 'TBBK',
 658776: 'JPM',
 289652: 'NBTB',
 528094: 'NEWT',
 315476: 'ZION',
 142913479: 'SMBK',
 305387: 'WSFS',
 327660: 'PEBO',
 19109: 'TFC',
 293447: 'ONB',
 306324: 'SNV',
 3168416: 'MBTF',
 338088: 'MBFI',
 323261: 'BOKF',
 527694: 'PNFP',
 271659: 'FMER',
 4593620: 'OPB',
 62659342: 'FCB',
 272721: 'FHN',
 311514: 'VLY',
 346301: 'LARK',
 305901: 'STI',
 303685: 'SFNC',
 394712: 'STL',
 272192: 'FLIC',
 309540: 'TRMK',
 800219: 'BBNK',
 2674024: 'BANC',
 19049: 'BAC',
 271409: 'RF',
 292891: 'WFC',
 313313: 'WSBC',
 386991: 'HBNC',
 1475968: 'CIT',
 305304: 'USB',
 318670: 'CMA',
 395969: 'MCBI',
 414570: 'HOPE',
 247106: 'AFCL',
 329376: 'KEY',
 4814728: 'EBTX',
 265176: 'CFR',
 108443: 'WBS',
 527762: 'CADE',
 51660646: 'STEL',
 366107: 'CAC',
 854347: 'HAFC',
 261524: 'CYN',
 306515: 'TCF',
 306426: 'SUSQ',
 972174: 'COBZ',
 78754792: 'NBHC',
 290333: 'NPBC',
 290231: 'FCAL',
 98045865: