# Adding countries to our ticker list
The Polygon ticker details always give 'US' as the address, so that field is useless here. I want to know whether a stock's company is headquartered in the US or not. Checking whether a stock is an ADR is NOT a good way to find this out, because foreign companies can also be listed 'normally'.

In [1]:
from tickers import get_tickers
from sec_edgar_api import EdgarClient
import numpy as np
import pandas as pd

# Client used by the cells below to fetch each company's SEC submission data;
# the user agent string identifies this project in the requests it makes.
edgar = EdgarClient(user_agent="shinathan python project")

In [3]:
tickers = get_tickers(5)
# Both new columns start out empty; rows that are skipped below stay missing.
tickers['country'] = pd.Series(dtype='str')
tickers['sic'] = pd.Series(dtype='str')

# For every ticker with a CIK, fetch its SEC submission record and copy over
# the business-address state/country code and the SIC code.
for index, row in tickers.iterrows():
    # Report progress before the missing-CIK check, so that a skipped row
    # landing on a multiple of 100 does not silently drop its progress line.
    if index % 100 == 0:
        print(index)

    cik = row['cik']
    # pd.isna also accepts None/NA, which np.isnan rejects with a TypeError.
    if pd.isna(cik):
        continue

    # The CIK is passed to the client as an integer.
    submission = edgar.get_submissions(int(cik))
    tickers.loc[index, 'country'] = submission['addresses']['business']['stateOrCountry']
    tickers.loc[index, 'sic'] = submission['sic']

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3100
3200
3300
3400


# Converting country codes to country.

There are also some old codes: 'E6', 'E7', 'I8', 'L4', 'L5', 'U2', 'US', 'a0', 'a1', 'ct', 'ny',
'pa', 'ut', 'wa'. Based on the [SEC code list](https://www.sec.gov/edgar/searchedgar/edgarcompstate.htm), we will assign:
* E6, E7, a0, a1: CANADA
* I8: GERMANY
* L4, L5: ISRAEL
* U2: RUSSIAN FEDERATION
* ct, ny, pa, ut, wa: these are lowercase US state codes, so we must make sure to upper-case the code string before looking it up.

We will also change 'CANADA (federal level)' to 'CANADA'.

In [None]:
# Code list source: https://www.sec.gov/edgar/searchedgar/edgarstatecodes

# Codes that the mapping step collapses into the single label 'US'.
# NOTE(review): 'X1' is not a postal abbreviation; presumably it is the
# generic "United States" entry of the SEC list - confirm.
states_in_US = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FL",
    "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME",
    "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH",
    "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI",
    "SC", "SD", "TN", "TX", "X1", "UT", "VT", "VA", "WA", "WV",
    "WI", "WY",
]

# Codes that the mapping step collapses into the single label 'CANADA'.
# NOTE(review): 'Z4' is presumably the "CANADA (federal level)" entry
# mentioned in the notes - confirm against the SEC list.
stats_in_CN = [
    "A0", "A1", "A2", "A3", "A4", "A5",
    "A6", "A7", "A8", "A9", "B0", "Z4",
]

# Lookup table for all remaining codes: the first spreadsheet column becomes
# the index, and the mapping step reads the 'Country' column by code.
countries_map = pd.read_excel(
    '../data/other/SEC countries.xlsx',
    index_col=0,
)

In [None]:
# Legacy EDGAR codes with the assignments given in the notes of this notebook.
# They are consulted only when a code is absent from `countries_map`, so any
# entry already present in the spreadsheet keeps precedence.
legacy_country_codes = {
    'E6': 'CANADA',
    'E7': 'CANADA',
    'I8': 'GERMANY',
    'L4': 'ISRAEL',
    'L5': 'ISRAEL',
    'U2': 'RUSSIAN FEDERATION',
    # NOTE(review): the notes list 'US' as an old code without an explicit
    # assignment; mapped to the same 'US' label used for US states - confirm.
    'US': 'US',
}

# Replace each raw SEC state/country code in tickers['country'] with a country
# label. Iterate over a snapshot because the column is rewritten in the loop.
for index, raw_code in list(tickers['country'].items()):
    # Rows that never received a code (missing value) are left untouched.
    if pd.isna(raw_code):
        continue

    # Some old filings use lowercase codes, so normalise the case first.
    country_code = str(raw_code).upper()
    if country_code == 'NAN':
        continue

    if country_code in states_in_US:
        country = 'US'
    elif country_code in stats_in_CN:
        country = 'CANADA'
    elif country_code in countries_map.index:
        country = countries_map.loc[country_code, 'Country']
    elif country_code in legacy_country_codes:
        country = legacy_country_codes[country_code]
    else:
        # Still a KeyError, as with the plain .loc lookup, but one that names
        # the offending code and row instead of an opaque pandas message.
        raise KeyError(
            f"Unknown SEC state/country code {country_code!r} for ticker row {index!r}"
        )

    tickers.loc[index, 'country'] = country


In [None]:
# Show the identifying columns plus the two new ones for the first five rows.
preview_columns = ['ID', 'ticker', 'name', 'cik', 'sic', 'country']
tickers[preview_columns].head(5)

In [None]:
# Persist the ticker table, which now carries the 'country' and 'sic' columns.
# NOTE(review): presumably this is the same version-5 list that get_tickers(5)
# loads - confirm before overwriting it.
tickers.to_csv('../data/tickers_v5.csv')