1. Geocoding: Address -> Latitude, Longitude
   1. company_profile: Add latitude and longitude to each company's JSON file.
   2. json_data: Add the same latitude and longitude to the profile section of each JSON file.

In [1]:
# Standard-library modules: json parses the company files, os lists directories and joins paths
import json
import os

# Install the geopy geocoding library with pip, run through the interpreter
# that backs this kernel (sys.executable)
import sys
!{sys.executable} -m pip install geopy



Identify companies with no profile data or no street address; these files are excluded from geocoding.

In [2]:
# Split the company-profile files into "valid" (usable street address present)
# and "invalid" (error message instead of a profile, or no address).
directory_for_company_profile = '../data/raw/company_profile'
invalid_files = []
valid_files = []

# os.listdir() returns entries in arbitrary order; sorting keeps runs reproducible.
for filename in sorted(os.listdir(directory_for_company_profile)):
    file_path = os.path.join(directory_for_company_profile, filename)

    # Skip sub-directories and non-JSON entries (e.g. .ipynb_checkpoints),
    # which would otherwise crash open()/json.load().
    if not (filename.lower().endswith('.json') and os.path.isfile(file_path)):
        continue

    with open(file_path) as f:
        data = json.load(f)

    # Invalid Files: the payload is an error message (or anything else that
    # is not a profile dictionary), so there is no data to work with.
    if not isinstance(data, dict):
        print(f"NO DATA FOUND FOR: {filename}. Message: {data}")
        invalid_files.append(filename)
        continue

    # Invalid Files: the profile exists but has no usable street address
    # (key missing, null, or empty).
    if not data.get("address1"):
        print(f"NO ADDRESS FOUND FOR: {filename}. Message: {data}")
        invalid_files.append(filename)
        continue

    # Valid Files!
    valid_files.append(filename)

print(f"\n{len(invalid_files)} invalid files: {invalid_files}")
print(f"\n{len(valid_files)} valid files: {valid_files}")


NO DATA FOUND FOR: AIMAU.json. Message: No fundamentals data found for any of the summaryTypes=assetProfile
NO DATA FOUND FOR: BRK.B.json. Message: No fundamentals data found for any of the summaryTypes=assetProfile
NO DATA FOUND FOR: AGM.A.json. Message: Quote not found for ticker symbol: AGM.A
NO DATA FOUND FOR: ALCY.json. Message: No fundamentals data found for any of the summaryTypes=assetProfile
NO DATA FOUND FOR: CRD.A.json. Message: Quote not found for ticker symbol: CRD.A
NO DATA FOUND FOR: CRD.B.json. Message: No fundamentals data found for any of the summaryTypes=assetProfile
NO DATA FOUND FOR: AACT.json. Message: No fundamentals data found for any of the summaryTypes=assetProfile
NO DATA FOUND FOR: GODN.json. Message: Quote not found for ticker symbol: GODN
NO ADDRESS FOUND FOR: WHF.json. Message: {'city': 'Miami', 'state': 'FL', 'country': 'United States', 'website': 'https://www.whitehorsefinance.com', 'industry': 'Asset Management', 'industryDisp': 'Asset Management', 'se

Build a single address string for each company that has a valid address.

In [3]:
# Build one space-separated address string per valid company:
# company_address maps filename -> "address1 city state zip country".
company_address = {}

# Fields that make up the address, in output order.
# "address2" is left out, as it was disabled in the original cell.
address_fields = ("address1", "city", "state", "zip", "country")

for filename in valid_files:
    with open(os.path.join(directory_for_company_profile, filename)) as f:
        data = json.load(f)

    # Keep only fields that are present and non-empty. A null value would
    # raise TypeError under string concatenation, and an empty or missing
    # field would leave stray double/trailing spaces in the address.
    address_parts = []
    for field in address_fields:
        value = data.get(field)
        if value is None:
            continue
        text = str(value).strip()
        if text:
            address_parts.append(text)

    # Add to dictionary
    company_address[filename] = " ".join(address_parts)

print(company_address)
print(len(company_address))



{'BLFY.json': '19 Park Avenue Rutherford NJ 07070 United States', 'CPAA.json': '999 Vanderbilt Beach Road Naples FL 34108 United States', 'PJT.json': '280 Park Avenue New York NY 10017 United States', 'AXS.json': '92 Pitts Bay Road Pembroke HM 08 Bermuda', 'STC.json': '1360 Post Oak Boulevard Houston TX 77056 United States', 'LCW.json': '11755 Wilshire Boulevard Los Angeles CA 90025 United States', 'AIHS.json': 'Shihao Square Chengdu 610000 China', 'PKBK.json': '601 Delsea Drive Washington Township NJ 08080 United States', 'PUCK.json': '12600 Hill Country Blvd Building R Bee Cave TX 78738 United States', 'SEDA.json': 'One Grand Central Place New York NY 10165 United States', 'COOL.json': '251 Lytton Avenue Palo Alto CA 94301 United States', 'ACAQ.json': '442 5th Avenue New York NY 10018 United States', 'SPFI.json': '5219 City Bank Parkway Lubbock TX 79407 United States', 'FANH.json': 'Pearl River Tower Guangzhou 510623 China', 'LDI.json': '26642 Towne Centre Drive Foothill Ranch CA 926

In [6]:
# Import Geocoding Library
from geopy.geocoders import ArcGIS

# Geocoder instance reused by the bulk geocoding loop further down.
geolocator = ArcGIS()

# Geocoding Example: JPMorgan Chase
example_company = "JPM.json"

if example_company not in company_address:
    # The example is illustrative only; a missing file must not stop the notebook.
    print(f"NO ADDRESS FOUND FOR: {example_company}")
else:
    print(company_address[example_company])
    location = geolocator.geocode(company_address[example_company])

    # geocode() returns None when the service finds no match.
    if location is None:
        print(f"NO LOCATION FOUND FOR: {example_company}")
    else:
        print((location.latitude, location.longitude))

383 Madison Avenue New York NY 10179 United States
(40.75584606302709, -73.97688227425184)


In [7]:
# sleep is used to pause between retries when the geocoding service fails
from time import sleep

# Base class for service-side geopy failures (timeouts, unavailability, quota, ...)
from geopy.exc import GeocoderServiceError

# Retry settings for transient geocoding failures.
max_geocode_attempts = 3
retry_delay_seconds = 1

# filename -> (latitude, longitude) for every company that was geocoded
company_latitude_longitude: dict[str, tuple[float, float]] = {}
# filenames whose address could not be geocoded (no match, or repeated errors)
invalid_locations = []

# Geocoding for all companies
for company, address in company_address.items():
    location = None

    # A single timeout must not abort the whole run: retry a few times,
    # then fall through with location still None.
    for attempt in range(1, max_geocode_attempts + 1):
        try:
            location = geolocator.geocode(address)
            break
        except GeocoderServiceError as error:
            print(f"GEOCODING ERROR FOR: {company} (attempt {attempt}/{max_geocode_attempts}): {error}")
            if attempt < max_geocode_attempts:
                sleep(retry_delay_seconds)

    # If location is not found, skip
    if location is None:
        print(f"NO LOCATION FOUND FOR: {company}")
        invalid_locations.append(company)
        continue

    company_latitude_longitude[company] = (location.latitude, location.longitude)

print(f"\nUnable to geocode {len(invalid_locations)} locations: {invalid_locations} ")
print(f"\nAble to geocode {len(company_latitude_longitude)} locations: {company_latitude_longitude} ")


Unable to geocode 0 locations: [] 

Able to geocode 1165 locations: {'BLFY.json': (40.8279269937556, -74.10199600928556), 'CPAA.json': (26.2511849898152, -81.7993230008517), 'PJT.json': (40.75636607630008, -73.97494714426573), 'AXS.json': (32.29133692926005, -64.79248377453608), 'STC.json': (29.75279960346621, -95.45996336574034), 'LCW.json': (34.048902999632475, -118.46214903446946), 'AIHS.json': (30.666670000000067, 104.06667000000004), 'PKBK.json': (39.74567300628203, -75.11392197344169), 'PUCK.json': (30.308152986833903, -97.93749898576061), 'SEDA.json': (40.75217432172232, -73.97754312227553), 'COOL.json': (37.44658200882682, -122.1633870071561), 'ACAQ.json': (40.751795173041415, -73.98234861622674), 'SPFI.json': (33.560836009476716, -101.92591697067297), 'FANH.json': (23.116670000000056, 113.25000000000011), 'LDI.json': (33.675146016141, -117.66625103432045), 'MTRY.json': (40.70289137805946, -74.0141535801879), 'SCU.json': (40.76379822602672, -73.97518343011625), 'BOCN.json': (3

In [8]:
# Add Latitude & Longitude to Company Profile JSONs
# NOTE: this rewrites the files under ../data/raw/company_profile in place.
directory_for_company_profile = '../data/raw/company_profile'

for filename in valid_files:
    # Companies that could not be geocoded have no entry in
    # company_latitude_longitude; skip them instead of raising KeyError.
    if filename not in company_latitude_longitude:
        print(f"NO COORDINATES FOR: {filename}. Skipping.")
        continue

    latitude, longitude = company_latitude_longitude[filename]
    file_path = os.path.join(directory_for_company_profile, filename)

    # Read and close the file before reopening it for writing, so the file
    # is never truncated while the read handle is still open.
    with open(file_path) as f:
        data = json.load(f)

    # Add Latitude & Longitude
    data["latitude"] = latitude
    data["longitude"] = longitude

    # Save to JSON
    with open(file_path, 'w') as outfile:
        json.dump(data, outfile)

In [9]:
# Add Latitude & Longitude to the profile section of the general JSON Data
# NOTE: this rewrites the files under ../data/raw/json_data in place.
directory_for_json_data = '../data/raw/json_data'

for filename in valid_files:
    # Companies that could not be geocoded have no entry in
    # company_latitude_longitude; skip them instead of raising KeyError.
    if filename not in company_latitude_longitude:
        print(f"NO COORDINATES FOR: {filename}. Skipping.")
        continue

    latitude, longitude = company_latitude_longitude[filename]
    file_path = os.path.join(directory_for_json_data, filename)

    # valid_files was built from the company_profile directory, so a matching
    # file is not guaranteed to exist here.
    if not os.path.isfile(file_path):
        print(f"NO JSON DATA FILE FOR: {filename}. Skipping.")
        continue

    # Read and close the file before reopening it for writing.
    with open(file_path) as f:
        data = json.load(f)

    # The coordinates belong inside the "profile" section; skip files where
    # that section is missing or is not a dictionary.
    profile = data.get("profile") if isinstance(data, dict) else None
    if not isinstance(profile, dict):
        print(f"NO PROFILE SECTION FOR: {filename}. Skipping.")
        continue

    # Add Latitude & Longitude
    profile["latitude"] = latitude
    profile["longitude"] = longitude

    # Save to JSON
    with open(file_path, 'w') as outfile:
        json.dump(data, outfile)
