1. Geocoding: Address -> Latitude, Longitude
   1. company_profile: Add latitude & longitude, update json files.
   2. json_data: Update json files profile section with latitude and longitude.

In [1]:
# Open JSON for each file
import json
import os

# Install Geocoding Library
import sys
!{sys.executable} -m pip install geopy



Companies with no data attached to them.

In [2]:
directory_for_company_profile = '../data/raw/company_profile'
invalid_files = []
valid_files = []

for filename in os.listdir(directory_for_company_profile):
    with open(os.path.join(directory_for_company_profile, filename)) as f:
        data = json.load(f)

        # Invalid Files: No data/address found.
        if type(data) == str:
            print(f"NO DATA FOUND FOR: {filename}. Message: {data}")
            invalid_files.append(filename)
            continue
        
        elif "address1" not in data:
            print(f"NO ADDRESS FOUND FOR: {filename}. Message: {data}")
            invalid_files.append(filename)
            continue
        
        # Valid Files!
        valid_files.append(filename)

print(f"\n{len(invalid_files)} invalid files: {invalid_files}")
print(f"{len(valid_files)} valid files: {valid_files}")


NO DATA FOUND FOR: AIMAU.json. Message: No fundamentals data found for any of the summaryTypes=assetProfile
NO DATA FOUND FOR: BRK.B.json. Message: No fundamentals data found for any of the summaryTypes=assetProfile
NO DATA FOUND FOR: AGM.A.json. Message: Quote not found for ticker symbol: AGM.A
NO DATA FOUND FOR: ALCY.json. Message: No fundamentals data found for any of the summaryTypes=assetProfile
NO DATA FOUND FOR: CRD.A.json. Message: Quote not found for ticker symbol: CRD.A
NO DATA FOUND FOR: CRD.B.json. Message: No fundamentals data found for any of the summaryTypes=assetProfile
NO DATA FOUND FOR: AACT.json. Message: No fundamentals data found for any of the summaryTypes=assetProfile
NO DATA FOUND FOR: GODN.json. Message: Quote not found for ticker symbol: GODN
NO ADDRESS FOUND FOR: WHF.json. Message: {'city': 'Miami', 'state': 'FL', 'country': 'United States', 'website': 'https://www.whitehorsefinance.com', 'industry': 'Asset Management', 'industryDisp': 'Asset Management', 'se

Get the valid addresses for each company.

In [3]:
company_address = {}

for filename in valid_files:
    with open(os.path.join(directory_for_company_profile, filename)) as f:
        data = json.load(f)

        # Street Address
        address = ""
        address += data["address1"] + ", " if "address1" in data else ''
        # address += data["address2"] + " " if "address2" in data else ''
        address += data["city"] + ", " if "city" in data else ''
        address += data["state"] + ", " if "state" in data else ''
        address += data["zip"] + ", " if "zip" in data else ''
        address += data["country"] if "country" in data else ''

        # Add to dictionary
        company_address[filename] = address

print(company_address)
print(len(company_address))



{'BLFY.json': '19 Park Avenue, Rutherford, NJ, 07070, United States', 'CPAA.json': '999 Vanderbilt Beach Road, Naples, FL, 34108, United States', 'PJT.json': '280 Park Avenue, New York, NY, 10017, United States', 'AXS.json': '92 Pitts Bay Road, Pembroke, HM 08, Bermuda', 'STC.json': '1360 Post Oak Boulevard, Houston, TX, 77056, United States', 'LCW.json': '11755 Wilshire Boulevard, Los Angeles, CA, 90025, United States', 'AIHS.json': 'Shihao Square, Chengdu, 610000, China', 'PKBK.json': '601 Delsea Drive, Washington Township, NJ, 08080, United States', 'PUCK.json': '12600 Hill Country Blvd Building R, Bee Cave, TX, 78738, United States', 'SEDA.json': 'One Grand Central Place, New York, NY, 10165, United States', 'COOL.json': '251 Lytton Avenue, Palo Alto, CA, 94301, United States', 'ACAQ.json': '442 5th Avenue, New York, NY, 10018, United States', 'SPFI.json': '5219 City Bank Parkway, Lubbock, TX, 79407, United States', 'FANH.json': 'Pearl River Tower, Guangzhou, 510623, China', 'LDI.j

In [4]:
# Import Geocoding Library
from time import sleep
from geopy.geocoders import Photon

# Geocoding Example: AXS
geolocator = Photon()

print(company_address["AXS.json"])
location = geolocator.geocode(company_address["AXS.json"])
print((location.latitude, location.longitude))

92 Pitts Bay Road, Pembroke, HM 08, Bermuda
(32.2947329, -64.7961749)


In [5]:
company_latitude_longitude: dict[str, (float, float)] = {}

# Geocoding && Save to JSONs
for company in company_address:
    location = geolocator.geocode(company_address["AXS.json"])
    company_latitude_longitude[company] = (location.latitude, location.longitude)

print(company_latitude_longitude)
print(len(company_latitude_longitude))

{'BLFY.json': (32.2947329, -64.7961749), 'CPAA.json': (32.2947329, -64.7961749), 'PJT.json': (32.2947329, -64.7961749), 'AXS.json': (32.2947329, -64.7961749), 'STC.json': (32.2947329, -64.7961749), 'LCW.json': (32.2947329, -64.7961749), 'AIHS.json': (32.2947329, -64.7961749), 'PKBK.json': (32.2947329, -64.7961749), 'PUCK.json': (32.2947329, -64.7961749), 'SEDA.json': (32.2947329, -64.7961749), 'COOL.json': (32.2947329, -64.7961749), 'ACAQ.json': (32.2947329, -64.7961749), 'SPFI.json': (32.2947329, -64.7961749), 'FANH.json': (32.2947329, -64.7961749), 'LDI.json': (32.2947329, -64.7961749), 'MTRY.json': (32.2947329, -64.7961749), 'SCU.json': (32.2947329, -64.7961749), 'BOCN.json': (32.2947329, -64.7961749), 'ANZU.json': (32.2947329, -64.7961749), 'TBMC.json': (32.2947329, -64.7961749), 'SAL.json': (32.2947329, -64.7961749), 'BCSF.json': (32.2947329, -64.7961749), 'BNS.json': (32.2947329, -64.7961749), 'GPAC.json': (32.2947329, -64.7961749), 'LX.json': (32.2947329, -64.7961749), 'TLGY.jso

In [None]:
# Add Latitude & Longitude to Company Profile JSONs
directory_for_company_profile = '../data/raw/company_profile'

for filename in valid_files:
    with open(os.path.join(directory_for_company_profile, filename)) as f:
        data = json.load(f)

        # Add Latitude & Longitude
        data["latitude"] = company_latitude_longitude[filename][0]
        data["longitude"] = company_latitude_longitude[filename][1]

        # Save to JSON
        with open(os.path.join(directory_for_company_profile, filename), 'w') as outfile:
            json.dump(data, outfile)