## Populate the Los Angeles Ontology with the data related to businesses, geographical information, and dates

### Import required libraries

In [1]:
# required libraries
import pandas as pd
import os
from pathlib import Path
# Load the required libraries
from rdflib import Graph, Literal, RDF, URIRef, Namespace
# rdflib knows about some namespaces, like FOAF
from rdflib.namespace import FOAF, XSD
# CHECK DATE 
from datetime import datetime, date, timedelta
import urllib



### Define the paths for datasets and outputs

In [2]:
# parameters and URLs
path = str(Path(os.path.abspath(os.getcwd())).parent.absolute())
activeBusinessesData = '../datasets/ACTIVE BUSINESSES/fixed-Listing_of_Active_Businesses_parsed.csv'
closedBusinessesData = '../datasets/CLOSED BUSINESSES/parsed-All_Closed_Businesses_20231101_PARSED.csv'
laCovidData = '../datasets/COVID DATA/sorted_los_angeles_covid_data.csv'
crimeData1 = '../datasets/CRIME DATA/parsed-Crime_Data_from_2020_to_Present-part1.csv'
crimeData2 = '../datasets/CRIME DATA/parsed-Crime_Data_from_2020_to_Present-part2.csv'
crimeData3 = '../datasets/CRIME DATA/parsed-Crime_Data_from_2020_to_Present-part3.csv'
naicsData = '../datasets/CLOSED BUSINESSES/2022_NAICS_Descriptions.csv'


# saving folder
savePath =  path

In [3]:
# Construct the Los Angeles ontology namespace not known by RDFlib
LAO = Namespace("http://www.bitsei.it/losAngelesOntology/")

## COVID Days

In [4]:
# Load the CSV files in memory
def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)


laCovid = pd.read_csv(laCovidData, sep=';') #, dtype={'Active':int, 'Deaths':int}

laCovid["Last_Update"] = pd.to_datetime(laCovid['Last_Update'])
laCovid['solodata'] = laCovid['Last_Update'].dt.date
laCovid['Confirmed'] = laCovid['Confirmed'].astype('Int64')
laCovid['Active'] = laCovid['Active'].astype('Int64')
laCovid['Deaths'] = laCovid['Deaths'].astype('Int64')
laCovid['New_Cases'] = laCovid['New_Cases'].astype('Int64')

#laCovid['Active'] = laCovid["Confirmed"] - laCovid["Deaths"]

laCovid = laCovid[['Last_Update', 'solodata', 'Active', 'Deaths', 'New_Cases']]

start_date = pd.to_datetime('2018-01-01')
end_date = pd.to_datetime('2023-12-31')
for i in daterange(start_date, end_date):
    if i.date() not in laCovid['solodata'].values:
        new_row = {
            'solodata': i.date(),
            'Last_Update' : i, #pd.to_datetime(str(i), '%Y-%m-%d %H%M%S')
            'Active': 0,
            'Deaths': 0,
            'New_Cases': 0
        }
        laCovid = pd.concat([laCovid, pd.DataFrame([new_row])], ignore_index=True)



laCovid.sort_values(by='solodata', inplace=True)
laCovid.set_index("solodata", inplace=True)
laCovid["Last_Update"] = laCovid["Last_Update"].astype("string")
laCovid["Last_Update"] = laCovid["Last_Update"].str.replace(' ', 'T')


#create the graph
g = Graph()

# Bind the namespaces to a prefix for more readable output
g.bind("foaf", FOAF)
g.bind("xsd", XSD) 
g.bind("lao", LAO)

laCovid

Unnamed: 0_level_0,Last_Update,Active,Deaths,New_Cases
solodata,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-01,2018-01-01T00:00:00,0,0,0
2018-01-02,2018-01-02T00:00:00,0,0,0
2018-01-03,2018-01-03T00:00:00,0,0,0
2018-01-04,2018-01-04T00:00:00,0,0,0
2018-01-05,2018-01-05T00:00:00,0,0,0
...,...,...,...,...
2023-12-26,2023-12-26T00:00:00,0,0,0
2023-12-27,2023-12-27T00:00:00,0,0,0
2023-12-28,2023-12-28T00:00:00,0,0,0
2023-12-29,2023-12-29T00:00:00,0,0,0


In [5]:
%%time 
#measure execution time

#iterate over the laCovid dataframe
for index, row in laCovid.iterrows():
    # Create the node to add to the Graph
    # the node has the namespace + the dayId as URI
    idU = "day" + str(index)
    Day = URIRef(LAO[idU])

    # Add triples using store's add() method.
    g.add((Day, RDF.type, LAO.Day))
    g.add((Day, LAO['hasDate'], Literal(str(row['Last_Update']), datatype=XSD.dateTime)))    
    g.add((Day, LAO['hasActiveCases'], Literal(row['Active'], datatype=XSD.integer)))    
    g.add((Day, LAO['hasNOfDeaths'], Literal(row['Deaths'], datatype=XSD.integer)))    
    g.add((Day, LAO['hasNewCases'], Literal(row['New_Cases'], datatype=XSD.integer)))  
    
print("--- saving serialization ---")
with open('covidDays.ttl', 'w') as file:
    file.write(g.serialize(format='turtle'))

--- saving serialization ---
CPU times: total: 1.39 s
Wall time: 2.8 s


## Cities

In [9]:
# Load the CSV files in memory
cities1 = pd.read_csv(closedBusinessesData, sep=',', index_col='LOCATION ACCOUNT #')
cities1 = cities1[['CITY','ZIP CODE']]


cities2 = pd.read_csv(activeBusinessesData, sep=',', index_col='LOCATION ACCOUNT #')
cities2 = cities2[['CITY','ZIP CODE']]


cities = pd.merge(cities1, cities2)
cities["ZIP CODE"] = cities["ZIP CODE"].str.split("-", expand=True).get(0)
cities = cities[cities['ZIP CODE'] != '']
cities = cities.drop_duplicates()
cities.set_index("ZIP CODE", inplace=True)

print(cities.head(100))
print(len(cities))


#create the graph
g = Graph()

# Bind the namespaces to a prefix for more readable output
g.bind("foaf", FOAF)
g.bind("xsd", XSD)
g.bind("lao", LAO)

                   CITY
ZIP CODE               
90028       LOS ANGELES
91311        CHATSWORTH
91402     PANORAMA CITY
90025       LOS ANGELES
90058            VERNON
...                 ...
90290           TOPANGA
91501           BURBANK
90032       LOS ANGELES
90210     BEVERLY HILLS
90037       LOS ANGELES

[100 rows x 1 columns]
603


In [10]:
%%time 
#measure execution time

#iterate over the cities dataframe
for index, row in cities.iterrows():
    # Create the node to add to the Graph
    # the node has the namespace + the cityId as URI
    idU = "city" + str(index)
    City = URIRef(LAO[idU])
    
    # Add triples using store's add() method.
    g.add((City, RDF.type, LAO.City))
    g.add((City, LAO['cityZipCode'], Literal(str(index), datatype=XSD.string)))    
    g.add((City, LAO['cityName'], Literal(row['CITY'], datatype=XSD.string)))    
    
print("--- saving serialization ---")
with open('cities.ttl', 'w') as file:
        file.write(g.serialize(format='turtle'))

--- saving serialization ---
CPU times: total: 109 ms
Wall time: 236 ms


## Naics

In [11]:
# Load the CSV files in memory
naics = pd.read_csv(naicsData, sep=',', index_col='Code')
naics = naics[['Title']]
naics["Title"] = naics["Title"].replace("T$", "", regex=True)
print(naics.head(20))
print(len(naics))
#create the graph
g = Graph()

# Bind the namespaces to a prefix for more readable output
g.bind("foaf", FOAF)
g.bind("xsd", XSD) 
g.bind("lao", LAO)


                                             Title
Code                                              
11      Agriculture, Forestry, Fishing and Hunting
111                                Crop Production
1111                     Oilseed and Grain Farming
11111                              Soybean Farming
111110                             Soybean Farming
11112             Oilseed (except Soybean) Farming
111120            Oilseed (except Soybean) Farming
11113                     Dry Pea and Bean Farming
111130                    Dry Pea and Bean Farming
11114                                Wheat Farming
111140                               Wheat Farming
11115                                 Corn Farming
111150                                Corn Farming
11116                                 Rice Farming
111160                                Rice Farming
11119                          Other Grain Farming
111191       Oilseed and Grain Combination Farming
111199                     All 

In [12]:
%%time 
#measure execution time

#iterate over the naics dataframe
for index, row in naics.iterrows():
    # Create the node to add to the Graph
    # the node has the namespace + the naicsId as URI
    idU = "naics" + str(index)
    Naics = URIRef(LAO[idU])
    
    # Add triples using store's add() method.
    g.add((Naics, RDF.type, LAO.Naics))
    g.add((Naics, LAO['naicsCode'], Literal(index, datatype=XSD.string)))    
    g.add((Naics, LAO['naicsDescription'], Literal(row['Title'], datatype=XSD.string)))    
     
print("--- saving serialization ---")
with open('naics.ttl', 'w') as file:
    file.write(g.serialize(format='turtle'))

--- saving serialization ---
CPU times: total: 312 ms
Wall time: 534 ms


## Area

In [13]:
# Load the CSV files in memory
areasList = ['Harbor Gateway-HAR', 'Palms - Mar Vista - Del Rey-PLM', 'Bel Air - Beverly Crest-BAR', 'Granada Hills - Knollwood-GHL', 'Mission Hills - Panorama City - North Hills-MSS', 'Encino - Tarzana-ENC', 'Brentwood - Pacific Palisades-BTW', 'West Adams - Baldwin Hills - Leimert-WAD', 'North Hollywood - Valley Village-NHL', 'Arleta - Pacoima-ARL', 'Northeast Los Angeles-NLA', 'Venice-VEN', 'Silver Lake - Echo Park - Elysian Valley-SLK', 'San Pedro-SPD', 'Reseda - West Van Nuys-RES', 'Sun Valley - La Tuna Canyon-SVY', 'Sunland - Tujunga - Lake View Terrace - Shadow Hills - East La Tuna Canyon-SLD', 'Westwood-WWD', 'West Los Angeles-WLA', 'Hollywood-HWD', 'Canoga Park - Winnetka - Woodland Hills - West Hills-CPK', 'Central City North-CCN', 'Chatsworth - Porter Ranch-CHT', 'Wilmington - Harbor City-WLM', 'Sylmar-SYL', 'Wilshire-WIL', 'Central City-CCY', 'Westlake-WLK', 'Port of Los Angeles-PTL', 'Northridge-NRD', 'Van Nuys - North Sherman Oaks-VNY', 'Boyle Heights-BHT', 'Sherman Oaks - Studio City - Toluca Lake - Cahuenga Pass-SHR', 'Westchester - Playa del Rey-WCH', 'Los Angeles International Airport-LAX', 'South Los Angeles-SLA', 'Southeast Los Angeles-SEL']
# Split each string in the list into two parts, before and after the dash.
city_names = [string.split('-')[0] for string in areasList]
acronyms = [string.split('-')[-1] for string in areasList]

# Create a DataFrame with the two columns.
areas = pd.DataFrame({'CITY NAME': city_names, 'ACRONYM': acronyms})
areas.set_index("ACRONYM", inplace=True)

#create the graph
g = Graph()

# Bind the namespaces to a prefix for more readable output
g.bind("foaf", FOAF)
g.bind("xsd", XSD)
g.bind("lao", LAO)


In [14]:
%%time 
#measure execution time

#iterate over the areas dataframe
for index, row in areas.iterrows():
    # Create the node to add to the Graph
    # the node has the namespace + the areaId as URI
    idU = "area" + str(index)
    Area = URIRef(LAO[idU])
    # Add triples using store's add() method.

    #TYPE
    g.add((Area, RDF.type, LAO.Area))
    
    #DATA PROPERTIES
    g.add((Area, LAO['areaAcronym'], Literal(str(index), datatype=XSD.string)))    
    g.add((Area, LAO['areaName'], Literal(row['CITY NAME'], datatype=XSD.string)))    

print("--- saving serialization ---")
with open('areas.ttl', 'w') as file:
    file.write(g.serialize(format='turtle'))

--- saving serialization ---
CPU times: total: 0 ns
Wall time: 13.6 ms


## Locations

In [15]:
# Load the CSV files in memory
activeBusinesses = pd.read_csv(activeBusinessesData, sep=',')
closedBusinesses = pd.read_csv(closedBusinessesData, sep=',')
crimeData1_df = pd.read_csv(crimeData1, sep=',')
crimeData2_df = pd.read_csv(crimeData2, sep=',')
crimeData3_df = pd.read_csv(crimeData3, sep=',')

activeBusinesses = activeBusinesses[["STREET ADDRESS", "LOCATION", "COMMUNITY PLANNING AREA"]]
closedBusinesses = closedBusinesses[["STREET ADDRESS", "LOCATION", "COMMUNITY PLANNING AREA"]]
crimeData1_df = crimeData1_df[["LOCATION", "LAT", "LON", "COMMUNITY PLANNING AREA"]]
crimeData1_df.rename(columns={"LOCATION": "STREET ADDRESS", "LAT": "LATITUDE", "LON": "LONGITUDE", "COMMUNITY PLANNING AREA": "COMMUNITY PLANNING AREA"}, inplace=True)
crimeData2_df = crimeData2_df[["LOCATION", "LAT", "LON", "COMMUNITY PLANNING AREA"]]
crimeData2_df.rename(columns={"LOCATION": "STREET ADDRESS", "LAT": "LATITUDE", "LON": "LONGITUDE", "COMMUNITY PLANNING AREA": "COMMUNITY PLANNING AREA"}, inplace=True)
crimeData3_df = crimeData3_df[["LOCATION", "LAT", "LON", "COMMUNITY PLANNING AREA"]]
crimeData3_df.rename(columns={"LOCATION": "STREET ADDRESS", "LAT": "LATITUDE", "LON": "LONGITUDE", "COMMUNITY PLANNING AREA": "COMMUNITY PLANNING AREA"}, inplace=True)


locations = pd.concat([activeBusinesses, closedBusinesses]).drop_duplicates()
locations['LOCATION'] = locations['LOCATION'].str.replace('(','') 
locations['LOCATION'] = locations['LOCATION'].str.replace(')','')
locations['LOCATION'] = locations['LOCATION'].str.replace(' ','')
locations[['LATITUDE', 'LONGITUDE']] = locations['LOCATION'].str.split(',', expand=True)
locations.drop(columns=['LOCATION'], inplace=True)
# Remove the original coordinates column
#locations.drop('LOCATION', axis=1, inplace=True)

locations = pd.concat([locations, crimeData1_df, crimeData2_df, crimeData3_df]).drop_duplicates()
locations['CHIAVE'] = "lat" + locations['LATITUDE'].astype("string") + "lon" + locations['LONGITUDE'].astype("string")
locations.drop(locations[locations['CHIAVE'] == 'lat0.0lon0.0'].index, inplace = True)
locations.set_index('CHIAVE', inplace=True)
locations = locations.dropna()
#0.0,0.0

locations['COMMUNITY PLANNING AREA'] = locations['COMMUNITY PLANNING AREA'].str.split('-')
get_last_element = lambda x: x[-1]
locations['COMMUNITY PLANNING AREA'] = locations['COMMUNITY PLANNING AREA'].apply(get_last_element)




#print((locations['COMMUNITY PLANNING AREA'].str.split('-').astype("string")))



locations

Unnamed: 0_level_0,STREET ADDRESS,COMMUNITY PLANNING AREA,LATITUDE,LONGITUDE
CHIAVE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
lat33.7901lon-118.2804,1330 W PACIFIC COAST HIGHWAY SUITE #E,WLM,33.7901,-118.2804
lat33.9603lon-118.4471,327 CULVER BLVD UNIT #4,WCH,33.9603,-118.4471
lat34.1468lon-118.423,13317 VENTURA BLVD #B,SHR,34.1468,-118.423
lat32.6665lon-117.1073,319 W 18TH STREET,_NOT FOUND_,32.6665,-117.1073
lat33.8301lon-118.328,1740 CRENSHAW BLVD,_NOT FOUND_,33.8301,-118.328
...,...,...,...,...
lat33.9511lon-118.2739,94TH,SEL,33.9511,-118.2739
lat34.2687lon-118.3107,NASSAU AV,SLD,34.2687,-118.3107
lat34.069lon-118.3041,W 3RD ST,WIL,34.069,-118.3041
lat34.1536lon-118.4181,MILBANK ST,SHR,34.1536,-118.4181


In [16]:
%%time 
#measure execution time

#create the graph
g = Graph()

# Bind the namespaces to a prefix for more readable output
g.bind("foaf", FOAF)
g.bind("xsd", XSD)
g.bind("lao", LAO)

#iterate over the locations dataframe
for index, row in locations.iterrows():
    # Create the node to add to the Graph
    # the node has the namespace + the locationId as URI
    idU = index
    Location = URIRef(LAO[idU])  # Corrected: Use LAO[idU] instead of LAO[idU]
    # Add triples using the store's add() method.
    g.add((Location, RDF.type, LAO.Location))

    # DATA PROPERTIES
    g.add((Location, LAO['hasLatitude'], Literal(row['LATITUDE'], datatype=XSD.double)))
    g.add((Location, LAO['hasLongitude'], Literal(row['LONGITUDE'], datatype=XSD.double)))
    g.add((Location, LAO['hasAddress'], Literal(row['STREET ADDRESS'], datatype=XSD.string)))

    # OBJECT PROPERTIES
    if (row["COMMUNITY PLANNING AREA"] != '_NOT FOUND_'):
        g.add((Location, LAO['belongsToArea'], LAO["area" + str(row["COMMUNITY PLANNING AREA"])]))


    
# print all the data in the Turtle format
print("--- saving serialization ---")
with open('locations.ttl', 'w') as file:
    file.write(g.serialize(format='turtle'))

--- saving serialization ---
CPU times: total: 1min 34s
Wall time: 1min 55s


## Businesses

In [17]:
# Load the CSV files in memory
activeBusinesses = pd.read_csv(activeBusinessesData, sep=',', index_col='LOCATION ACCOUNT #')
closedBusinesses = pd.read_csv(closedBusinessesData, sep=',', index_col='LOCATION ACCOUNT #')
closedBusinesses['LOCATION START DATE'] = pd.to_datetime(closedBusinesses['LOCATION START DATE'], format='%m/%d/%Y').dt.strftime('%Y-%m-%d')
closedBusinesses['LOCATION END DATE'] = pd.to_datetime(closedBusinesses['LOCATION END DATE'], format='%m/%d/%Y').dt.strftime('%Y-%m-%d')

businesses = pd.concat([activeBusinesses, closedBusinesses]).drop_duplicates()

#create the graph
g = Graph()

# Bind the namespaces to a prefix for more readable output
g.bind("foaf", FOAF)
g.bind("xsd", XSD)
g.bind("lao", LAO)

businesses


Unnamed: 0_level_0,Unnamed: 0,BUSINESS NAME,DBA NAME,STREET ADDRESS,CITY,ZIP CODE,LOCATION DESCRIPTION,MAILING ADDRESS,MAILING CITY,MAILING ZIP CODE,NAICS,PRIMARY NAICS DESCRIPTION,COUNCIL DISTRICT,LOCATION START DATE,LOCATION END DATE,LOCATION,COMMUNITY PLANNING AREA
LOCATION ACCOUNT #,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0003176540-0001-1,0,SOEUNG CHAING,CAFECAFE,1330 W PACIFIC COAST HIGHWAY SUITE #E,WILMINGTON,90744-2467,1330 PACIFIC COAST 90744-2467,,,,722211.0,Limited-service eating places,15,2020-01-01,,"(33.7901, -118.2804)",Wilmington - Harbor City-WLM
0002973746-0002-1,1,BELINDA MIXON-JOY,BOOM'S ROOM,327 CULVER BLVD UNIT #4,PLAYA DEL REY,90293-7770,327 CULVER 90293-7770,,,,448150.0,Clothing accessories stores,11,2021-05-12,,"(33.9603, -118.4471)",Westchester - Playa del Rey-WCH
0003318526-0001-9,2,MARTIN HERNANDEZ,E H PAINTING,1421 PICO STREET,SAN FERNANDO,91340-3506,1421 PICO 91340-3506,,,,233210.0,Single Family Housing Construction (1997 NAICS),0,2022-06-01,,,_INVALID_
0003314268-0001-9,3,AZNIV SIMONYAN,,13317 VENTURA BLVD #B,SHERMAN OAKS,91423-6210,13317 Ventura 91423-6210,,,,812112.0,Beauty salons,4,2022-05-09,,"(34.1468, -118.423)",Sherman Oaks - Studio City - Toluca Lake - Cah...
0003260354-0001-2,4,SOCAL DEMOLITION COMPANY,,319 W 18TH STREET,NATIONAL CITY,91950-5525,319 18TH 91950,319 W 18TH STREET,NATIONAL CITY,91950-5525,,,0,2021-04-01,,"(32.6665, -117.1073)",_NOT FOUND_
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0003230029-0001-1,143047,AMETHYST,,11469 MOORPARK STREET APT #1,NORTH HOLLYWOOD,91602-2045,11469 MOORPARK 91602,10822 OTSEGO STREET APT #102,NORTH HOLLYWOOD,91601-3957,,,2,2020-05-04,2023-12-31,"(34.1507, -118.3816)",Sherman Oaks - Studio City - Toluca Lake - Cah...
0000876436-0003-8,143048,YOJANA N ZAPATA,HAPPY DOG GROOMING # 2,7621 WOODLAKE AVENUE,WEST HILLS,91304-5327,7621 WOODLAKE 91304-5327,16201 VICTORY BLVD,VAN NUYS,91406-5821,812910.0,Pet care (except veterinary) services,12,2017-06-15,2023-12-31,"(34.2088, -118.6323)",Canoga Park - Winnetka - Woodland Hills - West...
0002632808-0001-6,143049,SEAN REILLY,,3568 KEYSTONE AVENUE #7,LOS ANGELES,90034-5569,3568 KEYSTONE 90034-5569,1380 W CAPITOL DRIVE UNIT #115,SAN PEDRO,90732-5082,541600.0,"Management, scientific, & technical consulting...",5,2011-08-31,2023-12-31,"(34.0242, -118.4098)",Palms - Mar Vista - Del Rey-PLM
0003052169-0001-0,143050,JENNIFER R. UTULO,,20649 SKOURAS DRIVE,WINNETKA,91306-4044,20649 SKOURAS 91306-4044,POST OFFICE BOX #5444,WEST HILLS,91308-5444,812990.0,All other personal services,3,2017-02-25,2023-12-31,"(34.1928, -118.5836)",Canoga Park - Winnetka - Woodland Hills - West...


In [18]:
%%time 
#measure execution time

businesses["FILTERED ZIP"] = businesses["ZIP CODE"].str.split("-", expand=True).get(0)
businesses["NAICS"] = businesses["NAICS"].astype("Int64")

#iterate over the businesses dataframe
for index, row in businesses.iterrows():
    # Create the node to add to the Graph
    # the node has the namespace + the businessId as URI
    idU = str(index)
    Business = URIRef(LAO[idU])
    # Add triples using store's add() method.

    #TYPE
    g.add((Business, RDF.type, LAO.Business))
    
    #DATA PROPERTIES
    g.add((Business, LAO['businessId'], Literal(str(index), datatype=XSD.string)))    
    g.add((Business, LAO['businessName'], Literal(row['BUSINESS NAME'], datatype=XSD.string)))
    if (pd.isna(row["DBA NAME"]) == False):    
        g.add((Business, LAO['doingBusinessName'], Literal(row['DBA NAME'], datatype=XSD.string)))

    #OBJECT PROPERTIES
    if (pd.isna(row["LOCATION START DATE"]) == False):
        g.add((Business, LAO['openedOnDate'], LAO["day" + str(row['LOCATION START DATE'])]))
    if (pd.isna(row["LOCATION END DATE"]) == False):
        g.add((Business, LAO['closedOnDate'], LAO["day" + str(row['LOCATION END DATE'])]))
    if (row["FILTERED ZIP"] != ''):
        g.add((Business, LAO['locatedInCity'], LAO["city" + str(urllib.parse.quote(row["FILTERED ZIP"]))]))
    if (pd.isna(row["NAICS"]) == False):
        g.add((Business, LAO['hasNaics'], LAO["naics" + str(row['NAICS'])]))
    if (pd.isna(row["LOCATION"]) == False):
        row['LOCATION'] = str(row['LOCATION']).replace('(','') 
        row['LOCATION'] = row['LOCATION'].replace(')','')
        row['LOCATION'] = row['LOCATION'].replace(' ','')
        lat = row['LOCATION'].split(',')[0]
        lon = row['LOCATION'].split(',')[1]
        coordsURI = 'lat' + lat + 'lon' + lon
        g.add((Business, LAO['locatedIn'], LAO[str(coordsURI)]))
        
         
print("--- saving serialization ---")
with open('businesses.ttl', 'w') as file:
    file.write(g.serialize(format='turtle'))

--- saving serialization ---
CPU times: total: 2min 10s
Wall time: 2min 32s
