In [1]:
%pip install neo4j-driver

##### Dataset
The original dataset can be found at https://github.com/krlawrence/graph.

In [2]:
#import libraries
import os
from getpass import getpass

import pandas as pd
from neo4j import GraphDatabase, basic_auth

In [3]:
# Sandbox connection settings.
# The password is deliberately not stored in the notebook: it is read from the
# NEO4J_PASSWORD environment variable, falling back to an interactive prompt.
# NEO4J_URI may be set to override the Bolt address used below.
uri = os.environ.get("NEO4J_URI", "bolt://xx.xx.xx.xx:7687")
pwd = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

In [4]:
# Establish the connection to the graph database
driver = GraphDatabase.driver(uri, auth=basic_auth("neo4j", pwd))
# Check the connection now so that a wrong address or password fails in this
# cell rather than in the first query cell further down.
driver.verify_connectivity()
driver

<neo4j._sync.driver.BoltDriver at 0x1c10f3f4390>

In [5]:
# Function to execute the query and return one column of the results
def execute_query(driver, query, column="label"):
    """Run ``query`` and return the ``column`` field of every record.

    Args:
        driver: Object whose ``session()`` returns a context manager that
            exposes ``run(query)``.
        query: Cypher statement to execute.
        column: Record key to extract. Defaults to ``"label"``, the key this
            function originally read from every record.

    Returns:
        list: One value per returned record, in result order.
    """
    with driver.session() as session:
        result = session.run(query)
        # Read the values while the session is still open.
        return [record[column] for record in result]

# Fetch every node label defined in the database
node_labels = execute_query(driver, 'CALL db.labels()')

# Show the labels
print("Node Labels:", node_labels)

Node Labels: ['Airport', 'City', 'Region', 'Country', 'Continent']


In [6]:
# Run a query and collect the relationship-type name from each record
def execute_query(driver, query):
    """Return the ``relationshipType`` field of every record produced by ``query``."""
    names = []
    with driver.session() as db_session:
        for row in db_session.run(query):
            names.append(row["relationshipType"])
    return names

# Fetch every relationship type defined in the database
relationship_types = execute_query(driver, 'CALL db.relationshipTypes()')
# Show the relationship types
print("Relationship Types:", relationship_types)

Relationship Types: ['IN_CITY', 'IN_COUNTRY', 'IN_REGION', 'ON_CONTINENT', 'HAS_ROUTE']


In [7]:
# Accumulator for the per-entity count rows appended by the cells below
data = list()

In [8]:
# Cypher: count every node in the graph
query = 'MATCH (n) RETURN COUNT(n)'

In [9]:
# Show the raw record returned by the count query
with driver.session() as session:
    result = session.run(query)
    record = result.single()
    print(record)

<Record COUNT(n)=8627>


In [10]:
# Show only the count value held in the returned record
with driver.session() as session:
    result = session.run(query)
    record = result.single()
    print(record["COUNT(n)"])

8627


In [11]:
# Record the total node count in the summary list and report it
with driver.session() as session:
    result = session.run(query)
    row = result.single()
    count = row["COUNT(n)"]
    data.append(dict(Entity="Nodes", Count=count))
    print(f'Number of Nodes: {count}')

Number of Nodes: 8627


In [12]:
# Cypher: count the nodes labelled Country
query = 'MATCH (n:Country) RETURN COUNT(n)'

In [13]:
# Show the raw record returned by the Country count query
with driver.session() as session:
    result = session.run(query)
    record = result.single()
    print(record)

<Record COUNT(n)=232>


In [14]:
# Show only the count value held in the returned record
with driver.session() as session:
    result = session.run(query)
    record = result.single()
    print(record["COUNT(n)"])

232


In [15]:
# Record the Country count in the summary list and report it
with driver.session() as session:
    result = session.run(query)
    row = result.single()
    count = row["COUNT(n)"]
    data.append(dict(Entity="Countries", Count=count))
    print(f'Number of countries: {count}')

Number of countries: 232


In [16]:
# Cypher: count the nodes labelled Airport
query = 'MATCH (n:Airport) RETURN COUNT(n)'

In [17]:
# Record the Airport count in the summary list and report it
with driver.session() as session:
    result = session.run(query)
    row = result.single()
    count = row["COUNT(n)"]
    data.append(dict(Entity="Airports", Count=count))
    print(f'Number of Airports: {count}')

Number of Airports: 3503


In [18]:
# Cypher: count the nodes labelled City
query = 'MATCH (n:City) RETURN COUNT(n)'

In [19]:
# Record the City count in the summary list and report it
with driver.session() as session:
    result = session.run(query)
    row = result.single()
    count = row["COUNT(n)"]
    data.append(dict(Entity="Cities", Count=count))
    print(f'Number of Cities: {count}')

Number of Cities: 3359


In [20]:
# Cypher: count the nodes labelled Region
query = 'MATCH (n:Region) RETURN COUNT(n)'

In [21]:
# Record the Region count in the summary list and report it
with driver.session() as session:
    result = session.run(query)
    row = result.single()
    count = row["COUNT(n)"]
    data.append(dict(Entity="Regions", Count=count))
    print(f'Number of Regions: {count}')

Number of Regions: 1527


In [22]:
# Cypher: count the nodes labelled Continent
query = 'MATCH (n:Continent) RETURN COUNT(n)'

In [23]:
# Record the Continent count in the summary list and report it
with driver.session() as session:
    result = session.run(query)
    row = result.single()
    count = row["COUNT(n)"]
    data.append(dict(Entity="Continents", Count=count))
    print(f'Number of Continents: {count}')

Number of Continents: 6


In [24]:
# Build the summary table from the collected count rows
df = pd.DataFrame(data=data)

# Show the table
print(df)

       Entity  Count
0       Nodes   8627
1   Countries    232
2    Airports   3503
3      Cities   3359
4     Regions   1527
5  Continents      6


Distribution of the number of airports per continent 

In [25]:
# Define the Cypher query:
# count the Airport nodes linked to each Continent through ON_CONTINENT and
# return the continents ordered from most to fewest airports.
query = """
MATCH (:Airport)-[:ON_CONTINENT]->(c:Continent)
RETURN c.name AS continentName, count(*) AS numAirports
ORDER BY numAirports DESC
"""

# Function to execute the query and return the results
def execute_query(driver, query):
    """Run ``query`` and return every record as a plain dictionary.

    Each record is converted with ``record.data()`` instead of copying two
    hard-coded columns, so the function works for any query.

    Args:
        driver: Object whose ``session()`` returns a context manager that
            exposes ``run(query)``.
        query: Cypher statement to execute.

    Returns:
        list[dict]: One dictionary per returned record, in result order.
    """
    with driver.session() as session:
        result = session.run(query)
        # Convert the records while the session is still open.
        return [record.data() for record in result]

# Run the continent query and show the returned rows
results = execute_query(driver=driver, query=query)
results

[{'continentName': 'NA', 'numAirports': 989},
 {'continentName': 'AS', 'numAirports': 971},
 {'continentName': 'EU', 'numAirports': 605},
 {'continentName': 'AF', 'numAirports': 321},
 {'continentName': 'SA', 'numAirports': 313},
 {'continentName': 'OC', 'numAirports': 304}]

In [26]:
# Tabulate the per-continent airport counts
airports_df = pd.DataFrame(data=results)

# Show the table
print(airports_df)

  continentName  numAirports
0            NA          989
1            AS          971
2            EU          605
3            AF          321
4            SA          313
5            OC          304


Calculate the minimum, maximum, average, and standard deviation of the number of flights out of each airport.

In [27]:
# Define the Cypher query:
# for every Airport that has at least one outgoing HAS_ROUTE relationship to
# another Airport, count those relationships, then aggregate the per-airport
# counts into min / max / average / standard deviation.
# Airports without any outgoing route do not match and are not included.
query = """
MATCH (a:Airport)-[:HAS_ROUTE]->(:Airport)
WITH a, count(*) AS numberOfRoutes
RETURN 
    min(numberOfRoutes) AS minRoutes, 
    max(numberOfRoutes) AS maxRoutes, 
    avg(numberOfRoutes) AS avgRoutes, 
    stdev(numberOfRoutes) AS stdDevRoutes
"""

# Run a query and hand back its records as dictionaries
def execute_query(driver, query):
    """Execute ``query`` in a fresh session and return ``record.data()`` for each record."""
    with driver.session() as db_session:
        cursor = db_session.run(query)
        rows = list(map(lambda rec: rec.data(), cursor))
    return rows

# Run the route-statistics query
results = execute_query(driver=driver, query=query)

In [28]:
# Tabulate the route statistics
df = pd.DataFrame(data=results)

# Show the table
print(df)

   minRoutes  maxRoutes  avgRoutes  stdDevRoutes
0          1        307  20.905363     38.287309


Create Node Graph Projection (undirected `airportGraph`)

In [29]:
# Cypher: project Airport nodes and HAS_ROUTE relationships into a GDS graph
# named 'airportGraph', with every HAS_ROUTE relationship projected as
# UNDIRECTED, and return the projection summary columns listed after YIELD.
query = """
CALL gds.graph.project(
    'airportGraph',
    'Airport',
    {
        HAS_ROUTE: {
            type: 'HAS_ROUTE',
            orientation: 'UNDIRECTED'
        }
    }
)
YIELD
    graphName,
    nodeProjection,
    nodeCount,
    relationshipProjection,
    relationshipCount,
    projectMillis
"""


In [30]:
def create_graph_projection(driver, query):
    """Execute the projection ``query`` and return its records as a list of dictionaries."""
    with driver.session() as db_session:
        cursor = db_session.run(query)
        summary_rows = [rec.data() for rec in cursor]
    return summary_rows

# gds.graph.project fails when a graph with the same name already exists, so
# drop any 'airportGraph' left over from a previous run first. Passing false
# for failIfMissing means no error is raised when there is nothing to drop.
with driver.session() as session:
    session.run("CALL gds.graph.drop('airportGraph', false)").consume()

# Execute the query
projection_results = create_graph_projection(driver, query)

# Convert the results to a DataFrame
projection_df = pd.DataFrame(projection_results)

# Display the DataFrame
print(projection_df)


      graphName                                     nodeProjection  nodeCount  \
0  airportGraph  {'Airport': {'label': 'Airport', 'properties':...       3503   

                              relationshipProjection  relationshipCount  \
0  {'HAS_ROUTE': {'aggregation': 'DEFAULT', 'orie...              92778   

   projectMillis  
0             21  


In [31]:
# Rich notebook rendering of the projection summary built in the previous cell
projection_df

Unnamed: 0,graphName,nodeProjection,nodeCount,relationshipProjection,relationshipCount,projectMillis
0,airportGraph,"{'Airport': {'label': 'Airport', 'properties':...",3503,"{'HAS_ROUTE': {'aggregation': 'DEFAULT', 'orie...",92778,21


Create Node Graph Projection (`routes`, default relationship orientation)

In [32]:
# Define the Cypher query for graph projection:
# project Airport nodes and HAS_ROUTE relationships into a GDS graph named
# 'routes' (no orientation is specified here) and return the summary columns
# listed after YIELD.
query = """
CALL gds.graph.project(
    'routes',
    'Airport',
    'HAS_ROUTE'
)
YIELD
    graphName, nodeProjection, nodeCount, relationshipProjection, relationshipCount
"""

# query = """
# CALL gds.graph.project(
#     'routes-airport',
#     {
#         Airport: {
#             label: 'Airport',
#             properties: ['iata', 'city']
#         }
#     },
#     'HAS_ROUTE'
# )
# YIELD graphName, nodeCount, relationshipCount;
# """


# Run a projection query and collect its summary records
def create_graph_projection(driver, query):
    """Return ``record.data()`` for every record produced by ``query``."""
    summary_rows = []
    with driver.session() as db_session:
        for rec in db_session.run(query):
            summary_rows.append(rec.data())
    return summary_rows

# gds.graph.project fails when a graph with the same name already exists, so
# drop any 'routes' graph left over from a previous run first. Passing false
# for failIfMissing means no error is raised when there is nothing to drop.
with driver.session() as session:
    session.run("CALL gds.graph.drop('routes', false)").consume()

# Execute the query
projection_results = create_graph_projection(driver, query)

In [33]:
# Convert the results to a DataFrame
projection_df = pd.DataFrame(projection_results)

# Adjust Pandas display: show every column and allow wider printed lines.
# (A stray trailing `s` after the first call made this cell a SyntaxError.)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 500)

# Display the DataFrame
print(projection_df)

  graphName                                     nodeProjection  nodeCount                             relationshipProjection  relationshipCount
0    routes  {'Airport': {'label': 'Airport', 'properties':...       3503  {'HAS_ROUTE': {'aggregation': 'DEFAULT', 'orie...              46389


In [34]:
# Node projection configuration from the first row of the summary
node_projection = projection_df.iloc[0]['nodeProjection']
node_projection

{'Airport': {'label': 'Airport', 'properties': {}}}

In [35]:
# Relationship projection configuration from the first row of the summary
relationship_projection = projection_df.iloc[0]['relationshipProjection']
relationship_projection

{'HAS_ROUTE': {'aggregation': 'DEFAULT',
  'orientation': 'NATURAL',
  'indexInverse': False,
  'properties': {},
  'type': 'HAS_ROUTE'}}

List All Graphs

In [36]:
# Cypher: list the graphs currently held in the GDS graph catalog
query = "\nCALL gds.graph.list()\n"

# Run the catalog listing query and collect its records
def list_gds_graphs(driver, query):
    """Execute ``query`` and return every record as a dictionary via ``record.data()``."""
    with driver.session() as db_session:
        cursor = db_session.run(query)
        return list(rec.data() for rec in cursor)

# Fetch the catalog listing
graph_list_results = list_gds_graphs(driver=driver, query=query)



In [37]:
# Tabulate the catalog listing
graph_list_df = pd.DataFrame(data=graph_list_results)

# Lift pandas' display limits so the listing prints without truncation
for _option, _value in (
    ('display.max_columns', None),
    ('display.width', 1000),
    ('display.max_rows', None),
    ('display.max_colwidth', None),
):
    pd.set_option(_option, _value)

# Show the table
print(graph_list_df)

                                                                                                        degreeDistribution     graphName database databaseLocation memoryUsage  sizeInBytes  nodeCount  relationshipCount                                                                                                                                                                                                                                                                                                                                                                                                                                                                                configuration   density                         creationTime                     modificationTime                                                                                 schema                                                                                                             schemaWithOrientation
0   

# Graph Algorithms

*PageRank Algorithm*

In [38]:
# Define the Cypher query for PageRank:
# stream PageRank scores for the 'routes' graph, resolve each nodeId back to
# its node, and return iata / descr / score ordered by score (highest first),
# with ties ordered by iata.
query = """
CALL gds.pageRank.stream('routes')
YIELD nodeId, score
WITH gds.util.asNode(nodeId) AS n, score AS pageRank
RETURN n.iata AS iata, n.descr AS description, pageRank
ORDER BY pageRank DESC, iata ASC
"""

# Run the PageRank query and collect its records
def execute_pagerank_query(driver, query):
    """Execute ``query`` and return ``record.data()`` for each returned record."""
    ranked = []
    with driver.session() as db_session:
        ranked.extend(rec.data() for rec in db_session.run(query))
    return ranked

# Stream the PageRank scores
pagerank_results = execute_pagerank_query(driver=driver, query=query)

# Tabulate the scores
pagerank_df = pd.DataFrame(data=pagerank_results)


# Show the table
print(pagerank_df)

     iata                                                                    description   pageRank
0     DFW                                        Dallas/Fort Worth International Airport  11.979783
1     ORD                                           Chicago O'Hare International Airport  11.162988
2     DEN                                                   Denver International Airport  10.997299
3     ATL                             Hartsfield - Jackson Atlanta International Airport  10.389948
4     IST                                                 Istanbul International Airport   8.425801
5     CDG                                                        Paris Charles de Gaulle   8.401469
6     IAH                                                   George Bush Intercontinental   8.341141
7     FRA                                                              Frankfurt am Main   8.203205
8     LAX                                              Los Angeles International Airport   8.193558


In [40]:
# Define the Cypher query to write Page Rank Algorithm Results to the Database:
# run PageRank on the 'routes' graph in write mode with writeProperty set to
# 'pageRank', and return how many node properties were written and how many
# iterations ran.
query = """
CALL gds.pageRank.write('routes',
    {
        writeProperty: 'pageRank'
    }
)
YIELD nodePropertiesWritten, ranIterations
"""

# Run a query and return its records as dictionaries
def execute_query(driver, query):
    """Execute ``query`` in a session and return ``record.data()`` for each record."""
    rows = []
    with driver.session() as db_session:
        for rec in db_session.run(query):
            rows.append(rec.data())
    return rows

# Run the PageRank write query
results = execute_query(driver=driver, query=query)

# Tabulate the write summary
df = pd.DataFrame(data=results)

# Lift pandas' display limits so nothing is truncated when printing
for _option, _value in (
    ('display.max_columns', None),
    ('display.width', 1000),
    ('display.max_rows', None),
    ('display.max_colwidth', None),
):
    pd.set_option(_option, _value)

# Show the table
print(df)

   nodePropertiesWritten  ranIterations
0                   3503             20


In [41]:
# Define the Cypher query after applying Page Rank Algorithm to the database:
# read iata, descr and the pageRank property from every Airport node, ordered
# by pageRank (highest first) with ties ordered by iata.
query = """
MATCH (a:Airport)
RETURN a.iata AS iata, a.descr AS description, a.pageRank AS pageRank
ORDER BY a.pageRank DESC, a.iata ASC
"""

# Run a query and return its records as dictionaries
def execute_query(driver, query):
    """Return a list holding ``record.data()`` for every record yielded by ``query``."""
    with driver.session() as db_session:
        cursor = db_session.run(query)
        rows = [rec.data() for rec in cursor]
    return rows

# Read back the stored PageRank values
results = execute_query(driver=driver, query=query)

# Tabulate the values
df = pd.DataFrame(data=results)

# Lift pandas' display limits so nothing is truncated when printing
for _option, _value in (
    ('display.max_columns', None),
    ('display.width', 1000),
    ('display.max_rows', None),
    ('display.max_colwidth', None),
):
    pd.set_option(_option, _value)

# Show the table
print(df)

     iata                                                                    description   pageRank
0     DFW                                        Dallas/Fort Worth International Airport  11.979783
1     ORD                                           Chicago O'Hare International Airport  11.162988
2     DEN                                                   Denver International Airport  10.997299
3     ATL                             Hartsfield - Jackson Atlanta International Airport  10.389948
4     IST                                                 Istanbul International Airport   8.425801
5     CDG                                                        Paris Charles de Gaulle   8.401469
6     IAH                                                   George Bush Intercontinental   8.341141
7     FRA                                                              Frankfurt am Main   8.203205
8     LAX                                              Los Angeles International Airport   8.193558


*Community (cluster) detection via Louvain Modularity Algorithm*

In [43]:
# Helper that runs a Cypher query and returns its records as dictionaries
def execute_query(driver, query):
    """Execute ``query`` and return ``record.data()`` for each returned record."""
    with driver.session() as db_session:
        return list(map(lambda rec: rec.data(), db_session.run(query)))

# Cypher: stream Louvain community assignments for the 'routes' graph,
# resolve each nodeId back to its node, and return one row per communityId
# with the airport count plus the distinct city and iata values, ordered by
# airport count (largest first) and then by communityId.
query = """
CALL gds.louvain.stream('routes')
YIELD nodeId, communityId
WITH gds.util.asNode(nodeId) AS airport, communityId
RETURN
    communityId,
    COUNT(airport) AS numberOfAirports,
    COLLECT(DISTINCT airport.city) AS cities,
    COLLECT(DISTINCT airport.iata) AS iataCodes
ORDER BY numberOfAirports DESC, communityId;
"""

# Start from an empty result so that a failed query produces an empty
# DataFrame below instead of reusing `results` left over from an earlier cell
# (or raising NameError when no earlier cell defined it).
results = []
try:
    results = execute_query(driver, query)
    print("Louvain Algorithm Results:\n", results)
except Exception as e:
    # Best effort: report the failure and continue with the empty result.
    print(f"Error running Louvain Modularity Algorithm: {e}")

# Convert the results to a DataFrame
df = pd.DataFrame(results)

pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Display the DataFrame
df

Louvain Algorithm Results:
 [{'communityId': 3321, 'numberOfAirports': 697, 'cities': ['Atlanta', 'Anchorage', 'Austin', 'Nashville', 'Boston', 'Baltimore', 'Washington D.C.', 'Dallas', 'Fort Lauderdale', 'Houston', 'New York', 'Los Angeles', 'Orlando', 'Miami', 'Minneapolis', 'Chicago', 'West Palm Beach', 'Phoenix', 'Raleigh', 'Seattle', 'San Francisco', 'San Jose', 'Tampa', 'San Diego', 'Long Beach', 'Santa Ana', 'Salt Lake City', 'Las Vegas', 'Denver', 'White Plains', 'San Antonio', 'New Orleans', 'Newark', 'Cedar Rapids', 'Honolulu', 'El Paso', 'San Juan', 'Cleveland', 'Oakland', 'Tucson', 'Santa Fe', 'Philadelphia', 'Detroit', 'Toronto', 'Vancouver', 'Ottawa', 'Fort Myers', 'Montreal', 'Edmonton', 'Calgary', "St. John's", 'Mexico City', 'Kingston', 'Tallahassee', 'Pittsburgh', 'Portland', 'Oaklahoma City', 'Ontario', 'Rochester', 'Halifax', 'Winnipeg', 'Charlotte', 'Cancun', 'Palm Springs', 'Memphis', 'Cincinnati', 'Indianapolis', 'Kansas City', 'St Louis', 'Albuquerque', 'Milwauk

Unnamed: 0,communityId,numberOfAirports,cities,iataCodes
0,3321,697,"[Atlanta, Anchorage, Austin, Nashville, Boston, Baltimore, Washington D.C., Dallas, Fort Lauderdale, Houston, New York, Los Angeles, Orlando, Miami, Minneapolis, Chicago, West Palm Beach, Phoenix, Raleigh, Seattle, San Francisco, San Jose, Tampa, San Diego, Long Beach, Santa Ana, Salt Lake City, Las Vegas, Denver, White Plains, San Antonio, New Orleans, Newark, Cedar Rapids, Honolulu, El Paso, San Juan, Cleveland, Oakland, Tucson, Santa Fe, Philadelphia, Detroit, Toronto, Vancouver, Ottawa, Fort Myers, Montreal, Edmonton, Calgary, St. John's, Mexico City, Kingston, Tallahassee, Pittsburgh, Portland, Oaklahoma City, Ontario, Rochester, Halifax, Winnipeg, Charlotte, Cancun, Palm Springs, Memphis, Cincinnati, Indianapolis, Kansas City, St Louis, Albuquerque, Milwaukee, Harrison, Salina, Omaha, Tulsa, Puerto Vallarta, Kahului, Nassau, Freeport, George Town, Key West, Bridgetown, St. George, Charlotte Amalie, Hamilton, Scarborough, Port of Spain, Montego Bay, Little Rock, Kralendijk, Oranjestad, Norfolk, Jacksonville, Providence, Punta Cana, Harrisburg, Sacramento, Roatan Island, Tegucigalpa, Colorado Springs, ...]","[ATL, ANC, AUS, BNA, BOS, BWI, DCA, DFW, FLL, IAD, IAH, JFK, LAX, LGA, MCO, MIA, MSP, ORD, PBI, PHX, RDU, SEA, SFO, SJC, TPA, SAN, LGB, SNA, SLC, LAS, DEN, HPN, SAT, MSY, EWR, CID, HNL, HOU, ELP, SJU, CLE, OAK, TUS, SAF, PHL, DTW, YYZ, YVR, YOW, RSW, YUL, YEG, YYC, YYT, MEX, KIN, TLH, PIT, PWM, PDX, OKC, ONT, ROC, RST, YHZ, YWG, CLT, CUN, PSP, MEM, CVG, IND, MCI, DAL, STL, ABQ, MKE, MDW, HRO, SLN, OMA, TUL, PVR, OGG, NAS, FPO, GGT, EYW, BGI, ANU, STT, BDA, TAB, POS, MBJ, LIT, BON, AUA, ORF, JAX, ...]"
1,3307,514,"[London, Paris, Frankfurt, Helsinki, Dublin, Rome, Amsterdam, Prague, Barcelona, Madrid, Vienna, Zurich, Geneva, Brussels, Munich, Manchester, Cologne, Gothenburg, Venice, Shannon, Oslo, Stockholm, Nottingham, Edinburgh, Glasgow, Liverpool, Nice, Milan, Athens, Zagreb, Budapest, Alicante, Bilbao, Ibiza, Menorca, Tenerife, Larnaca, Warsaw, Luqa, Sofia, Belgrade, Tel Aviv, Hamburg, Stuttgart, Genoa, Naples, Pisa, Turin, Bologna, Verona, Nantes, Copenhagen, Luxembourg, Dusseldorf, Lisbon, Gibraltar, Tunis, Reykjavik, Gran Canaria, Southampton, Palma De Mallorca, Riga, Malaga, Funchal, Leeds, Aberdeen, Antalya, Saint Helier, Zakynthos, Rhodes, Bristol, Newcastle, Saint Peter Port, Eindhoven, Sevilla, Basle, Dubrovnik, Stavanger, Bergen, Tallinn, Cork, Wroclaw, Split, Belfast, Hannover, Lyon, Marseille, Bucharest, Rotterdam, Tangier, Faro, Mykonos Island, Santorini Island, Kiev, Rijeka, Toulouse/Blagnac, Porto, Culleredo, Innsbruck, Birmingham, ...]","[LHR, LGW, CDG, FRA, HEL, DUB, FCO, AMS, PRG, BCN, MAD, VIE, ZRH, GVA, BRU, MUC, MAN, CGN, LCY, GOT, VCE, SNN, OSL, ARN, STN, EMA, EDI, GLA, LPL, ORY, NCE, MXP, ATH, ZAG, BUD, ALC, BIO, IBZ, MAH, TFN, LCA, WAW, MLA, SOF, BEG, TLV, HAM, STR, GOA, NAP, PSA, TRN, BLQ, TSF, VRN, NTE, CPH, LUX, DUS, LIS, GIB, TUN, LTN, KEF, LPA, SOU, PMI, RIX, AGP, FNC, LBA, ABZ, AYT, JER, ZTH, RHO, BRS, NCL, GCI, EIN, SVQ, BSL, DBV, SVG, BGO, TLL, ORK, WRO, SPU, BHD, HAJ, LIN, LYS, MRS, OTP, RTM, TNG, FAO, JMK, JTR, ...]"
2,2281,403,"[Dubai, New Delhi, Mumbai, Doha, Calicut, Hyderabad, Chennai, Kolkata, Bengaluru, Cape Town, Johannesburg, Durban, Nairobi, Mombasa, Cairo, Addis Ababa, Kuwait, Istanbul, Manama, Abu Dhabi, Colombo, Jeddah, Muscat, Lagos, Harare, Luxor, Riyadh, Islamabad, Amman, Algiers, Karachi, Lahore, Jaipur, Accra, Kampala, Casablanca, Abuja, Beirut, Freetown, Luanda, Tripoli, Dhaka, Sylhet, Tehran, Port Louis, Mahe Island, Hassi Messaoud, Malé, Kigali, Arusha, Ad Dammam, Arbil, Dakar, Agra, Khajuraho, Varanasi, Mangalore, Ahmedabad, Jodhpur, Pune, Sharjah, Aden, Coimbatore, Cochin, Trivandrum, Tiruchirappally, Sana'a, Diu, Porbandar, Windhoek, Ankara, Lusaka, Hargeisa, Berbera, Djibouti City, Alexandria, Port Sudan, Juba, Khartoum, Dar es Salaam, Gheshm, Kabul, Douala, Constantine, Oran, Cotonou, Ouagadougou, Abidjan, Niamey, Sfax, Lomé, Brazzaville, Pointe Noire, Bangui, Yaoundé, St Denis, Antananarivo, Libreville, N'Djamena, Kinshasa, ...]","[DXB, DEL, BOM, DOH, CCJ, HYD, MAA, CCU, BLR, CPT, JNB, DUR, NBO, MBA, CAI, ADD, KWI, IST, BAH, AUH, CMB, JED, MCT, LOS, HRE, LXR, RUH, ISB, AMM, ALG, KHI, LHE, JAI, ACC, EBB, CMN, ABV, BEY, FNA, LAD, TIP, DAC, ZYL, IKA, MRU, SEZ, HME, SAW, MLE, KGL, JRO, DMM, EBL, DKR, AGR, HJR, VNS, IXE, AMD, JDH, PNQ, SHJ, ADE, CJB, COK, TRV, TRZ, SAH, DIU, PBD, WDH, ESB, LUN, HGA, BBO, JIB, HBE, PZU, JUB, KRT, DAR, GSM, KBL, DLA, CZL, ORN, COO, OUA, ABJ, NIM, SFA, LFW, BZV, PNR, BGF, NSI, RUN, TNR, LBV, NDJ, ...]"
3,2838,252,"[Tokyo, Singapore, Hong Kong, Beijing, Shanghai, Kuala Lumpur, Manila, Bangkok, Osaka, Seoul, Phnom Penh, Ho Chi Minh City, Hagta, Taipei, Kathmandu, Fukuoka, Sapporo, Denpasar, Jakarta, Guangzhou, Phuket, Nanjing, Chengdu, Hanoi, Kaohsiung City, Tokoname, Xiamen, Hangzhou, Vladivostok, Shenzhen, Hiroshima, Dili, Tianjin, Changsha, Penang, Wuhan, Haikou, Kunming, Fuzhou, Ningbo, Qingdao, Chongqing, Guiyang, Nanning, Kagoshima, Oita, Kanazawa, Yonago, Matsuyama, Takamatsu, Niigata, Sendai, Jeju City, Busan, Naha, Saipan Island, Babelthuap Island, Yangon, Chiang Mai, Krabi, Na Thon (Ko Samui Island), Hat Yai, Da Nang, Bandar Seri Begawan, Lapu-Lapu City, Ulaanbaatar, Taipa, Kotamadya Balikpapan, Kota Kinabalu, Angeles City, Kota Baharu, Iloilo City, Siem Reap, Changchun, Dalian, Shenyang, Kuching, Miri, Kuantan, Ipoh, Langkawi, Kuala Terengganu, Kuala Lumpur Subang, Davao City, Vientiane, Surabaya, Bandung-Java Island, Mataram, Kalibo, Pekanbaru-Sumatra Island, Palembang-Sumatra Island, Sukarata(Solo)-Java Island, Semarang-Java Island, Ujung Pandang-Celebes Island, Yogyakarta-Java Island, Taipei City, Shirahama, Kobe, Obihiro, Hakodate, ...]","[NRT, SIN, HKG, PEK, PVG, KUL, MNL, BKK, ITM, HND, ICN, PNH, SGN, GUM, TPE, KTM, FUK, KIX, CTS, DPS, CGK, CAN, HKT, NKG, CTU, HAN, KHH, NGO, XMN, HGH, VVO, SZX, HIJ, DIL, TSN, CSX, PEN, WUH, HAK, KMG, FOC, NGB, TAO, CKG, KWE, NNG, KOJ, OIT, KMQ, YGJ, MYJ, TAK, KIJ, SDJ, CJU, PUS, OKA, SPN, ROR, RGN, DMK, CNX, KBV, USM, HDY, DAD, BWN, CEB, ULN, MFM, BPN, BKI, CRK, KBR, ILO, REP, CGQ, DLC, SHE, KCH, MYY, KUA, IPH, LGK, TGG, SZB, DVO, VTE, SUB, BDO, LOP, KLO, PKU, PLM, SOC, SRG, UPG, JOG, TSA, SHM, ...]"
4,3260,186,"[Sydney, Melbourne, Perth, Auckland, Wellington, Brisbane, Christchurch, Canberra, Ayers Rock, Alice Springs, Gold Coast, Cairns, Maroochydore, Adelaide, Darwin, Nadi, Port Moresby, Papeete, Apia, Majuro Atoll, Banana, Tarawa, Yaren District, Nouméa, Avarua, Nausori, Nuku'alofa, Port Vila, Queenstown, Armidale, Broken Hill, Hamilton Island, Mackay, Ballina, Proserpine, Broome, Bathurst, Townsville, Gladstone, Griffith, Hervey Bay, Lord Howe Island, Lismore, Albury, Merimbula, Hobart, Mildura, Launceston, Moree, Moruya, Narrandera, Orange, Karratha, Parkes, Port Macquarie, Coffs Harbour, Dubbo, Burnt Pine, Tamworth, Wagga Wagga, Taree, Williamtown, Devonport, Currie, Mount Gambier, Kalgoorlie, Port Hedland, Burnie, Taupo, Dunedin, Gisborne, Hamilton, Kerikeri, Kaitaia, New Plymouth, Napier, Nelson, Palmerston North, Paraparaumu, Rotorua, Tauranga, Blenheim, Whakatane, Whangarei, Wanganui, Albany, Busselton, Derby, Esperance, Geraldton, Ravensthorpe, Newman, Paraburdoo, Kununurra, Exmouth, Christmas Island, Honiara, Luganville, Hokitika, Invercargill, ...]","[SYD, MEL, PER, AKL, WLG, BNE, CHC, CBR, AYQ, ASP, OOL, CNS, MCY, ADL, DRW, NAN, POM, PPT, APW, MAJ, CXI, TRW, INU, NOU, RAR, SUV, TBU, VLI, ZQN, ARM, BHQ, HTI, MKY, BNK, PPP, BME, BHS, TSV, GLT, GFF, HVB, LDH, LSY, AVV, ABX, MIM, HBA, MQL, LST, MRZ, MYA, NRA, OAG, KTA, PKE, PQQ, CFS, DBO, NLK, TMW, WGA, TRO, NTL, DPO, KNS, MGB, KGI, PHE, BWT, TUO, DUD, GIS, HLZ, KKE, KAT, NPL, NPE, NSN, PMR, PPQ, ROT, TRG, BHE, WHK, WRE, WAG, ALH, BQB, DCN, EPR, GET, RVT, ZNE, PBO, KNX, LEA, XCH, HIR, SON, HKK, ...]"
5,2461,118,"[Montevideo, Rio de Janeiro, Sao Paulo, Buenos Aires, Lima, Santiago, Curacao, Caracas, Guayaquil, Bogota, Panama City, Quito, Zandery, Belém, Brasília, Belo Horizonte, Curitiba, Manaus, Recife, Salvador, Asunción, Barranquilla, Cali, Rionegro, La Paz / El Alto, Barcelona, Maracaibo, Armenia, Santa Cruz, Rosario, Cordoba, Mendoza, Puerto Iguazu, Rawson, El Calafate, Ushuahia, San Carlos de Bariloche, Aracaju, Araçatuba, Cascavel, Campo Grande, Cuiabá, Foz Do Iguaçu, Florianópolis, Fortaleza, Goiânia, Ilhéus, Ipatinga, João Pessoa, Juazeiro Do Norte, Joinville, Campinas, Londrina, Maringá, Maceió, Navegantes, Porto Alegre, Passo Fundo, Porto Seguro, Vitória Da Conquista, Rio De Janeiro, Ribeirão Preto, Natal, São Luís, São Paulo, São José Do Rio Preto, Teresina, Uberlândia, Uberaba, Vitória, Arica, Copiapo, Balmaceda, Calama, Punta Arenas, Iquique, Antofagasta, Concepcion, Osorno, La Serena-Coquimbo, Temuco, Puerto Montt, Ciudad del Este, Cochabamba, Pucallpa, Tingo Maria, Chiclayo, Ayacucho, Andahuaylas, Anta, Jauja, Juliaca, Cajamarca, Tumbes, Huánuco, Iquitos, Arequipa, Trujillo, Tarapoto, Tacna, ...]","[MVD, GIG, GRU, EZE, LIM, SCL, CUR, CCS, GYE, BOG, PTY, UIO, PBM, BEL, BSB, CNF, CWB, MAO, REC, SSA, ASU, BAQ, CLO, MDE, LPB, BLA, MAR, AXM, VVI, ROS, AEP, COR, MDZ, IGR, REL, FTE, USH, BRC, AJU, ARU, PLU, CAC, CGR, CGB, IGU, FLN, FOR, GYN, IOS, IPN, JPA, JDO, JOI, VCP, LDB, MGF, MCZ, NVT, POA, PFB, BPS, VDC, SDU, RAO, NAT, SLZ, CGH, SJP, THE, UDI, UBA, VIX, ARI, CPO, BBA, CJC, PUQ, IQQ, ANF, CCP, ZOS, LSC, ZCO, PMC, AGT, CBB, PCL, TGI, CIX, AYP, ANS, ATA, JAU, JUL, CJA, TBP, HUU, IQT, AQP, TRU, ...]"
6,3232,64,"[Yellowknife, Iqaluit, La Grande Rivière, Kuujjuarapik, Hay River, Chibougamau, Fort Smith, Rouyn-Noranda, Val-d'Or, Kuujjuaq, Whatì, Chisasibi, Kangirsuk, Cambridge Bay, Kugluktuk, Clyde River, Flin Flon, Fort Simpson, Igloolik, Gillam, Nemiscau, Gjoa Haven, Waskaganish, Aupaluk, Kimmirut, Lutselk'e, Kangiqsualujjuaq, Moosonee, Umiujaq, Inukjuak, Puvirnituq, Fort Chipewyan, The Pas, Gamètì, Rankin Inlet, Sanikiluaq, Cape Dorset, Thompson, Tasiujaq, Hall Beach, Norman Wells, Déline, Pangnirtung, Taloyoak, Churchill, Coral Harbour, Fort Albany, Chesterfield Inlet, Repulse Bay, Akulivik, Tulita, Eastmain River, Whale Cove, Qikiqtarjuaq, Quaqtaq, Wemindji, Pond Inlet, Ulukhaktok, Fort Good Hope, Wekweètì, Inuvik, Arviat, Baker Lake, Arctic Bay]","[YZF, YFB, YGL, YGW, YHY, YMT, YSM, YUY, YVO, YVP, YLE, YKU, YKG, YCB, YCO, YCY, YFO, YFS, YGT, YGX, YNS, YHK, YKQ, YPJ, YLC, YSG, XGR, YMO, YUD, YPH, YPX, YPY, YQD, YRA, YRT, YSK, YTE, YTH, YTQ, YUX, YVQ, YWJ, YXP, YYH, YYQ, YZS, YFA, YCS, YUT, AKV, ZFN, ZEM, YXN, YVM, YQC, YNC, YIO, YHI, YGH, YFJ, YEV, YEK, YBK, YAB]"
7,2688,53,"[Moscow, Ashgabat, Astana, Baku, Almaty, St. Petersburg, Tashkent, Minsk, Atyrau, Tbilisi, Khabarovsk, Yuzhno-Sakhalinsk, Novosibirsk, Dushanbe, Nizhny Novgorod, Samara, Yerevan, Yekaterinburg, Urgench, Yakutsk, Irkutsk, Ulan Ude, Krasnoyarsk, Bishkek, Krasnodar, Mineralnyye Vody, Rostov-on-Don, Volgograd, Jebel Ali, Dnipropetrovsk, Odessa, Gazipaşa, Cherepovets, Donetsk, Chelyabinsk, Perm, Voronezh, Saratov, Ufa, Dzhezkazgan, Rimini, Karlovy Vary, Osh, Aktau, Omsk, Norilsk, Sochi, Surgut, Khudzhand, Namangan, Kazan]","[DME, SVO, ASB, VKO, TSE, GYD, ALA, LED, TAS, MSQ, GUW, TBS, KHV, UUS, OVB, DYU, GOJ, KUF, EVN, SVX, UGC, YKS, IKT, UUD, KJA, FRU, KRR, MRV, ROV, VOG, DWC, DNK, ODS, GZP, CEE, DOK, CEK, PEE, VOZ, RTW, UFA, DZN, RMI, KLV, OSS, SCO, OMS, NSK, AER, SGC, LBD, NMA, KZN]"
8,2369,15,"[Fairbanks, Barrow, Deadhorse, Healy, Barter Island Lrrs, Bettles, Central, Chalkyitsik, Galena, Anaktuvuk Pass, Manley Hot Springs, Nuiqsut, Ruby, Tanana, Atqasuk]","[FAI, BRW, SCC, HKB, BTI, BTT, CEM, CIK, GAL, AKP, MLY, NUI, RBY, TAL, ATK]"
9,3240,14,"[Fort Frances, Kenora, Red Lake, Sioux Lookout, Kingfisher Lake, Cat Lake, Kasabonika, Bearskin Lake, Muskrat Dam, Sachigo Lake, Big Trout Lake, Angling Lake, Sandy Lake, Dryden]","[YAG, YQK, YRL, YXL, KIF, YAC, XKS, XBE, MSA, ZPB, YTL, YAX, ZSJ, YHD]"


*Node similarity Algorithm*

In [51]:
# Helper that runs a Cypher query and returns its records as dictionaries
def execute_query(driver, query):
    """Execute ``query`` and return every record converted with ``record.data()``."""
    collected = []
    with driver.session() as db_session:
        collected.extend(rec.data() for rec in db_session.run(query))
    return collected

# Cypher: stream node-similarity pairs for the 'routes' graph, resolve both
# node ids back to their nodes, and return one row per source airport
# (iata, city) with its similar airports collected as
# {iata, city, similarityScore} maps; rows are ordered by city and limited
# to the first 20.
query = """
CALL gds.nodeSimilarity.stream('routes')
YIELD node1, node2, similarity
WITH gds.util.asNode(node1) AS n1, gds.util.asNode(node2) AS n2, similarity
RETURN
    n1.iata AS iata,
    n1.city AS city,
    COLLECT({iata:n2.iata, city:n2.city, similarityScore: similarity}) AS similarAirports
ORDER BY city LIMIT 20
"""

# Start from an empty result so that a failed query produces an empty
# DataFrame below instead of reusing `results` left over from an earlier cell
# (or raising NameError when no earlier cell defined it).
results = []
try:
    results = execute_query(driver, query)
    print("Node similarity Algorithm:\n", results)
except Exception as e:
    # Best effort: report the failure and continue with the empty result.
    print(f"Error running Node similarity Algorithm: {e}")

# Convert the results to a DataFrame
df = pd.DataFrame(results)

pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Display the DataFrame
df

Node similarity Algorithm:
 [{'iata': 'AAL', 'city': 'Aalborg', 'similarAirports': [{'iata': 'KRS', 'city': 'Kjevik', 'similarityScore': 0.3333333333333333}, {'iata': 'HAU', 'city': 'Karmøy', 'similarityScore': 0.2727272727272727}, {'iata': 'SZZ', 'city': 'Goleniow', 'similarityScore': 0.2608695652173913}, {'iata': 'AAR', 'city': 'Aarhus', 'similarityScore': 0.25}, {'iata': 'TRF', 'city': 'Torp', 'similarityScore': 0.24444444444444444}, {'iata': 'BLL', 'city': 'Billund', 'similarityScore': 0.23333333333333334}, {'iata': 'AES', 'city': 'Ålesund', 'similarityScore': 0.22727272727272727}, {'iata': 'GOT', 'city': 'Gothenburg', 'similarityScore': 0.21875}, {'iata': 'SVG', 'city': 'Stavanger', 'similarityScore': 0.21568627450980393}, {'iata': 'KYA', 'city': 'Konya', 'similarityScore': 0.21052631578947367}]}, {'iata': 'AAR', 'city': 'Aarhus', 'similarAirports': [{'iata': 'AAL', 'city': 'Aalborg', 'similarityScore': 0.25}, {'iata': 'GOT', 'city': 'Gothenburg', 'similarityScore': 0.234375}, {'i

Unnamed: 0,iata,city,similarAirports
0,AAL,Aalborg,"[{'iata': 'KRS', 'city': 'Kjevik', 'similarityScore': 0.3333333333333333}, {'iata': 'HAU', 'city': 'Karmøy', 'similarityScore': 0.2727272727272727}, {'iata': 'SZZ', 'city': 'Goleniow', 'similarityScore': 0.2608695652173913}, {'iata': 'AAR', 'city': 'Aarhus', 'similarityScore': 0.25}, {'iata': 'TRF', 'city': 'Torp', 'similarityScore': 0.24444444444444444}, {'iata': 'BLL', 'city': 'Billund', 'similarityScore': 0.23333333333333334}, {'iata': 'AES', 'city': 'Ålesund', 'similarityScore': 0.22727272727272727}, {'iata': 'GOT', 'city': 'Gothenburg', 'similarityScore': 0.21875}, {'iata': 'SVG', 'city': 'Stavanger', 'similarityScore': 0.21568627450980393}, {'iata': 'KYA', 'city': 'Konya', 'similarityScore': 0.21052631578947367}]"
1,AAR,Aarhus,"[{'iata': 'AAL', 'city': 'Aalborg', 'similarityScore': 0.25}, {'iata': 'GOT', 'city': 'Gothenburg', 'similarityScore': 0.234375}, {'iata': 'TLL', 'city': 'Tallinn', 'similarityScore': 0.22916666666666666}, {'iata': 'VXO', 'city': 'Växjö', 'similarityScore': 0.21739130434782608}, {'iata': 'SVG', 'city': 'Stavanger', 'similarityScore': 0.21153846153846154}, {'iata': 'HAU', 'city': 'Karmøy', 'similarityScore': 0.20833333333333334}, {'iata': 'KRS', 'city': 'Kjevik', 'similarityScore': 0.20833333333333334}, {'iata': 'OSD', 'city': 'Östersund', 'similarityScore': 0.2}, {'iata': 'AOI', 'city': 'Ancona', 'similarityScore': 0.19230769230769232}, {'iata': 'XRY', 'city': 'Jerez de la Forntera', 'similarityScore': 0.1875}]"
2,YXX,Abbotsford,"[{'iata': 'YQU', 'city': 'Grande Prairie', 'similarityScore': 0.2857142857142857}, {'iata': 'YQR', 'city': 'Regina', 'similarityScore': 0.23529411764705882}, {'iata': 'YQQ', 'city': 'Comox', 'similarityScore': 0.2222222222222222}, {'iata': 'YHM', 'city': 'Hamilton', 'similarityScore': 0.21052631578947367}, {'iata': 'YXJ', 'city': 'Fort St.John', 'similarityScore': 0.2}, {'iata': 'YXE', 'city': 'Saskatoon', 'similarityScore': 0.2}, {'iata': 'YLW', 'city': 'Kelowna', 'similarityScore': 0.1875}, {'iata': 'YXU', 'city': 'London', 'similarityScore': 0.17647058823529413}, {'iata': 'YMM', 'city': 'Fort McMurray', 'similarityScore': 0.16666666666666666}, {'iata': 'YXS', 'city': 'Prince George', 'similarityScore': 0.14285714285714285}]"
3,ABR,Aberdeen,"[{'iata': 'HIB', 'city': 'Hibbing', 'similarityScore': 1.0}, {'iata': 'BJI', 'city': 'Bemidji', 'similarityScore': 1.0}, {'iata': 'ISN', 'city': 'Williston', 'similarityScore': 0.5}, {'iata': 'ATY', 'city': 'Watertown', 'similarityScore': 0.5}, {'iata': 'RHI', 'city': 'Rhinelander', 'similarityScore': 0.5}, {'iata': 'INL', 'city': 'International Falls', 'similarityScore': 0.5}, {'iata': 'APN', 'city': 'Alpena', 'similarityScore': 0.5}, {'iata': 'BRD', 'city': 'Brainerd', 'similarityScore': 0.5}, {'iata': 'DIK', 'city': 'Dickinson', 'similarityScore': 0.5}, {'iata': 'LSE', 'city': 'La Crosse', 'similarityScore': 0.3333333333333333}]"
4,ABZ,Aberdeen,"[{'iata': 'INV', 'city': 'Inverness', 'similarityScore': 0.3023255813953488}, {'iata': 'MJV', 'city': 'San Javier', 'similarityScore': 0.2708333333333333}, {'iata': 'JER', 'city': 'Saint Helier', 'similarityScore': 0.25862068965517243}, {'iata': 'GLA', 'city': 'Glasgow', 'similarityScore': 0.24528301886792453}, {'iata': 'NCL', 'city': 'Newcastle', 'similarityScore': 0.24050632911392406}, {'iata': 'BHD', 'city': 'Belfast', 'similarityScore': 0.23529411764705882}, {'iata': 'GOT', 'city': 'Gothenburg', 'similarityScore': 0.23170731707317074}, {'iata': 'BLL', 'city': 'Billund', 'similarityScore': 0.22784810126582278}, {'iata': 'ORK', 'city': 'Cork', 'similarityScore': 0.22727272727272727}, {'iata': 'EDI', 'city': 'Edinburgh', 'similarityScore': 0.22377622377622378}]"
5,AHB,Abha,"[{'iata': 'ELQ', 'city': 'Buraidah', 'similarityScore': 0.5}, {'iata': 'YNB', 'city': 'Yenbo', 'similarityScore': 0.4375}, {'iata': 'HAS', 'city': 'Ha'il', 'similarityScore': 0.4}, {'iata': 'TUU', 'city': 'Tabuk', 'similarityScore': 0.375}, {'iata': 'ADE', 'city': 'Aden', 'similarityScore': 0.36363636363636365}, {'iata': 'RIY', 'city': 'Riyan', 'similarityScore': 0.35294117647058826}, {'iata': 'TIF', 'city': 'Ta’if', 'similarityScore': 0.3333333333333333}, {'iata': 'ASM', 'city': 'Asmara', 'similarityScore': 0.3157894736842105}, {'iata': 'SKT', 'city': 'Sialkot', 'similarityScore': 0.3}, {'iata': 'ATZ', 'city': 'Assiut', 'similarityScore': 0.26666666666666666}]"
6,ABJ,Abidjan,"[{'iata': 'BKO', 'city': 'Senou', 'similarityScore': 0.5}, {'iata': 'OUA', 'city': 'Ouagadougou', 'similarityScore': 0.4444444444444444}, {'iata': 'DKR', 'city': 'Dakar', 'similarityScore': 0.4}, {'iata': 'LFW', 'city': 'Lomé', 'similarityScore': 0.3870967741935484}, {'iata': 'COO', 'city': 'Cotonou', 'similarityScore': 0.34375}, {'iata': 'DLA', 'city': 'Douala', 'similarityScore': 0.30303030303030304}, {'iata': 'FNA', 'city': 'Freetown', 'similarityScore': 0.27586206896551724}, {'iata': 'ACC', 'city': 'Accra', 'similarityScore': 0.2608695652173913}, {'iata': 'NKC', 'city': 'Nouakchott', 'similarityScore': 0.25925925925925924}, {'iata': 'NSI', 'city': 'Yaoundé', 'similarityScore': 0.25925925925925924}]"
7,ABI,Abilene,"[{'iata': 'ACT', 'city': 'Waco', 'similarityScore': 1.0}, {'iata': 'GGG', 'city': 'Longview', 'similarityScore': 1.0}, {'iata': 'GCK', 'city': 'Garden City', 'similarityScore': 1.0}, {'iata': 'BPT', 'city': 'Beaumont/Port Arthur', 'similarityScore': 1.0}, {'iata': 'SPS', 'city': 'Wichita Falls', 'similarityScore': 1.0}, {'iata': 'SJT', 'city': 'San Angelo', 'similarityScore': 1.0}, {'iata': 'LAW', 'city': 'Lawton', 'similarityScore': 1.0}, {'iata': 'JLN', 'city': 'Joplin', 'similarityScore': 1.0}, {'iata': 'TXK', 'city': 'Texarkana', 'similarityScore': 1.0}, {'iata': 'LCH', 'city': 'Lake Charles', 'similarityScore': 0.5}]"
8,AZI,Abu Dhabi,"[{'iata': 'XSB', 'city': 'Sir Bani Yas Island', 'similarityScore': 0.2}, {'iata': 'BDH', 'city': 'Bandar Lengeh', 'similarityScore': 0.2}, {'iata': 'LFM', 'city': 'Lamerd', 'similarityScore': 0.16666666666666666}, {'iata': 'BUZ', 'city': 'Bushehr', 'similarityScore': 0.14285714285714285}, {'iata': 'KDH', 'city': 'Kandahar', 'similarityScore': 0.14285714285714285}, {'iata': 'GSM', 'city': 'Gheshm', 'similarityScore': 0.125}, {'iata': 'ZYL', 'city': 'Sylhet', 'similarityScore': 0.125}, {'iata': 'LRR', 'city': 'Lar', 'similarityScore': 0.1111111111111111}, {'iata': 'BBO', 'city': 'Berbera', 'similarityScore': 0.1111111111111111}, {'iata': 'HAS', 'city': 'Ha'il', 'similarityScore': 0.1}]"
9,AUH,Abu Dhabi,"[{'iata': 'DOH', 'city': 'Doha', 'similarityScore': 0.43781094527363185}, {'iata': 'MCT', 'city': 'Muscat', 'similarityScore': 0.4032258064516129}, {'iata': 'DXB', 'city': 'Dubai', 'similarityScore': 0.37254901960784315}, {'iata': 'BOM', 'city': 'Mumbai', 'similarityScore': 0.3586206896551724}, {'iata': 'RUH', 'city': 'Riyadh', 'similarityScore': 0.35172413793103446}, {'iata': 'BAH', 'city': 'Manama', 'similarityScore': 0.3448275862068966}, {'iata': 'JED', 'city': 'Jeddah', 'similarityScore': 0.34375}, {'iata': 'KWI', 'city': 'Kuwait', 'similarityScore': 0.3308270676691729}, {'iata': 'DEL', 'city': 'New Delhi', 'similarityScore': 0.32335329341317365}, {'iata': 'CAI', 'city': 'Cairo', 'similarityScore': 0.3197278911564626}]"


In [45]:
# Run the Node Similarity algorithm and collect the top 3 most similar airports for each airport node in the graph (first 20 airports, ordered by city)
def execute_query(driver, query):
    """Run a Cypher query and return each result record as a dict.

    Args:
        driver: Object whose ``session()`` returns a context-managed session
            (here, the Neo4j driver created earlier in the notebook).
        query: Cypher statement to execute.

    Returns:
        list: One ``record.data()`` value per returned record, in result order.
    """
    rows = []
    with driver.session() as session:
        # Drain the result while the session is still open.
        for record in session.run(query):
            rows.append(record.data())
    return rows

# Stream Node Similarity over the 'routes' graph, keeping at most 3 similar
# airports per airport (topK: 3), and collect them into one row per airport.
query = """
CALL gds.nodeSimilarity.stream(
    'routes',
    {
        topK: 3
    }
)
YIELD node1, node2, similarity
WITH gds.util.asNode(node1) AS n1, gds.util.asNode(node2) AS n2, similarity
RETURN
    n1.iata AS iata,
    n1.city AS city,
    COLLECT({iata:n2.iata, city:n2.city, similarityScore: similarity}) AS similarAirports
ORDER BY city LIMIT 20
"""

# Start from an empty result so that a failed query produces an empty DataFrame
# below instead of a NameError (fresh kernel) or a previous cell's stale `results`.
results = []
try:
    results = execute_query(driver, query)
    print("Node similarity Algorithm:\n", results)
except Exception as e:
    print(f"Error running Node similarity Algorithm: {e}")

# Convert the results to a DataFrame
df = pd.DataFrame(results)

# Lift pandas' display limits so the nested similarAirports lists are shown in full.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Display the DataFrame
df

Node similarity Algorithm:
 [{'iata': 'AAL', 'city': 'Aalborg', 'similarAirports': [{'iata': 'KRS', 'city': 'Kjevik', 'similarityScore': 0.3333333333333333}, {'iata': 'HAU', 'city': 'Karmøy', 'similarityScore': 0.2727272727272727}, {'iata': 'SZZ', 'city': 'Goleniow', 'similarityScore': 0.2608695652173913}]}, {'iata': 'AAR', 'city': 'Aarhus', 'similarAirports': [{'iata': 'AAL', 'city': 'Aalborg', 'similarityScore': 0.25}, {'iata': 'GOT', 'city': 'Gothenburg', 'similarityScore': 0.234375}, {'iata': 'TLL', 'city': 'Tallinn', 'similarityScore': 0.22916666666666666}]}, {'iata': 'YXX', 'city': 'Abbotsford', 'similarAirports': [{'iata': 'YQU', 'city': 'Grande Prairie', 'similarityScore': 0.2857142857142857}, {'iata': 'YQR', 'city': 'Regina', 'similarityScore': 0.23529411764705882}, {'iata': 'YQQ', 'city': 'Comox', 'similarityScore': 0.2222222222222222}]}, {'iata': 'ABR', 'city': 'Aberdeen', 'similarAirports': [{'iata': 'HIB', 'city': 'Hibbing', 'similarityScore': 1.0}, {'iata': 'BJI', 'city':

Unnamed: 0,iata,city,similarAirports
0,AAL,Aalborg,"[{'iata': 'KRS', 'city': 'Kjevik', 'similarityScore': 0.3333333333333333}, {'iata': 'HAU', 'city': 'Karmøy', 'similarityScore': 0.2727272727272727}, {'iata': 'SZZ', 'city': 'Goleniow', 'similarityScore': 0.2608695652173913}]"
1,AAR,Aarhus,"[{'iata': 'AAL', 'city': 'Aalborg', 'similarityScore': 0.25}, {'iata': 'GOT', 'city': 'Gothenburg', 'similarityScore': 0.234375}, {'iata': 'TLL', 'city': 'Tallinn', 'similarityScore': 0.22916666666666666}]"
2,YXX,Abbotsford,"[{'iata': 'YQU', 'city': 'Grande Prairie', 'similarityScore': 0.2857142857142857}, {'iata': 'YQR', 'city': 'Regina', 'similarityScore': 0.23529411764705882}, {'iata': 'YQQ', 'city': 'Comox', 'similarityScore': 0.2222222222222222}]"
3,ABR,Aberdeen,"[{'iata': 'HIB', 'city': 'Hibbing', 'similarityScore': 1.0}, {'iata': 'BJI', 'city': 'Bemidji', 'similarityScore': 1.0}, {'iata': 'ISN', 'city': 'Williston', 'similarityScore': 0.5}]"
4,ABZ,Aberdeen,"[{'iata': 'INV', 'city': 'Inverness', 'similarityScore': 0.3023255813953488}, {'iata': 'MJV', 'city': 'San Javier', 'similarityScore': 0.2708333333333333}, {'iata': 'JER', 'city': 'Saint Helier', 'similarityScore': 0.25862068965517243}]"
5,AHB,Abha,"[{'iata': 'ELQ', 'city': 'Buraidah', 'similarityScore': 0.5}, {'iata': 'YNB', 'city': 'Yenbo', 'similarityScore': 0.4375}, {'iata': 'HAS', 'city': 'Ha'il', 'similarityScore': 0.4}]"
6,ABJ,Abidjan,"[{'iata': 'BKO', 'city': 'Senou', 'similarityScore': 0.5}, {'iata': 'OUA', 'city': 'Ouagadougou', 'similarityScore': 0.4444444444444444}, {'iata': 'DKR', 'city': 'Dakar', 'similarityScore': 0.4}]"
7,ABI,Abilene,"[{'iata': 'ACT', 'city': 'Waco', 'similarityScore': 1.0}, {'iata': 'BPT', 'city': 'Beaumont/Port Arthur', 'similarityScore': 1.0}, {'iata': 'TXK', 'city': 'Texarkana', 'similarityScore': 1.0}]"
8,AZI,Abu Dhabi,"[{'iata': 'XSB', 'city': 'Sir Bani Yas Island', 'similarityScore': 0.2}, {'iata': 'BDH', 'city': 'Bandar Lengeh', 'similarityScore': 0.2}, {'iata': 'LFM', 'city': 'Lamerd', 'similarityScore': 0.16666666666666666}]"
9,AUH,Abu Dhabi,"[{'iata': 'DOH', 'city': 'Doha', 'similarityScore': 0.43781094527363185}, {'iata': 'MCT', 'city': 'Muscat', 'similarityScore': 0.4032258064516129}, {'iata': 'DXB', 'city': 'Dubai', 'similarityScore': 0.37254901960784315}]"


In [48]:
# Run the Node Similarity algorithm keeping only the single most similar airport for each airport (topK: 1), then return the 10 airport pairs with the highest similarity across the whole graph (topN: 10).
def execute_query(driver, query):
    """Run a Cypher query and return each result record as a dict.

    Args:
        driver: Object whose ``session()`` returns a context-managed session
            (here, the Neo4j driver created earlier in the notebook).
        query: Cypher statement to execute.

    Returns:
        list: One ``record.data()`` value per returned record, in result order.
    """
    rows = []
    with driver.session() as session:
        # Drain the result while the session is still open.
        for record in session.run(query):
            rows.append(record.data())
    return rows

# Stream Node Similarity over the 'routes' graph with topK: 1 and topN: 10,
# returning one row per (airport, similar airport) pair with its score.
query = """
CALL gds.nodeSimilarity.stream(
    'routes',
    {
        topK: 1,
        topN: 10
    }
)
YIELD node1, node2, similarity
WITH gds.util.asNode(node1) AS n1, gds.util.asNode(node2) AS n2, similarity AS similarityScore
RETURN
    n1.iata AS iata,
    n1.city AS city,
    {iata:n2.iata, city:n2.city} AS similarAirport,
    similarityScore
ORDER BY city
"""

# Start from an empty result so that a failed query produces an empty DataFrame
# below instead of a NameError (fresh kernel) or a previous cell's stale `results`.
results = []
try:
    results = execute_query(driver, query)
    print("Node similarity Algorithm:\n", results)
except Exception as e:
    print(f"Error running Node similarity Algorithm: {e}")

# Convert the results to a DataFrame
df = pd.DataFrame(results)

# Lift pandas' display limits so every column and the full similarAirport map are shown.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Display the DataFrame
df

Node similarity Algorithm:
 [{'iata': 'ABI', 'city': 'Abilene', 'similarAirport': {'iata': 'TXK', 'city': 'Texarkana'}, 'similarityScore': 1.0}, {'iata': 'AEX', 'city': 'Alexandria', 'similarAirport': {'iata': 'GRK', 'city': 'Fort Hood/Killeen'}, 'similarityScore': 1.0}, {'iata': 'BPT', 'city': 'Beaumont/Port Arthur', 'similarAirport': {'iata': 'TXK', 'city': 'Texarkana'}, 'similarityScore': 1.0}, {'iata': 'CLL', 'city': 'College Station', 'similarAirport': {'iata': 'LCH', 'city': 'Lake Charles'}, 'similarityScore': 1.0}, {'iata': 'DRO', 'city': 'Durango', 'similarAirport': {'iata': 'SAF', 'city': 'Santa Fe'}, 'similarityScore': 1.0}, {'iata': 'GCK', 'city': 'Garden City', 'similarAirport': {'iata': 'TXK', 'city': 'Texarkana'}, 'similarityScore': 1.0}, {'iata': 'IAG', 'city': 'Niagara Falls', 'similarAirport': {'iata': 'PSM', 'city': 'Portsmouth'}, 'similarityScore': 1.0}, {'iata': 'SAF', 'city': 'Santa Fe', 'similarAirport': {'iata': 'DRO', 'city': 'Durango'}, 'similarityScore': 1.0},

Unnamed: 0,iata,city,similarAirport,similarityScore
0,ABI,Abilene,"{'iata': 'TXK', 'city': 'Texarkana'}",1.0
1,AEX,Alexandria,"{'iata': 'GRK', 'city': 'Fort Hood/Killeen'}",1.0
2,BPT,Beaumont/Port Arthur,"{'iata': 'TXK', 'city': 'Texarkana'}",1.0
3,CLL,College Station,"{'iata': 'LCH', 'city': 'Lake Charles'}",1.0
4,DRO,Durango,"{'iata': 'SAF', 'city': 'Santa Fe'}",1.0
5,GCK,Garden City,"{'iata': 'TXK', 'city': 'Texarkana'}",1.0
6,IAG,Niagara Falls,"{'iata': 'PSM', 'city': 'Portsmouth'}",1.0
7,SAF,Santa Fe,"{'iata': 'DRO', 'city': 'Durango'}",1.0
8,TXK,Texarkana,"{'iata': 'ABI', 'city': 'Abilene'}",1.0
9,ACT,Waco,"{'iata': 'TXK', 'city': 'Texarkana'}",1.0


In [49]:
# Run the Node Similarity algorithm keeping only the single most similar airport for each airport (topK: 1), then return the 10 of those airport pairs with the lowest similarity (bottomN: 10).
def execute_query(driver, query):
    """Run a Cypher query and return each result record as a dict.

    Args:
        driver: Object whose ``session()`` returns a context-managed session
            (here, the Neo4j driver created earlier in the notebook).
        query: Cypher statement to execute.

    Returns:
        list: One ``record.data()`` value per returned record, in result order.
    """
    rows = []
    with driver.session() as session:
        # Drain the result while the session is still open.
        for record in session.run(query):
            rows.append(record.data())
    return rows

# Stream Node Similarity over the 'routes' graph with topK: 1 and bottomN: 10,
# returning one row per (airport, similar airport) pair with its score.
query = """
CALL gds.nodeSimilarity.stream(
    'routes',
    {
        topK: 1,
        bottomN: 10
    }
)
YIELD node1, node2, similarity
WITH gds.util.asNode(node1) AS n1, gds.util.asNode(node2) AS n2, similarity AS similarityScore
RETURN
    n1.iata AS iata,
    n1.city AS city,
    {iata:n2.iata, city:n2.city} AS similarAirport,
    similarityScore
ORDER BY city
"""

# Start from an empty result so that a failed query produces an empty DataFrame
# below instead of a NameError (fresh kernel) or a previous cell's stale `results`.
results = []
try:
    results = execute_query(driver, query)
    print("Node similarity Algorithm:\n", results)
except Exception as e:
    print(f"Error running Node similarity Algorithm: {e}")

# Convert the results to a DataFrame
df = pd.DataFrame(results)

# Lift pandas' display limits so every column and the full similarAirport map are shown.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Display the DataFrame
df

Node similarity Algorithm:
 [{'iata': 'VLY', 'city': 'Angelsey', 'similarAirport': {'iata': 'GCI', 'city': 'Saint Peter Port'}, 'similarityScore': 0.045454545454545456}, {'iata': 'BET', 'city': 'Bethel', 'similarAirport': {'iata': 'ANI', 'city': 'Aniak'}, 'similarityScore': 0.07142857142857142}, {'iata': 'BNB', 'city': 'Boende', 'similarAirport': {'iata': 'FIH', 'city': 'Kinshasa'}, 'similarityScore': 0.058823529411764705}, {'iata': 'HMV', 'city': 'Hemavan', 'similarAirport': {'iata': 'ARN', 'city': 'Stockholm'}, 'similarityScore': 0.005952380952380952}, {'iata': 'HJR', 'city': 'Khajuraho', 'similarAirport': {'iata': 'JAI', 'city': 'Jaipur'}, 'similarityScore': 0.058823529411764705}, {'iata': 'MMU', 'city': 'Morristown', 'similarAirport': {'iata': 'MSN', 'city': 'Madison'}, 'similarityScore': 0.05263157894736842}, {'iata': 'PRI', 'city': 'Praslin Island', 'similarAirport': {'iata': 'DUR', 'city': 'Durban'}, 'similarityScore': 0.0625}, {'iata': 'ISC', 'city': "St. Mary's", 'similarAirpo

Unnamed: 0,iata,city,similarAirport,similarityScore
0,VLY,Angelsey,"{'iata': 'GCI', 'city': 'Saint Peter Port'}",0.045455
1,BET,Bethel,"{'iata': 'ANI', 'city': 'Aniak'}",0.071429
2,BNB,Boende,"{'iata': 'FIH', 'city': 'Kinshasa'}",0.058824
3,HMV,Hemavan,"{'iata': 'ARN', 'city': 'Stockholm'}",0.005952
4,HJR,Khajuraho,"{'iata': 'JAI', 'city': 'Jaipur'}",0.058824
5,MMU,Morristown,"{'iata': 'MSN', 'city': 'Madison'}",0.052632
6,PRI,Praslin Island,"{'iata': 'DUR', 'city': 'Durban'}",0.0625
7,ISC,St. Mary's,"{'iata': 'EGC', 'city': 'Bergerac/Roumanière'}",0.071429
8,TVF,Thief River Falls,"{'iata': 'AZA', 'city': 'Phoenix'}",0.017857
9,WRL,Worland,"{'iata': 'DEN', 'city': 'Denver'}",0.00463
