# 3.2.b Simple Search Feature

In [1]:
import os
import sys
import gzip
import json
from pathlib import Path
import csv

import pandas as pd
import s3fs
import pyarrow as pa
from pyarrow.json import read_json
import pyarrow.parquet as pq
import fastavro
import pygeohash as pgh
import snappy
import jsonschema
from jsonschema.exceptions import ValidationError


endpoint_url='https://storage.budsc.midwest-datascience.com'

current_dir = Path(os.getcwd()).absolute()
schema_dir = current_dir.joinpath('schemas')
schema_dir.mkdir(parents=True, exist_ok=True)
results_dir = current_dir.joinpath('results')
results_dir.mkdir(parents=True, exist_ok=True)

def read_jsonl_data():
    s3 = s3fs.S3FileSystem(
        anon=True,
        client_kwargs={
            'endpoint_url': endpoint_url
        }
    )
    src_data_path = 'data/processed/openflights/routes.jsonl.gz'
    with s3.open(src_data_path, 'rb') as f_gz:
        with gzip.open(f_gz, 'rb') as f:
            records = [json.loads(line) for line in f.readlines()]
        

    return records

records = read_jsonl_data()

In [2]:

df = pd.json_normalize(records)

df = df.rename({'dst_airport.latitude': 'dst_airport_latitude', 'dst_airport.longitude': 'dst_airport_longitude'}, axis=1)  # new method
dst_airport_latitude = df['dst_airport_latitude']
dst_airport_longitude = df['dst_airport_longitude']

df['geohash'] = df.apply(lambda x: pgh.encode(x.dst_airport_latitude,x.dst_airport_longitude,precision=5), axis=1)
df.head(5)

Unnamed: 0,codeshare,equipment,airline.airline_id,airline.name,airline.alias,airline.iata,airline.icao,airline.callsign,airline.country,airline.active,...,dst_airport_longitude,dst_airport.altitude,dst_airport.timezone,dst_airport.dst,dst_airport.tz_id,dst_airport.type,dst_airport.source,dst_airport,src_airport,geohash
0,False,[CR2],410,Aerocondor,ANA All Nippon Airways,2B,ARD,AEROCONDOR,Portugal,True,...,49.278702,411.0,3.0,N,Europe/Moscow,airport,OurAirports,,,v1gh3
1,False,[CR2],410,Aerocondor,ANA All Nippon Airways,2B,ARD,AEROCONDOR,Portugal,True,...,49.278702,411.0,3.0,N,Europe/Moscow,airport,OurAirports,,,v1gh3
2,False,[CR2],410,Aerocondor,ANA All Nippon Airways,2B,ARD,AEROCONDOR,Portugal,True,...,43.081902,1054.0,3.0,N,Europe/Moscow,airport,OurAirports,,,szyes
3,False,[CR2],410,Aerocondor,ANA All Nippon Airways,2B,ARD,AEROCONDOR,Portugal,True,...,49.278702,411.0,3.0,N,Europe/Moscow,airport,OurAirports,,,v1gh3
4,False,[CR2],410,Aerocondor,ANA All Nippon Airways,2B,ARD,AEROCONDOR,Portugal,True,...,82.650703,365.0,7.0,N,Asia/Krasnoyarsk,airport,OurAirports,,,vcfbb


## the airports which is in a Radius of 300 Km from Bellevue University

In [18]:
from math import radians, cos, sin, asin, sqrt
def haversine(lon1, lat1, lon2, lat2):
  # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

  # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r

In [37]:
jersey_city_long_lat=(95.9182,41.1506)
def row_hsign(row):
    return haversine(*jersey_city_long_lat,row['dst_airport_longitude'],row['dst_airport.altitude'])

df['distance']=df.apply(row_hsign,axis=1)
df[df['distance']<=600]


Unnamed: 0,codeshare,equipment,airline.airline_id,airline.name,airline.alias,airline.iata,airline.icao,airline.callsign,airline.country,airline.active,...,dst_airport.altitude,dst_airport.timezone,dst_airport.dst,dst_airport.tz_id,dst_airport.type,dst_airport.source,dst_airport,src_airport,geohash,distance
443,False,[SF3],20710,Silver Airways (3M),,3M,DAK,Silver Wings,United States,True,...,2302.0,-5.0,U,America/New_York,airport,OurAirports,,,dnwz6,471.429287
1141,False,[320],17885,Interjet (ABC Aerolineas),,4O,IBS,INTERJET,Mexico,True,...,3021.0,-6.0,U,America/Costa_Rica,airport,OurAirports,,,d1u0g,239.379747
1755,False,[DHT],1581,CAL Cargo Air Lines,SN Brussels Airlines,5C,ICL,CAL,Israel,True,...,3021.0,-6.0,U,America/Costa_Rica,airport,OurAirports,,,d1u0g,239.379747
1756,False,[DHT],1581,CAL Cargo Air Lines,SN Brussels Airlines,5C,ICL,CAL,Israel,True,...,3021.0,-6.0,U,America/Costa_Rica,airport,OurAirports,,,d1u0g,239.379747
1759,False,[DHT],1581,CAL Cargo Air Lines,SN Brussels Airlines,5C,ICL,CAL,Israel,True,...,3021.0,-6.0,U,America/Costa_Rica,airport,OurAirports,,,d1u0g,239.379747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63957,False,[73W],4547,Southwest Airlines,SkyWork,WN,SWA,SOUTHWEST,United States,True,...,501.0,-5.0,A,America/New_York,airport,OurAirports,,,dng11,277.468141
64074,False,[73W],4547,Southwest Airlines,SkyWork,WN,SWA,SOUTHWEST,United States,True,...,501.0,-5.0,A,America/New_York,airport,OurAirports,,,dng11,277.468141
64129,False,"[73C, 73W, 733]",4547,Southwest Airlines,SkyWork,WN,SWA,SOUTHWEST,United States,True,...,501.0,-5.0,A,America/New_York,airport,OurAirports,,,dng11,277.468141
64301,False,[73W],4547,Southwest Airlines,SkyWork,WN,SWA,SOUTHWEST,United States,True,...,501.0,-5.0,A,America/New_York,airport,OurAirports,,,dng11,277.468141
