### Imports

In [16]:
import sys
import os 
import math
import pytest

# Make the project's script folder importable. The path components are passed
# separately so os.path.join picks the separator of the current OS instead of
# relying on a hard-coded Windows backslash.
PARENT_PATH = os.path.dirname(os.getcwd())
SCRIPTS_PATH = os.path.join(PARENT_PATH, "final_scripts", "day_1")
# Guard the append so re-running this cell does not add duplicate entries.
if SCRIPTS_PATH not in sys.path:
    sys.path.append(SCRIPTS_PATH)

from class_flight_data import FlightDataAnalyzer

---
### Function Definition

In [17]:
def distance_to(
    analyzer, airport_1: str, airport_2: str, radius_km: float = 6371.0
) -> float:
    """
    Calculates the great-circle distance in kilometers between 'airport_1'
    and 'airport_2' with the haversine formula, based on the coordinates
    stored in analyzer.airports.

    Parameters
    ---------------
    analyzer:
        Object exposing an 'airports' DataFrame with the columns "ICAO",
        "Latitude" and "Longitude" (e.g. an instance of FlightDataAnalyzer()).
        Latitude and longitude are read as degrees.
    airport_1: string
        ICAO code of the first airport.
    airport_2: string
        ICAO code of the second airport.
    radius_km: float, optional
        Radius of the sphere used by the haversine formula. Defaults to
        6371, the Earth radius in kilometers.

    Returns
    ---------------
    distance: float
        The great-circle distance between 'airport_1' and 'airport_2',
        expressed in the unit of 'radius_km' (kilometers by default).

    Raises
    ---------------
    ValueError
        If 'airport_1' or 'airport_2' has no matching row in
        analyzer.airports.
    """
    df_airport_1 = analyzer.airports[
        analyzer.airports["ICAO"] == airport_1
    ]
    if df_airport_1.empty:
        raise ValueError(f"Airport code '{airport_1}' not found in airports dataset.")

    df_airport_2 = analyzer.airports[
        analyzer.airports["ICAO"] == airport_2
    ]
    if df_airport_2.empty:
        raise ValueError(f"Airport code '{airport_2}' not found.")

    # Convert latitude and longitude to radians; when several rows share the
    # same ICAO code, the first matching row is used.
    lat_1 = math.radians(float(df_airport_1["Latitude"].iloc[0]))
    lon_1 = math.radians(float(df_airport_1["Longitude"].iloc[0]))
    lat_2 = math.radians(float(df_airport_2["Latitude"].iloc[0]))
    lon_2 = math.radians(float(df_airport_2["Longitude"].iloc[0]))

    # Haversine formula to calculate distance
    dlat = lat_2 - lat_1
    dlon = lon_2 - lon_1
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat_1) * math.cos(lat_2) * math.sin(dlon / 2) ** 2
    )
    # Floating-point rounding can push 'a' marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make math.sqrt(1 - a) raise a
    # "math domain error". Clamp it back into the valid range.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    distance = radius_km * c

    return distance

---
### Testing Data

In [18]:
# Build the analyzer and inspect the airports table it exposes
analyzer = FlightDataAnalyzer()
df_test = analyzer.airports
display(df_test)

# Unique IATA and ICAO codes found in the table
iata, icao = (df_test[column].unique() for column in ("IATA", "ICAO"))

Data already downloaded


Unnamed: 0_level_0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.081690,145.391998,5282,10,U,Pacific/Port_Moresby
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.207080,145.789001,20,10,U,Pacific/Port_Moresby
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.826790,144.296005,5388,10,U,Pacific/Port_Moresby
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.443380,147.220001,146,10,U,Pacific/Port_Moresby
...,...,...,...,...,...,...,...,...,...,...,...,...
7693,14106,Rogachyovo Air Base,Belaya,Russia,\N,ULDA,71.616699,52.478298,272,\N,\N,\N
7694,14107,Ulan-Ude East Airport,Ulan Ude,Russia,\N,XIUW,51.849998,107.737999,1670,\N,\N,\N
7695,14108,Krechevitsy Air Base,Novgorod,Russia,\N,ULLK,58.625000,31.385000,85,\N,\N,\N
7696,14109,Desierto de Atacama Airport,Copiapo,Chile,CPO,SCAT,-27.261200,-70.779198,670,\N,\N,\N


In [10]:
# Check whether an IATA code is also used as an ICAO code (and vice versa),
# ignoring the "\N" placeholder - no different airports with same codes
has_iata = df_test["IATA"] != r"\N"
has_icao = df_test["ICAO"] != r"\N"
display(df_test[df_test["IATA"].isin(icao) & has_iata])
display(df_test[df_test["ICAO"].isin(iata) & has_icao])

Unnamed: 0_level_0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
4769,6134,Akutan Seaplane Base,Akutan,United States,KQA,KQA,54.13377,-165.778896,0,-9,A,America/Anchorage


Unnamed: 0_level_0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
4769,6134,Akutan Seaplane Base,Akutan,United States,KQA,KQA,54.13377,-165.778896,0,-9,A,America/Anchorage


In [12]:
# Check duplicates for each pair of identifying columns (all rows of a
# duplicated group are shown) - some duplicates in Name, IATA combination
for key_columns in (["Name", "ICAO"], ["Name", "IATA"], ["IATA", "ICAO"]):
    display(df_test[df_test.duplicated(subset=key_columns, keep=False)])


Unnamed: 0_level_0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


Unnamed: 0_level_0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
5823,7846,Arlington Municipal Airport,Arlington,United States,\N,KGKY,32.663898,-97.094299,628,-6,A,America/Chicago
6131,8394,Shelby County Airport,Alabaster,United States,\N,KEET,33.176998,-86.782799,586,-6,A,America/Chicago
6540,9305,Shelby County Airport,Shelbyville,United States,\N,K2H0,39.4104,-88.845398,618,-6,A,America/Chicago
6876,11141,Arlington Municipal Airport,Arlington,United States,\N,KAWO,48.160702,-122.158997,142,-8,A,\N


Unnamed: 0_level_0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


In [14]:
# Count the "\N" placeholders in Name, IATA and ICAO - IATA is very incomplete,
# ICAO only has a single problematic entry
for column in ("Name", "IATA", "ICAO"):
    display(len(df_test[df_test[column] == r"\N"]))

# Show the rows whose ICAO code is the placeholder
display(df_test[df_test["ICAO"] == r"\N"])

0

1626

1

In [20]:
# Some names carry a "Duplicate" tag, which makes Name an unreliable key -
# so function will use ICAO as inputs
is_tagged_duplicate = df_test["Name"].apply(lambda name: "Duplicate" in name)
display(df_test[is_tagged_duplicate])

Unnamed: 0_level_0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
737,753,[Duplicate] Giebelstadt Army Air Field,Giebelstadt,Germany,GHF,ETEU,49.648102,9.96649,980,1,E,Europe/Berlin
4519,5849,(Duplicate) Playa Samara Airport,Playa Samara,Costa Rica,\N,MRSR,9.87,-85.48,10,-6,U,America/Costa_Rica
7358,13011,[Duplicate] Illertissen see EDMI - ED-0425 loc...,Not Specified,New Zealand,\N,XXXX,89.5,0.0001,0,0,U,\N


---
### Testing Function Outputs

In [23]:
# Distance between the first two entries of the airports df - manual
# calculations and online calculations have the same value
airport_1, airport_2 = "AYGA", "AYMD"
print(distance_to(analyzer, airport_1, airport_2))

106.7138992902683


---
### Testing Functionalities

In [30]:
# Test if error raise works - ValueError raised correctly
airport_1 = "error_code"
airport_2 = "AYMD"
try:
    print(distance_to(analyzer, airport_1, airport_2))
except ValueError as error:
    # The exception is the expected outcome of this cell: report it instead of
    # letting it propagate, so "Restart Kernel -> Run All" does not stop here.
    print(f"ValueError: {error}")

ValueError: Airport code 'error_code' not found in airports dataset.

---
### Run black, pylint and mypy on function script

In [34]:
# Black (also run on the file in final_scripts)
!black ..\tests\day_1\function_pylint.py

reformatted ..\tests\day_1\function_pylint.py

All done! ✨ 🍰 ✨
1 file reformatted.


In [38]:
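# First pylint run - only one message (C0103) about the naming of RADIUS: pylint expects snake_case for a local variable, but the name is kept in upper case because it is a constant (PEP 8 naming for constants), so no change will be implemented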
!pylint ..\tests\day_1\function_pylint.py

************* Module function_pylint
c:\Users\marti\OneDrive - Nova SBE\Nova\Master's in Business Analytics\Disciplinas\2nd Semester\t1\Advanced Programming for Data Science\ADPRO_Project\tests\day_1\function_pylint.py:44:4: C0103: Variable name "RADIUS" doesn't conform to snake_case naming style (invalid-name)

------------------------------------------------------------------
Your code has been rated at 9.47/10 (previous run: 9.47/10, +0.00)



In [6]:
# mypy (slightly altered file to execute within script) - ignore the import error since mypy cannot find it due to env configuration
!mypy ..\tests\day_1\function_mypy.py

..\tests\day_1\function_mypy.py:8: [1m[91merror:[0m Cannot find implementation or library stub for module named [0m[1m"class_flight_data"[0m  [0m[93m[import-not-found][0m
..\tests\day_1\function_mypy.py:8: [94mnote:[0m See [4mhttps://mypy.readthedocs.io/en/stable/running_mypy.html#missing-imports[0m[0m
..\tests\day_1\function_mypy.py:63: [1m[91merror:[0m Argument 1 to [0m[1m"distance_to"[0m has incompatible type [0m[1m"int"[0m; expected [0m[1m"str"[0m  [0m[93m[arg-type][0m
[1m[91mFound 2 errors in 1 file (checked 1 source file)[0m


---
### Custom Tests (for pytest task)
[Check expected values using this website.](https://www.meridianoutpost.com/resources/etools/calculators/calculator-latitude-longitude-distance.php)

In [24]:
# Test two airports in different continents
def test_continent():
    """Test distance calculation for airports in different continents."""
    analyzer = FlightDataAnalyzer()
    airport_oceania = "AYGA"  # Oceania
    airport_asia = "ULDA"  # Asia/Europe (in Russia)
    expected_distance = 10750.81
    actual_distance = distance_to(analyzer, airport_oceania, airport_asia)
    # Compare with an absolute tolerance of 1 km. Comparing two independently
    # rounded values can fail for distances that differ by only a few metres
    # but fall on opposite sides of a .5 boundary.
    assert math.isclose(actual_distance, expected_distance, abs_tol=1.0), (
        f"expected about {expected_distance} km, got {actual_distance} km"
    )


test_continent()
print(distance_to(analyzer, "AYGA", "ULDA"))

Data already downloaded
10751.328161197485


In [27]:
# Test if ValueError is raised when invalid code is input
def test_error_code():
    """Test if function identifies invalid ICAO codes."""
    analyzer = FlightDataAnalyzer()
    airport_1 = "AYGA"
    airport_invalid = "invalid_code"
    with pytest.raises(ValueError):
        distance_to(analyzer, airport_1, airport_invalid)


test_error_code()

# Show the error message itself. The exception is caught and printed so that
# "Restart Kernel -> Run All" can continue past this cell.
try:
    print(distance_to(analyzer, "AYGA", "invalid_code"))
except ValueError as error:
    print(f"ValueError: {error}")

Data already downloaded


ValueError: Airport code 'invalid_code' not found.

In [30]:
# Test the distance between an airport and itself
def test_same():
    """Check that an airport is at distance zero from itself."""
    flight_data = FlightDataAnalyzer()
    icao_code = "AYGA"
    self_distance = distance_to(flight_data, icao_code, icao_code)
    assert self_distance == 0


test_same()
distance_to(analyzer, "AYGA", "AYGA")

Data already downloaded


0.0

In [32]:
# run pytest - all tests passed, just one warning for pydantic in the output
!pytest ..\final_scripts\day_1\function_test_distance.py

platform win32 -- Python 3.11.8, pytest-8.0.2, pluggy-1.4.0
rootdir: c:\Users\marti\OneDrive - Nova SBE\Nova\Master's in Business Analytics\Disciplinas\2nd Semester\t1\Advanced Programming for Data Science\ADPRO_Project
collected 3 items

..\final_scripts\day_1\function_test_distance.py [32m.[0m[32m.[0m[32m.[0m[33m                     [100%][0m

..\..\..\..\..\..\..\..\..\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pydantic\_internal\_config.py:210
  C:\Users\marti\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\pydantic\_internal\_config.py:210: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.0.3/migration/



---
### Run black and pylint on test script

In [34]:
# Black (also run on the file in final_scripts)
!black ..\tests\day_1\function_test_pylint.py

reformatted ..\final_scripts\day_1\function_test_distance.py

All done! ✨ 🍰 ✨
1 file reformatted.


In [44]:
# First pylint run - the only issues found are import errors caused by the environment configuration
!pylint ..\tests\day_1\function_test_pylint.py

************* Module function_test_pylint
c:\Users\marti\OneDrive - Nova SBE\Nova\Master's in Business Analytics\Disciplinas\2nd Semester\t1\Advanced Programming for Data Science\ADPRO_Project\tests\day_1\function_test_pylint.py:7:0: E0401: Unable to import 'class_flight_data' (import-error)
c:\Users\marti\OneDrive - Nova SBE\Nova\Master's in Business Analytics\Disciplinas\2nd Semester\t1\Advanced Programming for Data Science\ADPRO_Project\tests\day_1\function_test_pylint.py:8:0: E0401: Unable to import 'function_calculate_distance' (import-error)

------------------------------------------------------------------
Your code has been rated at 4.74/10 (previous run: 4.74/10, +0.00)



In [47]:
# Second pylint run - from the same directory as the modules causing import error
!pylint ..\final_scripts\day_1\function_test_distance.py


--------------------------------------------------------------------
Your code has been rated at 10.00/10 (previous run: 10.00/10, +0.00)

