In [None]:
%pip install serpapi
%pip install google-search-results
import pandas as pd
import serpapi
from serpapi import GoogleSearch
import requests

In [None]:
def google_api_search(dep_airport_code, arr_airport_code, dep_date, access_key = 'ffe30b8803eb73dbbe6d2889e77aba0c5023fc74a8c0ebc5886802d17f99397c'):
    """
    Search flights through SerpApi's Google Flights engine and flatten them into a DataFrame.

    Note: the Google Flights API plan behind the default key only allows about 100
    searches per month; pass your own ``access_key`` if that limit is exceeded.
    NOTE(review): the default key is a credential committed to the notebook. It is
    kept so existing calls keep working, but it should be rotated and supplied by
    the caller (e.g. from an environment variable).

    Input:
    dep_airport_code: str IATA code of the departure airport (e.g., "SFO" for San Francisco).
    arr_airport_code: str IATA code of the arrival airport (e.g., "JFK" for New York).
    dep_date: str Departure date in the format YYYY-MM-DD (e.g., "2024-11-20").
    access_key: str SerpApi API key.

    Airport codes are upper-cased before they are sent to the API and before they
    are joined against the airport table, so "sfo" and "SFO" behave the same.

    Output (tuple ``(results, merged_df)``):

    results: dict or None. The raw API response with one extra key, ``all_flights``
        (``best_flights`` followed by ``other_flights``; a missing list counts as empty).
    merged_df: pandas.DataFrame or None. One row per entry of ``all_flights`` built
        from that entry's first leg, plus departure/arrival state, latitude and
        longitude looked up in ``iata-icao.csv``, ``Day_of_Month`` and ``Day_of_Week``
        (1 = Monday ... 7 = Sunday). The frame is empty, with all columns present,
        when the search returns no flights.

    Both values are None when the API reports an error or any step raises.

    Where the per-flight fields live in ``results``:
    results['all_flights'] -> all flights available between the 2 locations
    results['all_flights'][0]['flights'][0]['duration'] -> duration of the flight
    results['all_flights'][0]['flights'][0]['airplane'] -> airplane of the flight
    results['all_flights'][0]['flights'][0]['airline'] -> airline of the flight
    results['all_flights'][0]['flights'][0]['flight_number'] -> flight number of the flight
    results['all_flights'][0]['flights'][0]['departure_airport']['time'] -> departure time
    results['all_flights'][0]['flights'][0]['arrival_airport']['time'] -> arrival time
    """

    # Normalise once so the API request and the airport-table join agree on case.
    dep_code = dep_airport_code.upper()
    arr_code = arr_airport_code.upper()

    # Every value is a plain string; writing {value} here would build a one-element set.
    params = {
        "engine": "google_flights",
        "departure_id": dep_code,
        "arrival_id": arr_code,
        "hl": "en",
        "gl": "us",
        "outbound_date": dep_date,
        # Presumably stops=1 -> nonstop only and type=2 -> one-way; confirm against the SerpApi docs.
        "stops": "1",
        "type": "2",
        "api_key": access_key,
    }

    # Fixed column order so an empty search still yields a frame the merges can work on.
    flight_columns = [
        'Departure Airport Code',
        'Arrival Airport Code',
        'Departure Date',
        'Duration',
        'Airplane',
        'Airline',
        'Flight Number',
        'Departure Time',
        'Arrival Time',
    ]

    try:
        results = GoogleSearch(params).get_dict()
        if 'error' in results:
            print(f"ERROR: {results['error']}")
            return None, None

        # Treat a missing (or null) list as empty instead of raising KeyError.
        results["all_flights"] = (results.get("best_flights") or []) + (results.get("other_flights") or [])

        flight_data = []
        for flight in results["all_flights"]:
            leg = _first_leg(flight)
            flight_data.append({
                'Departure Airport Code': dep_code,
                'Arrival Airport Code': arr_code,
                'Departure Date': dep_date,
                'Duration': leg.get('duration', 'Duration not found'),
                'Airplane': leg.get('airplane', 'airplane not found'),
                'Airline': leg.get('airline', 'airline not found'),
                'Flight Number': leg.get('flight_number', 'flight_number not found'),
                'Departure Time': leg.get('departure_airport', {}).get('time', 'depature_time not found'),
                'Arrival Time': leg.get('arrival_airport', {}).get('time', 'arrival_time not found'),
            })

        flights_df = pd.DataFrame(flight_data, columns=flight_columns)
        airport_location_df = pd.read_csv('iata-icao.csv')

        merged_df = _attach_airport_location(flights_df, airport_location_df, 'Departure Airport Code', 'Departure')
        merged_df = _attach_airport_location(merged_df, airport_location_df, 'Arrival Airport Code', 'Arrival')

        departure_dates = pd.to_datetime(merged_df['Departure Date'])
        merged_df['Day_of_Month'] = departure_dates.dt.day
        # pandas counts Monday as 0; shift so Monday = 1 ... Sunday = 7.
        merged_df['Day_of_Week'] = departure_dates.dt.dayofweek + 1

        # Aircraft tail-number / ICAO-type enrichment is currently disabled; it relied on
        # the commented-out get_aircraft_details helper elsewhere in this notebook.

    except Exception as e:
        # Best-effort contract: report the problem and hand back (None, None).
        print('Error with calling Google API')
        print(e)
        results = None
        merged_df = None

    return results, merged_df


def _first_leg(flight):
    """Return the first leg of an itinerary dict, or {} when it has no legs."""
    legs = flight.get('flights') or [{}]
    return legs[0]


def _attach_airport_location(flights_df, airport_location_df, code_column, prefix):
    """Left-join state/latitude/longitude for the airport in code_column, naming them '<prefix> ...'."""
    merged = flights_df.merge(airport_location_df, left_on=code_column, right_on='iata', how='left')
    merged = merged.rename(columns={
        'latitude': f'{prefix} Latitude',
        'longitude': f'{prefix} Longitude',
        'region_name': f'{prefix} State',
    })
    return merged.drop(columns=['country_code', 'icao', 'airport', 'iata'])

    



In [7]:
# Example query: SFO -> JFK departing 2024-11-21, using the function's default API key.
results_dict, feature_df = google_api_search(
    dep_airport_code="SFO",
    arr_airport_code="JFK",
    dep_date="2024-11-21",
)

In [8]:
# Show the per-flight feature table returned by google_api_search.
feature_df

Unnamed: 0,Departure Airport Code,Arrival Airport Code,Departure Date,Duration,Airplane,Airline,Flight Number,Departure Time,Arrival Time,Departure State,Departure Latitude,Departure Longitude,Arrival State,Arrival Latitude,Arrival Longitude,Day_of_Month,Day_of_Week
0,SFO,JFK,2024-11-21,329,Airbus A320,JetBlue,B6 816,2024-11-21 06:00,2024-11-21 14:29,California,37.619,-122.375,New York,40.6397,-73.7789,21,4
1,SFO,JFK,2024-11-21,334,Airbus A321 (Sharklets),American,AA 2560,2024-11-21 07:25,2024-11-21 15:59,California,37.619,-122.375,New York,40.6397,-73.7789,21,4
2,SFO,JFK,2024-11-21,335,Boeing 767,Delta,DL 383,2024-11-21 09:50,2024-11-21 18:25,California,37.619,-122.375,New York,40.6397,-73.7789,21,4
3,SFO,JFK,2024-11-21,332,Boeing 737,Alaska,AS 38,2024-11-21 06:38,2024-11-21 15:10,California,37.619,-122.375,New York,40.6397,-73.7789,21,4
4,SFO,JFK,2024-11-21,339,Airbus A320,JetBlue,B6 416,2024-11-21 08:30,2024-11-21 17:09,California,37.619,-122.375,New York,40.6397,-73.7789,21,4
5,SFO,JFK,2024-11-21,343,Airbus A320,JetBlue,B6 2116,2024-11-21 12:25,2024-11-21 21:08,California,37.619,-122.375,New York,40.6397,-73.7789,21,4
6,SFO,JFK,2024-11-21,338,Boeing 737MAX 9 Passenger,Alaska,AS 20,2024-11-21 13:22,2024-11-21 22:00,California,37.619,-122.375,New York,40.6397,-73.7789,21,4
7,SFO,JFK,2024-11-21,346,Airbus A321neo,JetBlue,B6 1216,2024-11-21 13:40,2024-11-21 22:26,California,37.619,-122.375,New York,40.6397,-73.7789,21,4
8,SFO,JFK,2024-11-21,335,Airbus A321 (Sharklets),American,AA 16,2024-11-21 10:25,2024-11-21 19:00,California,37.619,-122.375,New York,40.6397,-73.7789,21,4
9,SFO,JFK,2024-11-21,335,Boeing 757,Delta,DL 354,2024-11-21 11:35,2024-11-21 20:10,California,37.619,-122.375,New York,40.6397,-73.7789,21,4


In [None]:
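# NOTE: this entire cell is commented out, so get_aircraft_details is not defined when the notebook runs.
# Known problems to fix before re-enabling it:
#   - `type(item['aircraft']) is not None` is always true; test `item.get('aircraft') is not None` instead.
#   - `print(1)` is leftover debugging output.
#   - a non-200 response falls through and returns None instead of a (registration, icao) pair.
#   - the access key should be passed in by the caller rather than hard-coded as a default.
# def get_aircraft_details(flight_iata, dep_iata, access_key = '5028e53dc0c716f5517a3a61b7b92e33'):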
#     params = {
#         'access_key': access_key,
#         'flight_iata': flight_iata.replace(" ", ""),
#         'dep_iata': dep_iata
#     }
#     try:
#         response = requests.get('https://api.aviationstack.com/v1/flights', params=params)
#         if response.status_code == 200:
#             data = response.json().get('data', [])

#             registration = 'Unknown'
#             icao = 'Unknown'
#             # Find the first flight with 'landed' status
#             for item in data:
#                 if item.get('flight_status') == 'landed':
#                     registration = item.get('aircraft', {}).get('registration', "Unknown")
#                     icao = item.get('aircraft', {}).get('icao', "Unknown")
#                     #return registration, icao
#                 elif item.get('flight_status') == 'scheduled' and type(item['aircraft']) is not None:
#                     print(1)
#                     registration = item.get('aircraft', {}).get('registration', "Unknown")
#                     icao = item.get('aircraft', {}).get('icao', "Unknown")
#                     #return registration, icao
#                 else:
#                     continue
                

            
#             # Debug: No landed flights found
#             #print(f"No 'landed' flight found for {flight_iata} departing {dep_iata}.")
#             return registration, icao
#     except Exception as e:
#         print(f"Error fetching registration data for flight {flight_iata}: {e}")
#         return "Unknown", "Unknown"