In [1]:
# General packages
import requests
import json
import time
import datetime
import random
import os
import sys

# data / numerical handling
import pandas as pd
import numpy as np

# data visualization
import seaborn as sb
import matplotlib as mp

# Scraping libraries
from bs4 import BeautifulSoup as bs4


## Overview

#### About
This notebook contains code for webscraping data on eVTOL / UAM aircraft from https://transportup.com/the-hangar/

#### Packages
* [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) 
* Standard libraries across Python 3* Anaconda distribution.

#### Files
* Jupyter Notebook for analysis: transportup_scrape.ipynb
* Python files for custom functions
    * transportup_funcs.py
    
* .csv files for read and writing to dataframes
    * tu_directory_{date}.csv --> dataframe of aircraft links (to scrape)
    * tu_results_df_{date}.csv --> dataframe of scraped and cleaned aircraft data


In [2]:
def get_bs4(endpoint):

    """ Returns a bs4 object from a .get().text request at specified endpoint """
    
    # get request -- initialize html response object
    # https://docs.python-requests.org/en/master/user/quickstart/
    html = requests.get(endpoint).text 

    # initiate bs4 object
    # https://www.crummy.com/software/BeautifulSoup/bs4/doc/
    soup = bs4(html, 'html.parser')
    
    return soup

# Part I -- Build TransportUP aircraft directory dataframe

In [3]:
def get_acdata(category, cat_atags):
    
    """
    
    takes a category and the category's atags 
    and builds a list of dictionaries for data frame input
    
    """
    
    # store of dataframe row data (dicts)
    dict_list = []

    ## all key aircraft a tags have a span tag
    for i, a in enumerate(cat_atags):

        # to store data per aircraft
        ac_data = {}

        if a.span:

            # get category
            try: 
                ac_data['category'] = category
            except:
                ac_data['category'] = None

            # get link
            try:
                ac_data['link'] = a.get('href') 
            except:
                ac_data['link'] = None

            # get name
            try:
                name = a.find('span', class_ = 'x-anchor-text-primary').text.strip()
                ac_data['ac_name'] = name 
            except:
                ac_data['ac_name'] = None

            dict_list.append(ac_data)
            
    return dict_list
    
    

## Main Hangar page soup

In [4]:
endpoint = "https://transportup.com/the-hangar/"
soup = get_bs4(endpoint)

## Set up categories and atags

In [5]:
## class for the TransportUP Watchlist aircraft data
## 2x 'a' tags per aircraft
## second 'a' tag contains correct data for scraping

watchlist_a = soup.find(class_ = "e444-16 x-section").find_all('a')
market_a = soup.find(class_ = "e444-102 x-section").find_all('a')

cat_atags_all = [watchlist_a, market_a]
categories_all = ['watchlist', 'market']

## Loop through category data and append to df

In [6]:
df = pd.DataFrame(data = None, columns = ['category', 'ac_name', 'link'])

for category, atags in zip(categories_all, cat_atags_all):
    df = df.append(get_acdata(category, atags), ignore_index =True)

In [7]:
df.tail()

Unnamed: 0,category,ac_name,link
87,market,Koncepto Millenya,https://transportup.com/koncepto-millenya
88,market,Varon Vehicles V200,https://transportup.com/varon-v200/
89,market,Ghost X V 2.2,https://transportup.com/grug-group-ghost-x-v-2-2/
90,market,Imaginactive Onyx,https://transportup.com/imaginactive-onyx/
91,market,Volocopter VoloDrone,https://transportup.com/volocopter-volodrone/


In [8]:
df.shape

(92, 3)

## Drop duplicate rows

In [9]:
df.drop_duplicates(inplace = True)
df.shape

(92, 3)

#### check for duplicate links

In [10]:
link_dupes = df[df.duplicated(['link']) == True]
link_dupes

Unnamed: 0,category,ac_name,link
52,market,Vickers WAVE eVTOL,https://transportup.com/hoversurf-formula/
69,market,,https://transportup.com/elroy-air-chaparral/
71,market,,https://transportup.com/kitty-hawk-heaviside/
74,market,,https://transportup.com/amsl-aero-vertiia/
75,market,,https://transportup.com/pegasus-vertical-busin...




##### NOTE**

Vicker's Wave eVTOL has an incorrect link. Keep the aircraft in dataframe, 'n/a' the link.

In [11]:
df['link'][52] = None

In [12]:
link_dupes = df[df.duplicated(['link']) == True]
link_dupes

Unnamed: 0,category,ac_name,link
69,market,,https://transportup.com/elroy-air-chaparral/
71,market,,https://transportup.com/kitty-hawk-heaviside/
74,market,,https://transportup.com/amsl-aero-vertiia/
75,market,,https://transportup.com/pegasus-vertical-busin...


In [13]:
df.drop(index = list(link_dupes.index), inplace = True)
df[df.duplicated(['link']) == True]

Unnamed: 0,category,ac_name,link


In [14]:
df.shape

(88, 3)

## Check for null values

In [16]:
def check_na(updated_df):
    """ checks dataframe for NA values and returns a dictionary with columns and their NA counts sorted by counts """
    
    na_cols = list(updated_df.columns)

    # find number of NA values for each column if they exist
    # build dictionary
    na_response = {col: updated_df['{}'.format(col)].isna().value_counts()[1] for col in na_cols \
                   if len(updated_df['{}'.format(col)].isna().value_counts())>1}
    
    if len(na_response) != 0:
        
        # sort dict by greatest NAs count 
        new_nas = dict(sorted(na_response.items(), key = lambda item: item[1]))
        return new_nas
    else: return "No NA values in the dataframe"

In [17]:
check_na(df)

{'link': 1}

In [18]:
df.head()

Unnamed: 0,category,ac_name,link
0,watchlist,Airbus CityAirbus,https://transportup.com/airbus-cityairbus/
1,watchlist,Boeing PAV,https://transportup.com/aurora-flight-sciences...
2,watchlist,Bell Nexus Air Taxi,https://transportup.com/bell-air-taxi
3,watchlist,WISK Cora,https://transportup.com/wisk-cora/
4,watchlist,EHang 216,https://transportup.com/ehang-216/


## Send to .csv

In [19]:
# builds .csv with today's date
today = datetime.date.today()
df.to_csv('tu_directory_{}.{}.{}.csv'.format(today.month, today.day, today.year))

## Read in .csv -- Optional

In [20]:
transup_directory = pd.read_csv('tu_directory_7.12.2021.csv')
transup_directory.drop(columns = 'Unnamed: 0', inplace = True)
transup_directory

Unnamed: 0,category,ac_name,link
0,watchlist,Airbus CityAirbus,https://transportup.com/airbus-cityairbus/
1,watchlist,Boeing PAV,https://transportup.com/aurora-flight-sciences...
2,watchlist,Bell Nexus Air Taxi,https://transportup.com/bell-air-taxi
3,watchlist,WISK Cora,https://transportup.com/wisk-cora/
4,watchlist,EHang 216,https://transportup.com/ehang-216/
...,...,...,...
83,market,Koncepto Millenya,https://transportup.com/koncepto-millenya
84,market,Varon Vehicles V200,https://transportup.com/varon-v200/
85,market,Ghost X V 2.2,https://transportup.com/grug-group-ghost-x-v-2-2/
86,market,Imaginactive Onyx,https://transportup.com/imaginactive-onyx/


# PART II -- Scraping aircraft data

#### Data Points of interest

1. Quick Summary
    
2. Stage of Development

3. Technical Details


    Examples:
    
    * Aircraft Type: 

    * Powerplant: 

    * Range:

    * Top Speed:

    * Propeller Configuration: 

    * Passenger/Payload Capacity:

    * Autonomy Level:

    * Wingspan/Dimensions:
    
    * Key Suppliers: 
    

6. References


#### quick summary

In [33]:
def get_summary(soup):

    """ returns 'quick summary' from aircraft page """
    
    # ptags
    ptags = acsoup.find_all('p')

    # second index ptag
    summary = ptags[2].text.strip()
    
    return summary

#### Stage of Development

In [34]:

def get_devstage(soup):
    
    """
    
    Overview:
    
    Returns the development stage in a string format given a bs4 soup object
    
    More details:
    
    x-icon picture index to development stage

    0: "preliminary design"
    1: "prototype build"
    2: "flight testing"
    3: "certification"
    4: "commercially operating"

    """
    stages_dict = {0: "preliminary design", 1: "prototype build", \
                  2: "flight testing", 3: "certification", 4: "commercially operating"}

    # x-icons are in 'i' tags
    itags = acsoup.find_all('i')

    dev_stages = []
    
    try:

        for tag in itags:
            if 'data-x-icon' in tag.prettify():
                dev_stages.append(tag)

        for i, stage in enumerate(dev_stages):
            # color attribute required, and 0% features of color attributes indicate false positives
            if 'color' in stage.prettify() and '0%' not in stage.prettify():
                stage_ind = i
                break

        dev_stage = stages_dict[stage_ind]
        
    except:
        dev_stage = None

    return dev_stage

#### Technical Details and other

In [35]:

def get_details(soup, details_dict):
    
    """
    Builds a dictionary of all 'strong' elements as keys and element details as value.
    Returns a dictionary 

    Params:

    * soup object

    * dictionary to build or update -- can be empty

    Returns:

    * dictionary of detail keys and their values


    ## Example detail keys

    #         * Powerplant: 

    #         * Range:

    #         * Top Speed:

    #         * Propeller Configuration: 

    #         * Passenger/Payload Capacity:

    #         * Autonomy Level:

    #         * Wingspan/Dimensions:

    #         * Key Suppliers: 


    """
    acsoup = soup

    ptags = acsoup.find_all('p')
    
    try:

        for i, p in enumerate(ptags):
            if p.strong:

                # get detail key
                key = p.strong.text.strip()
                key = key.replace(":", "")

                # get detail value
                val = p.strong.next_sibling

                # add to dict
                details_dict[key] = val
                
    except:
        
        pass
            
    return details_dict


#### References

In [36]:
def get_references(soup):
    
    """ gets reference links from aircraft page. Returns a list."""
    
    ## Resources are the last unordered list
    uls = acsoup.find_all('ul')
    refs = uls[-1]
    
    # list items
    lis = refs.find_all('li')
    
    links = []
    
    try:
    
        for li in lis:
            if li.a:
                link = li.a.get('href')
                if 'transportup' not in link: 
                    links.append(link)
                else: pass
            else: pass
            
    except:
        # empty list
        return links
        
        # links list
    return links
    

#### New Dataframe to populate

In [37]:
# cols = list(transup_directory.columns)
# results_df = pd.DataFrame(data = None, columns = cols)

#### Or Updating an Existing Dataframe

In [38]:
# results_df = pd.read_csv('tu_results_df_7.11.2021.csv')
# results_df.drop(columns = ['Unnamed: 0'], inplace = True)

In [39]:
results_df.head()

Unnamed: 0,category,ac_name,ac_type,link,ac_type_detail,Autonomy Level,Dimensions,Powerplant,Propeller Configuration,Range,Top Speed,dev_stage,references,summary,Passenger/Payload Capacity,Wingspan/Dimensions,prod_stage,powerplant_category,autonomous level
0,watchlist,Airbus CityAirbus,Wingless VTOL,https://transportup.com/airbus-cityairbus/,"Wingless eVTOL, Intracity",Autonomous,"9ft propellor diameter, 26 ft length",All-Electric,8 lift/thrust rotors (4 dual rotors),60 miles,<span> </span>,preliminary design,['http://www.airbus.com/newsroom/press-release...,The Airbus CityAirbus is the long view approac...,,,pre production,electric,autonomous
1,watchlist,Boeing PAV,Winged VTOL,https://transportup.com/aurora-flight-sciences...,Winged VTOL,Autonomous,"30 feet wingspan, 28ft length",All-electric,"8 lifting rotors, 1 pusher propeller",50 miles,Undisclosed,flight testing,['http://www.aurora.aero/pav-evtol-passenger-a...,The Boeing Passenger Air Vehicle is a venture ...,Two-seater,,prototype production,electric,autonomous
2,watchlist,Bell Nexus Air Taxi,Winged VTOL,https://transportup.com/bell-air-taxi,Winged VTOL,Semi-Autonomous,,6HX - Hybrid-Electric; 4EX - All-Electric,6HX - 6 tilt-rotors; 4EX - 4 tilt-rotors (al...,6HX - 150 miles; 4EX - 60 miles,150 mph,prototype build,"['http://www.bellflight.com', 'http://eepurl.c...","The Bell Nexus is Bell Flight's Air Taxi, of w...",,,prototype production,hybrid electric,semi autonomous
3,watchlist,WISK Cora,Winged VTOL,https://transportup.com/wisk-cora/,,Autonomous,,,6 lift propellers under each wing (non-tilting...,,,flight testing,"['http://kittyhawk.aero', 'http://eepurl.com/d...",Cora is an experimental airworthiness certific...,,,prototype production,undisclosed,autonomous
4,watchlist,EHang 216,Wingless VTOL,https://transportup.com/ehang-216/,<span> </span>,<span> </span>,"18' 4"" wingspan, 5' 9"" length",: All-Electric,16 lift/thrust rotors (8 dual rotors),22 miles with max payload,80 mph,certification,"['http://www.ehang.com/ehang184/', 'http://eep...",The Ehang AAV is the largest autonomous aerial...,"2 Passengers, or 485 lbs total payload",,lrp,electric,undisclosed


## PART II -- Scraping new aircraft data

In [40]:

def get_tu_acdata(directory_df, results_df):
    
    """
    
    Overview:
    
    Compares url links from up-to-date directory with url links 
    in current dataframe -- or new dataframe if starting from scratch -- and updates results_df with new data.
    
    Use case:
    
    * Starting a dataframe from scratch --> pass an empty df parameter
    
    * Updating a current dataframe --> pass current dataframe
    
    
    """

    transup_directory = directory_df
    
    update_counter = 0

    ## if the transup aircraft link is not already in the new dataframe, scrape.
    for i, link in enumerate(transup_directory['link']):

        if link not in list(results_df['link']):

            # data for dataframe append
            ac_data = {}

            try:

                # get soup object
                acsoup = get_bs4(link)

                # get category from source dataframe
                ac_data['category'] = transup_directory['category'][i]

                # get category from source dataframe
                ac_data['ac_name'] = transup_directory['ac_name'][i]

                # get link
                ac_data['link'] = link

                # returns summary string
                summary = get_summary(acsoup)
                ac_data['summary'] = summary

                # returns string of dev stage
                dev_stage = get_devstage(acsoup)
                ac_data['dev_stage'] = dev_stage

                # returns list
                references = get_references(acsoup)
                ac_data['references'] = references

                # returns dict OR updated dict
                ac_data = get_details(acsoup, ac_data)

                # print(ac_data)

                results_df = results_df.append(ac_data, ignore_index = True)

                print("Successfully appended a row to dataframe, index: ", i, "link: ", link)
                
                update_counter += 1

            except: 
                print("Error at: ", i, ", link: ", link)

            time.sleep(random.randint(5, 7))
            
            print("")
            print("Summary: ")
            
            if update_counter > 0:
                print("Added ", update_counter, "new aircraft")
            else:
                print("Nothing was updated")

#### Run loop function to add new aircraft to dataframe

In [29]:
get_tu_acdata(transup_directory, results_df)

Error at:  52 , link:  nan

Summary: 
Nothing was updated


In [41]:
results_df.head(3)

Unnamed: 0,category,ac_name,ac_type,link,ac_type_detail,Autonomy Level,Dimensions,Powerplant,Propeller Configuration,Range,Top Speed,dev_stage,references,summary,Passenger/Payload Capacity,Wingspan/Dimensions,prod_stage,powerplant_category,autonomous level
0,watchlist,Airbus CityAirbus,Wingless VTOL,https://transportup.com/airbus-cityairbus/,"Wingless eVTOL, Intracity",Autonomous,"9ft propellor diameter, 26 ft length",All-Electric,8 lift/thrust rotors (4 dual rotors),60 miles,<span> </span>,preliminary design,['http://www.airbus.com/newsroom/press-release...,The Airbus CityAirbus is the long view approac...,,,pre production,electric,autonomous
1,watchlist,Boeing PAV,Winged VTOL,https://transportup.com/aurora-flight-sciences...,Winged VTOL,Autonomous,"30 feet wingspan, 28ft length",All-electric,"8 lifting rotors, 1 pusher propeller",50 miles,Undisclosed,flight testing,['http://www.aurora.aero/pav-evtol-passenger-a...,The Boeing Passenger Air Vehicle is a venture ...,Two-seater,,prototype production,electric,autonomous
2,watchlist,Bell Nexus Air Taxi,Winged VTOL,https://transportup.com/bell-air-taxi,Winged VTOL,Semi-Autonomous,,6HX - Hybrid-Electric; 4EX - All-Electric,6HX - 6 tilt-rotors; 4EX - 4 tilt-rotors (al...,6HX - 150 miles; 4EX - 60 miles,150 mph,prototype build,"['http://www.bellflight.com', 'http://eepurl.c...","The Bell Nexus is Bell Flight's Air Taxi, of w...",,,prototype production,hybrid electric,semi autonomous


## Send to .csv

In [42]:
# builds .csv with today's date
# today = datetime.date.today()
# results_df.to_csv('tu_results_df_{}.{}.{}.csv'.format(today.month, today.day, today.year))

# PART III -- Data Cleaning

#### Read in .csv

In [43]:
# results_df = pd.read_csv('tu_results_df_7.11.2021.csv')
# results_df.drop(columns = ['Unnamed: 0'], inplace = True)

In [44]:
results_df.shape

(87, 19)

In [309]:
results_df.head(3)

Unnamed: 0,category,ac_name,ac_type,link,ac_type_detail,Autonomy Level,Dimensions,Powerplant,Propeller Configuration,Range,Top Speed,dev_stage,references,summary,Passenger/Payload Capacity,Wingspan/Dimensions,prod_stage,powerplant_category,autonomous level
0,watchlist,Airbus CityAirbus,Wingless VTOL,https://transportup.com/airbus-cityairbus/,"Wingless eVTOL, Intracity",Autonomous,"9ft propellor diameter, 26 ft length",All-Electric,8 lift/thrust rotors (4 dual rotors),60 miles,<span> </span>,preliminary design,['http://www.airbus.com/newsroom/press-release...,The Airbus CityAirbus is the long view approac...,,,pre production,electric,autonomous
1,watchlist,Boeing PAV,Winged VTOL,https://transportup.com/aurora-flight-sciences...,Winged VTOL,Autonomous,"30 feet wingspan, 28ft length",All-electric,"8 lifting rotors, 1 pusher propeller",50 miles,Undisclosed,flight testing,['http://www.aurora.aero/pav-evtol-passenger-a...,The Boeing Passenger Air Vehicle is a venture ...,Two-seater,,prototype production,electric,autonomous
2,watchlist,Bell Nexus Air Taxi,Winged VTOL,https://transportup.com/bell-air-taxi,Winged VTOL,Semi-Autonomous,,6HX - Hybrid-Electric; 4EX - All-Electric,6HX - 6 tilt-rotors; 4EX - 4 tilt-rotors (al...,6HX - 150 miles; 4EX - 60 miles,150 mph,prototype build,"['http://www.bellflight.com', 'http://eepurl.c...","The Bell Nexus is Bell Flight's Air Taxi, of w...",,,prototype production,hybrid electric,semi autonomous


### III.i - Handling null values

* There are alot of columns with null values for each row. 

* Find the columns where a significant majority are null. 

* Delete

In [45]:
# Work from a copy() initiailly
cdf = results_df.copy()

#### Establish criteria for dropping columns

* if column has > 70% null values, drop it.

In [46]:
def drop_nullcols(df, nullperc):

    """ 
    Drops columns of a dataframe based on a percentage parameter for (# null rows / total rows). 
    
    Returns list of dropped columns. 
    
    """

    null_cols = []
    for df_col in list(cdf.columns):
        if len(cdf[cdf[df_col].isna()]) / len(cdf) >= nullperc:
            null_cols.append(df_col)

    cdf.drop(columns = null_cols, inplace = True)
    
    print("These are the dropped columns...")
    
    return null_cols
            

In [47]:
drop_nullcols(cdf, .7)

These are the dropped columns...


[]

#### III.ii - Standardizing key column values

Columns of interest:

* Aircraft Type
* Autonomy Level
* Powerplant
* Range
* Top Speed

#### Aircraft Type

In [314]:

def assign_actype(x):
    
    """ 
    df.apply(lambda x: ac_typer(x)) support function.
    
    Provides logic and return value for Aircraft Type value grouping.
    
    """
    
    # ac_types = ['Winged VTOL', 'Wingless VTOL', 'STOL', 'Other']

    # type conversion
    x = str(x)

    # Winged VTOL
    if 'winged' in x.lower() and 'vtol' in x.lower():
        x = 'Winged VTOL'
    elif 'wingless' in x.lower() and 'vtol' in x.lower():
        x = 'Wingless VTOL'
    elif 'stol' in x.lower():
        x = 'STOL'
    else:
        x = None
        
    return x

#### run function, add new ac_type column

In [315]:
# cdf['ac_type'] = cdf['Aircraft Type'].apply(lambda x: assign_actype(x))

"""

Clean Aircraft Type category if necessary --

* If 'Aircraft Type'.isna() == False:

* Move the data into 'ac_type_detail' with lambda

* After the lambda move:

* If 'Aircraft Type'.isna() == len(df):

* Delete the 'Aircraft Type' column


"""

In [50]:
# cdf.rename(columns = {'Aircraft Type': 'ac_type_detail'}, inplace = True)
# cdf.head(3)

In [51]:
cdf['ac_type'].value_counts()

Winged VTOL      60
Wingless VTOL    25
STOL              2
Name: ac_type, dtype: int64

#### Send to .csv

In [318]:
# # builds .csv with today's date
# today = datetime.date.today()
# cdf.to_csv('tu_results_df_{}.{}.{}.csv'.format(today.month, today.day, today.year))

#### Read in df after manual updates

In [52]:
# cdf = pd.read_csv('tu_results_df_7.11.2021.csv')
# cdf.drop(columns = ['Unnamed: 0'], inplace = True)

### Dev stage cleanup

In [54]:
cdf['dev_stage'].isna().value_counts()

False    87
Name: dev_stage, dtype: int64

In [56]:
def handle_null_devstage(df):
    
    """
    
    Changes null values in dev_stage to 'prototype build' if they are in the 'market' category
    
    Rationale: 
    Market category is less watched, less publicized. So likely to be a project in its infancy.
    
    """
    
    cdf = df
    
    # get indices for null dev_stage and 'category' == market
    updi = np.where((cdf['dev_stage'].isna() == True) & (cdf['category'] == 'market'))
    ind_upd = list(updi[0])

    for i in ind_upd:
        cdf['dev_stage'][i] = 'prototype build'



In [57]:
handle_null_devstage(cdf)
cdf['dev_stage'].isna().value_counts()

False    87
Name: dev_stage, dtype: int64

In [58]:
cdf['dev_stage'].value_counts()

prototype build       37
flight testing        30
preliminary design    15
certification          5
Name: dev_stage, dtype: int64

#### assign production stage from dev_stage data

In [60]:
def assign_prodstage(x):
    
    """
    Takes a series object from df['dev_stage'] and returns the appropriate value for df['prod_stage'] 
    based on conversion logic set below. Update as needed.
    
    Prototype: prototype production

    LRP: low rate production

    FRP: full rate production
    
    
    """
    
    dev_stages = ['prototype build', 
                    'flight testing', 
                    'preliminary design', 
                    'certification', 
                    'commercially operating']
    
    # assignment values for dev_stage to prod_stage key
    newdvals = [1, 1, 0, 2, 3]
    dev_stage_numdict = {key: val for key, val in zip(dev_stages, newdvals)}

    prod_stages = ['prototype production', 'lrp', 'frp']
    # assign key:val for dev_stage to prod_stage conversion
    prod_stage_dict = {0: 'pre production', 1: 'prototype production', 2: 'lrp', 3: 'frp'}

    if x in dev_stage_numdict:
        return prod_stage_dict[dev_stage_numdict[x]]

In [61]:
# cdf['prod_stage'] = cdf['dev_stage'].apply(lambda x: assign_prodstage(x))

In [62]:
cdf['prod_stage'].value_counts()

prototype production    67
pre production          15
lrp                      5
Name: prod_stage, dtype: int64

#### Powerplant column

In [63]:
plant_counts = cdf['Powerplant'].value_counts()
plant_counts[ plant_counts > 1]

All-electric        8
: All-Electric      7
 All-Electric       5
 All-Electric       2
All-Electric        2
 Hybrid-Electric    2
Name: Powerplant, dtype: int64

In [65]:
def assign_pplant(x):
    
    """ 
    df.apply(lambda x: x) support function
    
    Provides logic and return value for power plant category value grouping.
    
    """
    
    plant_cats = ['electric', 'hybrid electric', 'undisclosed']

    # type conversion
    x = str(x)

    # Winged VTOL
    if 'hybrid' in x.lower():
        # hybrid electric index 1
        x = plant_cats[1]
    elif 'hybrid' not in x.lower() and 'electric' in x.lower():
        # all electric index 0
        x = plant_cats[0]
    else:
        # undisclosed
        x = plant_cats[2]
        
    return x

In [66]:
# cdf['powerplant_category'] = cdf['Powerplant'].apply(lambda x: assign_pplant(x))
cdf['powerplant_category'].value_counts()

undisclosed        35
electric           33
hybrid electric    19
Name: powerplant_category, dtype: int64

#### autonomy col

In [67]:
def assign_autonlevel(x):
    
    """ 
    df.apply(lambda x: x) support function
    
    Provides logic and return value for autonomy level category value grouping.
    
    """
    
    auto_cats = ['semi autonomous', 'autonomous', 'piloted semi autonomous', 'piloted', 'undisclosed']

    # type conversion
    x = str(x)

    if 'semi' in x.lower() and 'pilot' not in x.lower():
        x = auto_cats[0]

    elif 'autonomous' in x.lower() and 'semi' not in x.lower():    
        x = auto_cats[1]
        
    elif 'pilot' in x.lower() and 'semi' in x.lower():    
        x = auto_cats[2]

    elif x.lower() == 'piloted':
        x = auto_cats[3]
        
    else:
        # undisclosed
        x = auto_cats[4]
        
    return x

In [68]:
# cdf['autonomous level'] = cdf['Autonomy Level'].apply(lambda x: assign_autonlevel(x))
cdf['autonomous level'].value_counts()

undisclosed                42
autonomous                 18
semi autonomous            16
piloted                     6
piloted semi autonomous     5
Name: autonomous level, dtype: int64

#### Update .csv

In [69]:
# builds .csv with today's date
today = datetime.date.today()
cdf.to_csv('tu_results_df_{}.{}.{}.csv'.format(today.month, today.day, today.year))