In [1]:
%load_ext kedro.ipython

In [3]:
# helper node functions
import pandas as pd

def _is_true(x: pd.Series) -> pd.Series:
    return x == "t"


def _parse_percentage(x: pd.Series) -> pd.Series:
    x = x.str.replace("%", "")
    x = x.astype(float) / 100
    return x


def _parse_money(x: pd.Series) -> pd.Series:
    x = x.str.replace("$", "").str.replace(",", "")
    x = x.astype(float)
    return x


In [None]:
catalog

In [6]:
reviews = catalog.load("reviews")
reviews.head()

Unnamed: 0,shuttle_id,review_scores_rating,review_scores_comfort,review_scores_amenities,review_scores_trip,review_scores_crew,review_scores_location,review_scores_price,number_of_reviews,reviews_per_month
0,45163,91.0,10.0,9.0,9.0,9.0,9.0,9.0,26,0.77
1,49438,96.0,10.0,10.0,10.0,10.0,10.0,9.0,61,0.62
2,10750,97.0,10.0,10.0,10.0,10.0,10.0,10.0,467,4.66
3,4146,95.0,10.0,10.0,10.0,10.0,9.0,9.0,318,3.22
4,5067,97.0,10.0,9.0,10.0,10.0,9.0,10.0,22,0.29


In [10]:
companies = catalog.load("companies")
companies["iata_approved"] = _is_true(companies["iata_approved"])
companies["company_rating"] = _parse_percentage(companies["company_rating"])
companies.head()

Unnamed: 0,id,company_rating,company_location,total_fleet_count,iata_approved
0,3888,1.0,Isle of Man,1.0,False
1,46728,1.0,,1.0,False
2,34618,0.38,Isle of Man,1.0,False
3,28619,1.0,Bosnia and Herzegovina,1.0,False
4,8240,,Chile,1.0,True


In [8]:
shuttles = catalog.load("shuttles")
shuttles["d_check_complete"] = _is_true(shuttles["d_check_complete"])
shuttles["moon_clearance_complete"] = _is_true(shuttles["moon_clearance_complete"])
shuttles["price"] = _parse_money(shuttles["price"])
shuttles.head()

Unnamed: 0,id,shuttle_location,shuttle_type,engine_type,engine_vendor,engines,passenger_capacity,cancellation_policy,crew,d_check_complete,moon_clearance_complete,price,company_id
0,45163,Sao Tome and Principe,Type V5,Plasma,ThetaBase Services,2.0,4,moderate,2.0,False,False,1715.0,32413
1,49438,Wallis and Futuna,Type V2,Plasma,ThetaBase Services,3.0,5,moderate,3.0,False,False,3405.0,14122
2,10750,Niue,Type F5,Quantum,ThetaBase Services,1.0,2,strict,1.0,True,False,1806.0,47761
3,4146,Malta,Type V2,Quantum,ThetaBase Services,1.0,2,moderate,1.0,False,False,1676.0,26648
4,5067,Malta,Type V2,Plasma,ThetaBase Services,5.0,10,strict,5.0,False,False,4718.0,26648


In [12]:
# Merge suttle and reviews
rated_shuttles = shuttles.merge(reviews, left_on="id", right_on="shuttle_id")
rated_shuttles.head()

Unnamed: 0,id,shuttle_location,shuttle_type,engine_type,engine_vendor,engines,passenger_capacity,cancellation_policy,crew,d_check_complete,...,shuttle_id,review_scores_rating,review_scores_comfort,review_scores_amenities,review_scores_trip,review_scores_crew,review_scores_location,review_scores_price,number_of_reviews,reviews_per_month
0,45163,Sao Tome and Principe,Type V5,Plasma,ThetaBase Services,2.0,4,moderate,2.0,False,...,45163,91.0,10.0,9.0,9.0,9.0,9.0,9.0,26,0.77
1,49438,Wallis and Futuna,Type V2,Plasma,ThetaBase Services,3.0,5,moderate,3.0,False,...,49438,96.0,10.0,10.0,10.0,10.0,10.0,9.0,61,0.62
2,10750,Niue,Type F5,Quantum,ThetaBase Services,1.0,2,strict,1.0,True,...,10750,97.0,10.0,10.0,10.0,10.0,10.0,10.0,467,4.66
3,4146,Malta,Type V2,Quantum,ThetaBase Services,1.0,2,moderate,1.0,False,...,4146,95.0,10.0,10.0,10.0,10.0,9.0,9.0,318,3.22
4,5067,Malta,Type V2,Plasma,ThetaBase Services,5.0,10,strict,5.0,False,...,5067,97.0,10.0,9.0,10.0,10.0,9.0,10.0,22,0.29


In [14]:
rated_shuttles.dtypes


id                           int64
shuttle_location            object
shuttle_type                object
engine_type                 object
engine_vendor               object
engines                    float64
passenger_capacity           int64
cancellation_policy         object
crew                       float64
d_check_complete              bool
moon_clearance_complete       bool
price                      float64
company_id                   int64
shuttle_id                   int64
review_scores_rating       float64
review_scores_comfort      float64
review_scores_amenities    float64
review_scores_trip         float64
review_scores_crew         float64
review_scores_location     float64
review_scores_price        float64
number_of_reviews            int64
reviews_per_month          float64
dtype: object

In [17]:
# merge rated shuttles with company info 
model_input_table = rated_shuttles.merge(companies, left_on="company_id", right_on="id")
model_input_table.head()

Unnamed: 0,id_x,shuttle_location,shuttle_type,engine_type,engine_vendor,engines,passenger_capacity,cancellation_policy,crew,d_check_complete,...,review_scores_crew,review_scores_location,review_scores_price,number_of_reviews,reviews_per_month,id_y,company_rating,company_location,total_fleet_count,iata_approved
0,45163,Sao Tome and Principe,Type V5,Plasma,ThetaBase Services,2.0,4,moderate,2.0,False,...,9.0,9.0,9.0,26,0.77,32413,1.0,Faroe Islands,1.0,False
1,49438,Wallis and Futuna,Type V2,Plasma,ThetaBase Services,3.0,5,moderate,3.0,False,...,10.0,10.0,9.0,61,0.62,14122,1.0,Malta,1.0,True
2,10750,Niue,Type F5,Quantum,ThetaBase Services,1.0,2,strict,1.0,True,...,10.0,10.0,10.0,467,4.66,47761,1.0,Niue,2.0,False
3,4146,Malta,Type V2,Quantum,ThetaBase Services,1.0,2,moderate,1.0,False,...,10.0,9.0,9.0,318,3.22,26648,1.0,Niue,2.0,True
4,5067,Malta,Type V2,Plasma,ThetaBase Services,5.0,10,strict,5.0,False,...,10.0,9.0,10.0,22,0.29,26648,1.0,Niue,2.0,True


In [18]:
# drop null values
model_input_table.dropna


[1m<[0m[1;95mbound[0m[39m method DataFrame.dropna of         id_x       shuttle_location shuttle_type engine_type  \[0m
[1;36m0[0m[39m      [0m[1;36m45163[0m[39m  Sao Tome and Principe      Type V5      Plasma   [0m
[1;36m1[0m[39m      [0m[1;36m49438[0m[39m      Wallis and Futuna      Type V2      Plasma   [0m
[1;36m2[0m[39m      [0m[1;36m10750[0m[39m                   Niue      Type F5     Quantum   [0m
[1;36m3[0m[39m       [0m[1;36m4146[0m[39m                  Malta      Type V2     Quantum   [0m
[1;36m4[0m[39m       [0m[1;36m5067[0m[39m                  Malta      Type V2      Plasma   [0m
[33m...[0m[39m      [0m[33m...[0m[39m                    [0m[33m...[0m[39m          [0m[33m...[0m[39m         [0m[33m...[0m[39m   [0m
[1;36m15526[0m[39m  [0m[1;36m63513[0m[39m             Micronesia      Type V5      Plasma   [0m
[1;36m15527[0m[39m  [0m[1;36m44668[0m[39m                 Rwanda      Type F5     Quantu