In [34]:
# import needed packages
import pandas as pd
from IPython.display import Image # for embedded images in notebook

In [2]:
# load the AirBnB users dataset from the project's relative data folder
airbnbUsers = pd.read_csv(filepath_or_buffer=r'data/airbnb_users.csv')

In [4]:
# preview the first ten rows of the users dataset
airbnbUsers.head(n=10)

Unnamed: 0,id,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination
0,gxn3p5htnn,6/28/2010,20090300000000.0,,-unknown-,,facebook,0,en,direct,direct,untracked,Web,Mac Desktop,Chrome,NDF
1,820tgsjxq7,5/25/2011,20090500000000.0,,MALE,38.0,facebook,0,en,seo,google,untracked,Web,Mac Desktop,Chrome,NDF
2,4ft3gnwmtx,9/28/2010,20090600000000.0,8/2/2010,FEMALE,56.0,basic,3,en,direct,direct,untracked,Web,Windows Desktop,IE,US
3,bjjt8pjhuk,12/5/2011,20091000000000.0,9/8/2012,FEMALE,42.0,facebook,0,en,direct,direct,untracked,Web,Mac Desktop,Firefox,other
4,87mebub9p4,9/14/2010,20091200000000.0,2/18/2010,-unknown-,41.0,basic,0,en,direct,direct,untracked,Web,Mac Desktop,Chrome,US
5,osr2jwljor,1/1/2010,20100100000000.0,1/2/2010,-unknown-,,basic,0,en,other,other,omg,Web,Mac Desktop,Chrome,US
6,lsw9q7uk0j,1/2/2010,20100100000000.0,1/5/2010,FEMALE,46.0,basic,0,en,other,craigslist,untracked,Web,Mac Desktop,Safari,US
7,0d01nltbrs,1/3/2010,20100100000000.0,1/13/2010,FEMALE,47.0,basic,0,en,direct,direct,omg,Web,Mac Desktop,Safari,US
8,a1vcnhxeij,1/4/2010,20100100000000.0,7/29/2010,FEMALE,50.0,basic,0,en,other,craigslist,untracked,Web,Mac Desktop,Safari,US
9,6uh8zyj2gn,1/4/2010,20100100000000.0,1/4/2010,-unknown-,46.0,basic,0,en,other,craigslist,omg,Web,Mac Desktop,Firefox,US


---
## Request 1:
What is the average `age` of those who use each web browser type?

### Issues with dataset for this request
- the `age` variable has some suspect entries
- ranges from 1 to 150
- some entries in `age` are a year (e.g. 2014)
- some ages are younger than `date_account_created` date (error, impossible) (see Figure 1)
- AirBnB's Terms of Service require a user to be at least 18 years of age ([source](https://www.airbnb.com/help/article/2876/age-requirements#:~:text=We%20all%20love%20to%20wander,account%20to%20travel%20or%20host.))
- the oldest verified human age is 122 years old ([source](https://en.wikipedia.org/wiki/List_of_the_verified_oldest_people))

![screenshot-examples-years-entered-as-age.png](screenshots/screenshot-examples-years-entered-as-age.png)

![screenshot-age-younger-than-account-created.png](screenshots/screenshot-age-younger-than-account-created.png)

![screenshot-years-entered-as-age.png](screenshots/screenshot-years-entered-as-age.png)


In [33]:
# keep only rows whose age is at most 122 years (oldest verified human age);
# rows with a missing age fail the comparison and are dropped as well
airbnbUsers_filtered = airbnbUsers.loc[airbnbUsers['age'].le(122)]

In [14]:
# keep only plausible ages: at least 18 (AirBnB's minimum user age) and at most
# 122 (oldest verified human age). Both bounds are applied to the raw frame so
# this cell is idempotent and gives the same result no matter which cells ran
# before it; rows with a missing age fail the comparison and are dropped.
airbnbUsers_filtered = airbnbUsers[airbnbUsers['age'].between(18, 122)]

In [15]:
# preview the first ten rows of the age-filtered data frame
airbnbUsers_filtered.head(n=10)

Unnamed: 0,id,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country_destination
1,820tgsjxq7,5/25/2011,20090500000000.0,,MALE,38.0,facebook,0,en,seo,google,untracked,Web,Mac Desktop,Chrome,NDF
2,4ft3gnwmtx,9/28/2010,20090600000000.0,8/2/2010,FEMALE,56.0,basic,3,en,direct,direct,untracked,Web,Windows Desktop,IE,US
3,bjjt8pjhuk,12/5/2011,20091000000000.0,9/8/2012,FEMALE,42.0,facebook,0,en,direct,direct,untracked,Web,Mac Desktop,Firefox,other
4,87mebub9p4,9/14/2010,20091200000000.0,2/18/2010,-unknown-,41.0,basic,0,en,direct,direct,untracked,Web,Mac Desktop,Chrome,US
6,lsw9q7uk0j,1/2/2010,20100100000000.0,1/5/2010,FEMALE,46.0,basic,0,en,other,craigslist,untracked,Web,Mac Desktop,Safari,US
7,0d01nltbrs,1/3/2010,20100100000000.0,1/13/2010,FEMALE,47.0,basic,0,en,direct,direct,omg,Web,Mac Desktop,Safari,US
8,a1vcnhxeij,1/4/2010,20100100000000.0,7/29/2010,FEMALE,50.0,basic,0,en,other,craigslist,untracked,Web,Mac Desktop,Safari,US
9,6uh8zyj2gn,1/4/2010,20100100000000.0,1/4/2010,-unknown-,46.0,basic,0,en,other,craigslist,omg,Web,Mac Desktop,Firefox,US
10,yuuqmid2rp,1/4/2010,20100100000000.0,1/6/2010,FEMALE,36.0,basic,0,en,other,craigslist,untracked,Web,Mac Desktop,Firefox,US
11,om1ss59ys8,1/5/2010,20100100000000.0,,FEMALE,47.0,basic,0,en,other,craigslist,untracked,Web,iPhone,-unknown-,NDF


In [17]:
# Request 1: average `age` for each `first_browser` value (filtered dataset).
# `astype(int)` truncates each mean towards zero rather than rounding it.
(
    airbnbUsers_filtered
    .groupby('first_browser')['age']
    .mean()
    .astype(int)
)

first_browser
-unknown-             34
AOL Explorer          59
Android Browser       36
Apple Mail            39
Avant Browser         48
BlackBerry Browser    36
Camino                43
Chrome                35
Chrome Mobile         34
Chromium              35
CometBird             48
Comodo Dragon         42
CoolNovo              25
Firefox               39
IE                    42
IE Mobile             36
IceWeasel             39
Iron                  33
Kindle Browser        35
Maxthon               34
Mobile Firefox        46
Mobile Safari         38
Mozilla               31
NetNewsWire           44
Opera                 37
Opera Mini            33
Opera Mobile          30
PS Vita browser       32
Pale Moon             39
RockMelt              35
Safari                38
SeaMonkey             27
Silk                  41
SiteKiosk             45
SlimBrowser           33
Sogou Explorer        26
Stainless             52
TenFourFox            39
TheWorld Browser      28
Yandex.Brow

---
## Request 2:
What is the total `signup_flow` for each device?

In [18]:
# Request 2: total `signup_flow` per `first_device_type`
# NOTE: computed on the UNFILTERED dataset (no age restriction applied)
(
    airbnbUsers
    .groupby('first_device_type')['signup_flow']
    .sum()
)

first_device_type
Android Phone          39795
Android Tablet          4259
Desktop (Other)         1345
Mac Desktop            77131
Other/Unknown         115019
SmartPhone (Other)       420
Windows Desktop        52400
iPad                   31810
iPhone                375248
Name: signup_flow, dtype: int64

In [19]:
# Request 2: total `signup_flow` per `first_device_type`
# NOTE: computed on the FILTERED dataset (age restriction applied)
(
    airbnbUsers_filtered
    .groupby('first_device_type')['signup_flow']
    .sum()
)

first_device_type
Android Phone          15712
Android Tablet          1856
Desktop (Other)          670
Mac Desktop            49517
Other/Unknown          30566
SmartPhone (Other)       180
Windows Desktop        30755
iPad                   16323
iPhone                167869
Name: signup_flow, dtype: int64

---
## Request 3:
Include the `country` information from `airbnb_sample_submission.csv` in the `airbnb_test_users.csv` file

In [20]:
# load the test-users dataset from the project's relative data folder
airbnbTestUsers = pd.read_csv(filepath_or_buffer=r'data/airbnb_test_users.csv')

In [21]:
# load the sample-submission dataset (holds the `country` column to be added)
airbnbSampleSubmissions = pd.read_csv(filepath_or_buffer=r'data/airbnb_sample_submission.csv')

In [30]:
# preview the first five test-user rows to see which columns overlap
airbnbTestUsers.head(n=5)

Unnamed: 0,id,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser
0,5uwns89zht,7/1/2014,20140700000000.0,,FEMALE,35.0,facebook,0,en,direct,direct,untracked,Moweb,iPhone,Mobile Safari
1,jtl0dijy2j,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,untracked,Moweb,iPhone,Mobile Safari
2,xx0ulgorjt,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,linked,Web,Windows Desktop,Chrome
3,6c6puo6ix0,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,linked,Web,Windows Desktop,IE
4,czqhjk3yfe,7/1/2014,20140700000000.0,,-unknown-,,basic,0,en,direct,direct,untracked,Web,Mac Desktop,Safari


In [31]:
# preview the first five sample-submission rows to see which columns overlap
airbnbSampleSubmissions.head(n=5)

Unnamed: 0,id,country
0,5uwns89zht,NDF
1,jtl0dijy2j,NDF
2,xx0ulgorjt,NDF
3,6c6puo6ix0,NDF
4,czqhjk3yfe,NDF


In [27]:
# list the test-users column names to identify columns shared with the
# sample-submission data (candidate join keys)
airbnbTestUsers.columns

Index(['id', 'date_account_created', 'timestamp_first_active',
       'date_first_booking', 'gender', 'age', 'signup_method', 'signup_flow',
       'language', 'affiliate_channel', 'affiliate_provider',
       'first_affiliate_tracked', 'signup_app', 'first_device_type',
       'first_browser'],
      dtype='object')

In [28]:
# list the sample-submission column names to identify columns shared with the
# test-users data (candidate join keys)
airbnbSampleSubmissions.columns

Index(['id', 'country'], dtype='object')

In [24]:
# Request 3: add `country` to the test users by joining on the shared `id` key.
# An outer join keeps every row of the original dataset; an `id` present in only
# one of the two files is kept as well, with NaN in the columns it is missing.
# The key is named explicitly so the join cannot silently change if the two
# frames ever gain another column with the same name.
airbnbTestUsers2 = pd.merge(airbnbTestUsers, airbnbSampleSubmissions, on='id', how='outer')

In [25]:
# view the merged result: test-user columns plus the added `country` column
# (pandas abbreviates the display of a long frame to its first and last rows)
airbnbTestUsers2

Unnamed: 0,id,date_account_created,timestamp_first_active,date_first_booking,gender,age,signup_method,signup_flow,language,affiliate_channel,affiliate_provider,first_affiliate_tracked,signup_app,first_device_type,first_browser,country
0,5uwns89zht,7/1/2014,2.014070e+13,,FEMALE,35.0,facebook,0,en,direct,direct,untracked,Moweb,iPhone,Mobile Safari,NDF
1,jtl0dijy2j,7/1/2014,2.014070e+13,,-unknown-,,basic,0,en,direct,direct,untracked,Moweb,iPhone,Mobile Safari,NDF
2,xx0ulgorjt,7/1/2014,2.014070e+13,,-unknown-,,basic,0,en,direct,direct,linked,Web,Windows Desktop,Chrome,NDF
3,6c6puo6ix0,7/1/2014,2.014070e+13,,-unknown-,,basic,0,en,direct,direct,linked,Web,Windows Desktop,IE,NDF
4,czqhjk3yfe,7/1/2014,2.014070e+13,,-unknown-,,basic,0,en,direct,direct,untracked,Web,Mac Desktop,Safari,NDF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62091,cv0na2lf5a,9/30/2014,2.014090e+13,,-unknown-,31.0,basic,0,en,direct,direct,untracked,Web,Windows Desktop,IE,NDF
62092,zp8xfonng8,9/30/2014,2.014090e+13,,-unknown-,,basic,23,ko,direct,direct,untracked,Android,Android Phone,-unknown-,NDF
62093,fa6260ziny,9/30/2014,2.014090e+13,,-unknown-,,basic,0,de,direct,direct,linked,Web,Windows Desktop,Firefox,NDF
62094,87k0fy4ugm,9/30/2014,2.014090e+13,,-unknown-,,basic,0,en,sem-brand,google,omg,Web,Mac Desktop,Safari,NDF
