In [2]:
## Getting the data and loading packages
library(tidyverse)
library(stringr)
library(ggmap)
library(forcats)
library(rvest)

Loading required package: xml2
“package ‘xml2’ was built under R version 3.3.2”
Attaching package: ‘rvest’

The following object is masked from ‘package:purrr’:

    pluck

The following object is masked from ‘package:readr’:

    guess_encoding



In [9]:
## Scraping data from Wikipedia
html.world_ports <- read_html("https://en.wikipedia.org/wiki/List_of_busiest_container_ports")
df.world_ports <- html_table(html_nodes(html.world_ports, "table")[[1]], fill=TRUE)

In [10]:
## inspecting
glimpse(df.world_ports)

Observations: 50
Variables: 18
$ `#`          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
$ Port         <chr> "Shanghai", "Singapore", "Shenzhen", "Ningbo-Zhoushan"...
$ Jurisdiction <chr> "China", "Singapore", "China", "China", "South Korea",...
$ Region       <chr> "East Asia", "Southeast Asia", "East Asia", "East Asia...
$ Location     <chr> "Yangtze Delta", "Malacca Strait", "Pearl River Delta"...
$ `2016`       <chr> "37,133[12]", "30,904[13]", "23,979[14]", "21,560[15]"...
$ `2015[1]`    <chr> "36,537[12]", "30,922[13]", "24,204[14]", "20,620[15]"...
$ `2014[1]`    <chr> "35,268", "33,869", "23,798", "19,450", "18,423", "22,...
$ `2013[2]`    <chr> "33,617", "32,240", "23,280", "17,351", "17,690", "22,...
$ `2012[3]`    <chr> "32,529", "31,649", "22,940", "16,670", "17,046", "23,...
$ `2011[4]`    <chr> "31,700", "29,937", "22,570", "14,686", "16,185", "24,...
$ `2010[5]`    <chr> "29,069", "28,431", "22,510", "13,144", "14,157", "23,...
$ `2009[6]`    <chr> 

In [12]:
## Renaming the variables
 # transform names to lowercase
colnames(df.world_ports) <- colnames(df.world_ports) %>% tolower()

# inspect
colnames(df.world_ports)

In [18]:
# Get geospatial information (longitude/latitude)
geocode.world_ports <- geocode(df.world_ports$port)

Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Shanghai&sensor=false
Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Singapore&sensor=false
“geocode failed with status OVER_QUERY_LIMIT, location = "Singapore"”Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Shenzhen&sensor=false
“geocode failed with status OVER_QUERY_LIMIT, location = "Shenzhen"”Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Ningbo-Zhoushan&sensor=false
Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Busan&sensor=false
Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Hong%20Kong&sensor=false
“geocode failed with status OVER_QUERY_LIMIT, location = "Hong Kong"”Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Guangzhou&sensor=false
“geocode failed with status OVER_QUERY_LIMIT, location = "Guangzhou"”In

In [21]:
head(geocode.world_ports)

lon,lat
121.4737,31.23039
,
,
121.9878,29.90195
129.0756,35.17955
,


In [22]:
#--------------------------------------------------------
# COMBINE:
# - bind the new lat/long data to df.world_ports data frame
#--------------------------------------------------------
df.world_ports <- cbind(df.world_ports, geocode.world_ports)
head(df.world_ports)

#,port,jurisdiction,region,location,2016,2015[1],2014[1],2013[2],2012[3],2011[4],2010[5],2009[6],2008[7],2007[8],2006[9],2005[10],2004[11],lon,lat
1,Shanghai,China,East Asia,Yangtze Delta,"37,133[12]","36,537[12]",35268,33617,32529,31700,29069,25002,27980,26150,21710,18084,14557,121.4737,31.23039
2,Singapore,Singapore,Southeast Asia,Malacca Strait,"30,904[13]","30,922[13]",33869,32240,31649,29937,28431,25866,29918,27932,24792,23192,21329,,
3,Shenzhen,China,East Asia,Pearl River Delta,"23,979[14]","24,204[14]",23798,23280,22940,22570,22510,18250,21414,21099,18469,16197,13615,,
4,Ningbo-Zhoushan,China,East Asia,Yangtze Delta,"21,560[15]","20,620[15]",19450,17351,16670,14686,13144,10502,11226,9349,7068,5208,4006,121.9878,29.90195
5,Busan,South Korea,East Asia,Korean Strait,"19,850[16]","19,469[16]",18423,17690,17046,16185,14157,11954,13425,13270,12039,11843,11430,129.0756,35.17955
6,Hong Kong,Hong Kong SAR,East Asia,Pearl River Delta,"19,813[17]","20,073[17]",22374,22352,23117,24384,23532,20983,24248,23881,23539,22427,21984,,


In [23]:
## Manually code missing latitude/longitude data
#=========================================================================================
# RECODE lon and lat
# - There are 4 lon/lat values that weren't found with geocode()
# - We'll just hand code them
# - The values can be obtained at http://www.latlong.net/convert-address-to-lat-long.html
#
#
# Tanjung Pelepas, Johor Bahru: lon = 103.551035, lat = 1.362374
# Yingkou:..................... lon = 122.108231, lat = 40.266062
# Valencia, Spain:............. lon = -0.3762881, lat = 39.46991
# Malta Freeport:.............. lon = 14.537637 , lat = 35.816287
#
#=========================================================================================

df.world_ports <- df.world_ports %>%
                        mutate( lon = case_when(.$port == "Tanjung Pelepas" ~ 103.551035
                                                ,.$port == "Yingkou"        ~ 122.108231
                                                ,.$port == "Valencia"       ~ -0.3762881
                                                ,.$port == "Malta Freeport" ~ 14.537637
                                                ,.$port == "Port Said" ~ 32.301866
                                                ,.$port == "Nanjing" ~ 118.796877
                                                ,TRUE ~ .$lon
                                                )
                                ,lat = case_when(.$port == "Tanjung Pelepas" ~ 1.362374
                                                ,.$port == "Yingkou"        ~ 40.266062
                                                ,.$port == "Valencia"       ~ 39.46991
                                                ,.$port == "Malta Freeport" ~ 35.816287
                                                ,.$port == "Port Said" ~ 31.265289
                                                ,.$port == "Nanjing" ~ 32.060255
                                                ,TRUE ~ .$lat
                                                )
                                )

# CHECK
df.world_ports %>% filter(port == "Tanjung Pelepas") %>% select(lat,lon)
df.world_ports %>% filter(port == "Yingkou") %>% select(lat,lon)
df.world_ports %>% filter(port == "Valencia") %>% select(lat,lon)
df.world_ports %>% filter(port == "Malta Freeport") %>% select(lat,lon)


“package ‘bindrcpp’ was built under R version 3.3.2”

lat,lon
1.362374,103.551


lat,lon
40.26606,122.1082


lat,lon
39.46991,-0.3762881


lat,lon


In [24]:
df.world_ports

#,port,jurisdiction,region,location,2016,2015[1],2014[1],2013[2],2012[3],2011[4],2010[5],2009[6],2008[7],2007[8],2006[9],2005[10],2004[11],lon,lat
1,Shanghai,China,East Asia,Yangtze Delta,"37,133[12]","36,537[12]",35268,33617.0,32529.0,31700.0,29069.0,25002,27980.0,26150.0,21710.0,18084.0,14557.0,121.4737021,31.23039
2,Singapore,Singapore,Southeast Asia,Malacca Strait,"30,904[13]","30,922[13]",33869,32240.0,31649.0,29937.0,28431.0,25866,29918.0,27932.0,24792.0,23192.0,21329.0,,
3,Shenzhen,China,East Asia,Pearl River Delta,"23,979[14]","24,204[14]",23798,23280.0,22940.0,22570.0,22510.0,18250,21414.0,21099.0,18469.0,16197.0,13615.0,,
4,Ningbo-Zhoushan,China,East Asia,Yangtze Delta,"21,560[15]","20,620[15]",19450,17351.0,16670.0,14686.0,13144.0,10502,11226.0,9349.0,7068.0,5208.0,4006.0,121.9878356,29.901952
5,Busan,South Korea,East Asia,Korean Strait,"19,850[16]","19,469[16]",18423,17690.0,17046.0,16185.0,14157.0,11954,13425.0,13270.0,12039.0,11843.0,11430.0,129.0756416,35.179554
6,Hong Kong,Hong Kong SAR,East Asia,Pearl River Delta,"19,813[17]","20,073[17]",22374,22352.0,23117.0,24384.0,23532.0,20983,24248.0,23881.0,23539.0,22427.0,21984.0,,
7,Guangzhou,China,East Asia,Pearl River Delta,"18,858[18]",17625,16160,15309.0,14744.0,14400.0,12550.0,11190,11001.0,9200.0,6600.0,4685.0,3308.0,,
8,Qingdao,China,East Asia,Yellow Sea,"18,010[19]",17510,16624,15520.0,14503.0,13020.0,12012.0,10260,10320.0,9462.0,7702.0,6307.0,5140.0,120.382609,36.067108
9,Dubai,United Arab Emirates,Western Asia,Arab Peninsula,"14,772[20]",15592,14750,13641.0,13270.0,13000.0,11600.0,11124,11827.0,10653.0,8923.0,7619.0,6429.0,,
10,Tianjin,China,East Asia,Yellow Sea,"14,490[21]",14090,14050,13010.0,12300.0,11500.0,10080.0,8700,8500.0,7103.0,5950.0,4801.0,3814.0,117.3616476,39.343357
