# Tree

In [1]:
using Requests, Cascadia,Gumbo, AbstractTrees,ProgressMeter

In [2]:
using AbstractTrees
import AbstractTrees: children, printnode
import Base: start, done, next

In [3]:
# A node of the area hierarchy. The type itself is immutable, but the
# `subarea` vector it holds is mutated in place (children are `push!`ed
# onto it after construction).
immutable Area 
    # Label printed for this node by `printnode`.
    name::String
    # Identifier sent as the "k" form field when requesting this node's children.
    code::String
    # Level value supplied when the node is constructed.
    level::Int
    # Child nodes; filled with `Area`s and, at the bottom layer, `Location`s.
    subarea::Vector
end
    
    

In [4]:
# A leaf entry of the tree: a named place with a coordinate pair.
# Instances are built by `extract_location` from the server response.
immutable Location
    # Label printed for this node by `printnode`.
    name::String
    # Coordinates parsed from the response record
    # (presumably decimal degrees — confirm against the data source).
    lat::Float64
    lon::Float64
end
    

In [5]:
# Iteration protocol: iterating an `Area` walks its direct children by
# delegating every step to the underlying `subarea` vector.
function start(area::Area)
    return start(area.subarea)
end

next(area::Area, i) = next(area.subarea, i)

function done(area::Area, i)
    return done(area.subarea, i)
end

# AbstractTrees hook: the children of an `Area` are the entries of `subarea`.
function children(area::Area)
    return area.subarea
end

# How each node type is rendered when the tree is printed:
# areas in blue, locations in green.
function printnode(io::IO, area::Area)
    return Base.print_with_color(:blue, io, area.name)
end

function printnode(io::IO, location::Location)
    return Base.print_with_color(:green, io, location.name)
end
# printnode(io::IO, f::File) = print(io, basename(f.path))

printnode (generic function with 5 methods)

## Helper Functions

In [6]:
# Endpoint that `add_areas!` and `add_locations!` POST to when fetching
# the children of an area.
url = "https://mobile.nwstbus.com.hk/nwp3/getlocation.php"

"https://mobile.nwstbus.com.hk/nwp3/getlocation.php"

In [7]:
"""
    extract_code(str)

Return `key => code`, the first two `|`-separated fields of the first
single-quoted substring found in `str`.
"""
function extract_code(str)
    quoted = match(r"'([^']+)'", str)[1]
    fields = split(quoted, "|")
    return fields[1] => fields[2]
end

extract_code (generic function with 1 method)

In [8]:
# Collect the `onclick` attribute value of every `li` element matched under
# `html_node`, skipping elements that have no such attribute. The result
# keeps the order in which the selector returned the elements.
function extract_lis(html_node)
    handlers = []
    for item in matchall(sel"li", html_node)
        attrs = item.attributes
        haskey(attrs, "onclick") || continue
        push!(handlers, attrs["onclick"])
    end
    return handlers
end

extract_lis (generic function with 1 method)

# ETA API

The first step is to get all of the sub-locations for the main areas. Because of their hierarchical nature, it makes sense to store this information in a tree.

In [9]:
"""
    add_areas!(top_area, level)

Attach one more layer of `Area`s beneath every current leaf of `top_area`.

For each leaf, the leaf's `code` is POSTed to `url` together with `level`,
and every entry extracted from the response becomes a child `Area` created
with that same `level`. Returns `top_area`, which is mutated in place.

A leaf whose response cannot be parsed is left without new children and a
warning is emitted. Errors raised by the HTTP request itself propagate.
"""
function add_areas!(top_area,level)

    data = Dict(
        "l"=> level,
        "t"=>"s",
        "lang"=>"1",
    )

    # Snapshot the leaves first: the loop appends children to them, and
    # mutating the tree while it is being lazily traversed is fragile.
    for area in collect(Leaves(top_area))
        data["k"] = area.code
        res = post(url,data=data)
        html = res |> String |> parsehtml
        try
            # All entries are parsed before the first push!, so a malformed
            # entry cannot leave the leaf half-populated.
            for (name,code) in extract_code.(extract_lis(html.root))
                push!(area.subarea, Area(name,code,level,[]))
            end
        catch err
            # Best-effort: report the failure instead of silently dropping it.
            warn("add_areas!: could not parse sub-areas of '$(area.name)' (code $(area.code)): $err")
        end
    end
    return top_area
end

add_areas! (generic function with 1 method)

In [10]:
# Root of the hierarchy; the top-level regions hang directly below it.
ALL = Area("All","ALL",1,[])

# Display name => code sent to the server for each top-level region.
areas = [
    "Kowloon" => "KL",
    "Hong Kong" => "HK",
    "New Territories" => "NT",
]

for (name,area_code) in areas
    push!(ALL.subarea, Area(name,area_code,2,[]))
end

# Each pass adds one layer beneath the current leaves of the tree.
ALL = add_areas!(ALL,2); # district
ALL = add_areas!(ALL,3); # sub-district

In [12]:
"""
    extract_location(str)

Build a `Location` from the first single-quoted, `|`-separated record in `str`.

The name is the first non-empty field; the latitude and longitude are the
third-from-last and second-from-last fields. Throws if `str` contains no
quoted record, if the record has too few fields, or if a coordinate field
is not a valid floating-point number.
"""
function extract_location(str)
    group = match(r"'([^']+)'",str)[1]
    fields = split(group,"|")
    name = match(r"[^|]+",group).match
    # Parse explicitly as Float64. A bare `parse(s)` interprets the text as
    # Julia source code, which is unsafe on scraped input and may yield a
    # non-numeric value.
    lat = parse(Float64, fields[end-2])
    lon = parse(Float64, fields[end-1])
    return Location(name,lat,lon)
end

extract_location (generic function with 1 method)

In [13]:
"""
    add_locations!(area)

Fetch the locations registered under `area` and append them to
`area.subarea`.

Does nothing unless `area` is an `Area`. The lookup is best-effort: if the
request or the parsing fails, a warning is emitted, any locations appended
before the failure are kept, and the function returns normally.
Always returns `nothing`.
"""
function add_locations!(area)
    # Nodes that are not `Area`s have no code to query and nothing to expand.
    isa(area, Area) || return nothing

    data = Dict(
        "l"=> 4,
        "t"=>"s",
        "lang"=>"1",
        "k"=> area.code,
    )

    try
        res = post(url,data=data)
        html = res |> String |> parsehtml
        for li in extract_lis(html.root)
            push!(area.subarea ,extract_location(li))
        end
    catch err
        # Report the failure instead of swallowing it silently.
        warn("add_locations!: could not load locations of '$(area.name)' (code $(area.code)): $err")
    end
    return nothing
end
    
    

add_locations! (generic function with 1 method)

In [14]:
# Expand every current leaf with its locations. Iterate over a snapshot of
# the leaves, because `add_locations!` appends children to the very nodes
# being traversed.
for area in collect(Leaves(ALL))
    add_locations!(area)
end

In [25]:
print_tree(ALL)

[34mAll[39m
├─ [34mKowllon[39m
│  ├─ [34mKowloon City District[39m
│  │  ├─ [34mHo Man Tin (Oi Man / Ho Man Tin Estate)[39m
│  │  │  ├─ [32mArea:Ho Man Tin (Oi Man / Ho Man Tin Estate)[39m
│  │  │  ├─ [32mCarmel Secondary School[39m
│  │  │  ├─ [32mCascades[39m
│  │  │  ├─ [32mChun Man Court (near Carmel Village Street)[39m
│  │  │  ├─ [32mChun Man Court (near Chung Hau Street)[39m
│  │  │  ├─ [32mCivil Engineering and Development Building[39m
│  │  │  ├─ [32mDragon View[39m
│  │  │  ├─ [32mFat Kwong Street Sports Centre[39m
│  │  │  ├─ [32mHo Man Tin (South) Estate[39m
│  │  │  ├─ [32mHo Man Tin Bus Terminus[39m
│  │  │  ├─ [32mHo Man Tin Estate[39m
│  │  │  ├─ [32mHo Man Tin Park / Sports Centre[39m
│  │  │  ├─ [32mHo Man Tin Police Quarters[39m
│  │  │  ├─ [32mHo Man Tin Station Exit B2[39m
│  │  │  ├─ [32mHomantin Government Offices[39m
│  │  │  ├─ [32mHomantin Plaza[39m
│  │  │  ├─ [32mHong Kong Football Association Building[39m
│  │  │  ├

## ETA API

In [16]:
using StatsBase

In [18]:
# Materialise every leaf of the tree into a vector.
# NOTE(review): an `Area` that received no children is also a leaf, so this
# may not contain `Location`s only — confirm before relying on the element type.
bus_stops = collect(Leaves(ALL));

In [19]:
# Draw two random entries; they are used below as the start and finish
# arguments of `get_route_details`.
a = sample(bus_stops)
b = sample(bus_stops)

Location("Light Rail Tai Hing (South) Stop", 22.4026727446, 113.9719924604)

In [20]:
"""
    get_route_details(start::Location, finish::Location; t="2018-05-26 23:46")

Build the form fields for a route query from `start` to `finish`.

Only the parameter `Dict` is constructed and returned; no request is sent
here. The coordinates and names of both locations are copied into the
fields, and the remaining values are fixed.

`t` is the value sent as the `"t"` field (presumably the query time as
`"yyyy-mm-dd HH:MM"` — confirm against the service). Its default is the
value that used to be hard-coded, so existing calls behave as before.
"""
function get_route_details(start::Location,finish::Location; t="2018-05-26 23:46")
    data = Dict(
        "slat"=> start.lat,
        "slon"=> start.lon,
        "elat"=>finish.lat,
        "elon"=>finish.lon,
        "loc" =>"$(start.name), $(finish.name)",
        "leg"=>2,
        "ws"=>1.3,
        "t"=>t,
        "l"=>1,
        "m1"=>"T"

    )
    return data
end

get_route_details (generic function with 1 method)

In [21]:
d = get_route_details(a,b)

Dict{String,Any} with 10 entries:
  "elon" => 113.972
  "leg"  => 2
  "slon" => 114.256
  "t"    => "2018-05-26 23:46"
  "slat" => 22.3247
  "loc"  => "Po Lam Estate (Po Kan House / Po Kim House), Light Rail Tai Hing (…
  "l"    => 1
  "m1"   => "T"
  "elat" => 22.4027
  "ws"   => 1.3

In [22]:
# Endpoint that the route query fields are POSTed to in the next cell.
route_url = "https://mobile.nwstbus.com.hk/nwp3/ppsearch_p3.php?"

"https://mobile.nwstbus.com.hk/nwp3/ppsearch_p3.php?"

In [23]:
# Send the fields built by `get_route_details` as the request data.
res = post(route_url,data=d)

Response(200 OK, 13 headers, 6342 bytes in body)

In [24]:
# res |> String |> parsehtml

HTML Document:
<!DOCTYPE >
<HTML>
  <head></head>
  <body>
    ﻿
    <div id="p2p_routelist"style="overflow-x: hidden; overflow-y: hidden; background-color:#FFFFFF;">
      <div overflow-x:=""overflow-y:=""hidden;=""id="p2p_routelist_title">
        <table border="0"cellspacing="0"width="100%"cellpadding="0">
          <tbody>
            <tr height="5">
              <td bgcolor="#6c3f98">
                <font color="#FFFFFF">
                   
                </font>
              </td>
            </tr>
          </tbody>
        </table>
        <table border="0"cellspacing="0"width="100%"cellpadding="3">
          <tbody>
            <tr>
              <td valign="middle"width="50"align="center">
                <img alt="From"height="40px"id="ppsearch_focus"src="wpoint_from_en.png"title="From"width="40px"tabindex="303"onload="document.getElementById('ppsearch_focus').focus();"></img>
              </td>
              <td valign="middle"width="210"tabindex="304"align="left">
  

# ETA

In [100]:
# print_tree(ALL)

In [35]:
# slat: 22.325538023145
# slon: 114.21640493082
# elat: 22.256886
# elon: 114.233259
# t: 2018-05-26 20:26
# ws: 1.3
# leg: 2
# loc: Cheerful Court,Cape Collinson Buddhist Cemetery
# m1: T
# l: 1
# ssid: 5b095262805d0
# sysid: 10

# Bus Routes

It's useful to know all of the buses and their routes.

In [33]:
# Form fields sent with the route search request below.
# NOTE(review): the meaning of "rtype", "skey" and "sysid" is not evident
# from this notebook — confirm against the site's own requests.
d = Dict(
    "rtype"=>"X",
    "skey"=>"Input Route No.",
    "l"=>1,
    "sysid"=>52
)



Dict{String,Any} with 4 entries:
  "rtype" => "X"
  "l"     => 1
  "skey"  => "Input Route No."
  "sysid" => 52

In [35]:
# POST the search fields in `d` to the route search endpoint.
routesearch_url = "https://mobile.nwstbus.com.hk/nwp3/routesearch.php?"
res = post(routesearch_url,data=d)

Response(200 OK, 13 headers, 650498 bytes in body)

In [36]:
# Convert the response to a String and parse it into an HTML document;
# the trailing `;` suppresses the (large) cell output.
html = res |> String |> parsehtml;

In [40]:
# Select every node matching the `.routenocell` selector
# (presumably one per listed route — confirm).
bus_number = matchall(sel".routenocell",html.root);

In [52]:
bus_number[1].parent.parent.children[3]

Gumbo.HTMLElement{:td}:
<td valign="middle"align="left">
  <table>
    <tbody>
      <tr>
        <td valign="middle"width="100%"align="left">
          To: Felix Villas
        </td>
      </tr>
      <tr>
        <td valign="middle"width="100%"align="left">
          <font style="font-weight: normal; color:#555555;">
            Citybus
          </font>
        </td>
      </tr>
    </tbody>
  </table>
</td>


In [61]:
# Table rows inside the third child of the first match's grandparent; in the
# output below they hold the "To: …" text and the "Citybus" text.
m = matchall(sel"tr",bus_number[1].parent.parent.children[3])

2-element Array{Gumbo.HTMLNode,1}:
 Gumbo.HTMLElement{:tr}:
<tr>
  <td valign="middle"width="100%"align="left">
    To: Felix Villas
  </td>
</tr>
                                                            
 Gumbo.HTMLElement{:tr}:
<tr>
  <td valign="middle"width="100%"align="left">
    <font style="font-weight: normal; color:#555555;">
      Citybus
    </font>
  </td>
</tr>


In [69]:
Leaves.(m)

2-element Array{AbstractTrees.Leaves{Gumbo.HTMLElement{:tr}},1}:
 AbstractTrees.Leaves{Gumbo.HTMLElement{:tr}}(Gumbo.HTMLElement{:tr}:
<tr>
  <td valign="middle"width="100%"align="left">
    To: Felix Villas
  </td>
</tr>
)                                                            
 AbstractTrees.Leaves{Gumbo.HTMLElement{:tr}}(Gumbo.HTMLElement{:tr}:
<tr>
  <td valign="middle"width="100%"align="left">
    <font style="font-weight: normal; color:#555555;">
      Citybus
    </font>
  </td>
</tr>
)

In [63]:
a = m[1]

Gumbo.HTMLElement{:tr}:
<tr>
  <td valign="middle"width="100%"align="left">
    To: Felix Villas
  </td>
</tr>


In [71]:
b = first(Leaves(a))

HTML Text: To: Felix Villas

In [72]:
text(b)

"To: Felix Villas"

In [37]:
html

HTML Document:
<!DOCTYPE >
<HTML>
  <head></head>
  <body>
    <div id="itemlist_title">
      <table border="0"cellspacing="0"width="100%"cellpadding="0">
        <tbody>
          <tr>
            <td height="15"colspan="2"align="right">
              <img onclick="showp2pmenu2();"alt="Open/Close route input"id="slidebar"src="v-slide-c.png"title="Open/Close route input"style="cursor:pointer; width:100%; height:19px;"tabindex="310"onkeypress="if (event.keyCode==13){showp2pmenu2();}"onload="document.getElementById('slidebar').focus();"></img>
            </td>
          </tr>
          <tr height="22"tabindex="310"bgcolor="#6c3f98">
            <td colspan="2"align="right">
              <table width="100%">
                <tbody>
                  <tr>
                    <td style="padding-left:10px;">
                      <font color="#FFFFFF">
                        All Routes
                      </font>
                    </td>
                    <td style="border:1px solid