Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vCard/JSON extraction #19443

Closed
wants to merge 32 commits into from
Closed
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
eb2c7e0
Merge pull request #11 from servo/master
niravjain Nov 13, 2017
015ef37
Merge pull request #12 from servo/master
CJ8664 Nov 22, 2017
43aeb7c
Added microdata module
niravjain Nov 24, 2017
f306835
Code to send msg from servo to servoshell (EmbedderMsg)
CJ8664 Nov 25, 2017
0530512
Merge branch 'master' of https://github.com/CJ8664/servo
CJ8664 Nov 25, 2017
f939632
naming convention
CJ8664 Nov 25, 2017
3d68d2a
trying serde_json crate
CJ8664 Nov 25, 2017
a4ed961
Uploading erroneous code
CJ8664 Nov 25, 2017
4628943
Uploading erroneous code
CJ8664 Nov 25, 2017
cbfc666
Merge remote-tracking branch 'refs/remotes/origin/serde_try' into ser…
CJ8664 Nov 25, 2017
146dd58
Merge pull request #13 from CJ8664/serde_try
CJ8664 Nov 25, 2017
9284187
serde json working for Hashmap
CJ8664 Nov 25, 2017
dd4dc70
Updated the servo-shell communication
CJ8664 Nov 29, 2017
43650c2
vCard working
CJ8664 Nov 29, 2017
19408a0
Adding adr to vCard
niravjain Nov 30, 2017
79d140d
Adding adr to vCard
niravjain Nov 30, 2017
aab6900
Merged vcard and json logic
CJ8664 Nov 30, 2017
35468f8
Fixed tidy errors
niravjain Nov 30, 2017
8cafd64
Updated code to pass the type of microdata as a parameter
CJ8664 Dec 1, 2017
87d1efa
Merge branch 'vcard' of https://github.com/CJ8664/servo into vcard
CJ8664 Dec 1, 2017
c580c3f
Added code to notify user via change in title
CJ8664 Dec 1, 2017
3cd9731
Created dummy test cases
CJ8664 Dec 1, 2017
d0cb12e
Removed JSON code and replaced with a stub
CJ8664 Dec 1, 2017
0ae2d08
Merge pull request #14 from CJ8664/vcard
CJ8664 Dec 1, 2017
7131823
Merge pull request #15 from servo/master
CJ8664 Dec 1, 2017
a3e5b9f
Updated manifest
CJ8664 Dec 1, 2017
0e5023d
Partially updated the code based on reviews
CJ8664 Dec 2, 2017
37f70c6
Fixed lint issues
CJ8664 Dec 2, 2017
543de1c
Fixed lint issues
CJ8664 Dec 2, 2017
a781bba
Merge fix
CJ8664 Dec 12, 2017
04a043d
Merge branch 'servo-master'
CJ8664 Dec 12, 2017
c740aab
Rebuild cargo
CJ8664 Dec 12, 2017
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

1,322 Cargo.lock

Large diffs are not rendered by default.

@@ -13,7 +13,7 @@ use msg::constellation_msg::{Key, KeyModifiers, KeyState, PipelineId, TopLevelBr
use net_traits::image::base::Image;
use profile_traits::mem;
use profile_traits::time;
use script_traits::{AnimationState, ConstellationMsg, EventResult, LoadData};
use script_traits::{AnimationState, ConstellationMsg, EventResult, LoadData, Microdata};
use servo_url::ServoUrl;
use std::fmt::{Debug, Error, Formatter};
use std::sync::mpsc::{Receiver, Sender};
@@ -146,6 +146,10 @@ pub enum EmbedderMsg {
LoadStart(TopLevelBrowsingContextId),
/// The load of a page has completed
LoadComplete(TopLevelBrowsingContextId),
/// Sends the extracted microdata from webpage.
/// The parameter is an enum containing either VCardData or JSONData.
/// These entries have a String that represents the actual microdata.
SendMicrodata(Microdata),
}

/// Messages from the painting thread and the constellation thread to the compositor thread.
@@ -237,6 +241,7 @@ impl Debug for EmbedderMsg {
EmbedderMsg::SetFullscreenState(..) => write!(f, "SetFullscreenState"),
EmbedderMsg::LoadStart(..) => write!(f, "LoadStart"),
EmbedderMsg::LoadComplete(..) => write!(f, "LoadComplete"),
EmbedderMsg::SendMicrodata(..) => write!(f, "SendMicrodata"),
}
}
}
@@ -11,7 +11,7 @@ use gleam::gl;
use ipc_channel::ipc::IpcSender;
use msg::constellation_msg::{Key, KeyModifiers, KeyState, TopLevelBrowsingContextId, TraversalDirection};
use net_traits::net_error_list::NetError;
use script_traits::{LoadData, MouseButton, TouchEventType, TouchId, TouchpadPressurePhase};
use script_traits::{LoadData, Microdata, MouseButton, TouchEventType, TouchId, TouchpadPressurePhase};
use servo_geometry::DeviceIndependentPixel;
use servo_url::ServoUrl;
use std::fmt::{Debug, Error, Formatter};
@@ -192,4 +192,7 @@ pub trait WindowMethods {
/// will want to avoid blocking on UI events, and just
/// run the event loop at the vsync interval.
fn set_animation_state(&self, _state: AnimationState) {}

/// Print Microdata on the Console or write to file
fn write_microdata(&self, _data: Microdata) {}
}
@@ -1321,6 +1321,9 @@ impl<Message, LTF, STF> Constellation<Message, LTF, STF>
FromScriptMsg::SetFullscreenState(state) => {
self.embedder_proxy.send(EmbedderMsg::SetFullscreenState(source_top_ctx_id, state));
}
FromScriptMsg::SendMicrodata(result) => {
self.embedder_proxy.send(EmbedderMsg::SendMicrodata(result));
}
}
}

@@ -82,6 +82,8 @@ script_plugins = {path = "../script_plugins"}
script_traits = {path = "../script_traits"}
selectors = { path = "../selectors" }
serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"
servo_allocator = {path = "../allocator"}
servo_arc = {path = "../servo_arc"}
servo_atoms = {path = "../atoms"}
@@ -99,6 +99,7 @@ use ipc_channel::ipc::{self, IpcSender};
use js::jsapi::{JSContext, JSRuntime};
use js::jsapi::JS_GetRuntime;
use metrics::{InteractiveFlag, InteractiveMetrics, InteractiveWindow, ProfilerMetadataFactory, ProgressiveWebMetric};
use microdata;
use msg::constellation_msg::{BrowsingContextId, Key, KeyModifiers, KeyState, TopLevelBrowsingContextId};
use net_traits::{FetchResponseMsg, IpcSend, ReferrerPolicy};
use net_traits::CookieSource::NonHTTP;
@@ -1710,6 +1711,14 @@ impl Document {

// Step 12.
// TODO: completely loaded.

// Step 13.

This comment has been minimized.

Copy link
@jdm

jdm Dec 1, 2017

Member

The comments here refer to actual step numbers defined in the HTML specification. We should not add ones for steps that do not exist :)

This comment has been minimized.

Copy link
@CJ8664

CJ8664 Dec 2, 2017

Author

The previous comments were already present, we just added step 13 for microdata as discussed our logic starts after page completes loading.

This comment has been minimized.

Copy link
@jdm

jdm Dec 2, 2017

Member

Right, but the comments including steps refer to the steps in https://html.spec.whatwg.org/multipage/#the-end . There is no step there about microdata; that's something we're adding that is not part of the specification.

let htmlelement = self.get_html_element();
let result = microdata::parse(self, htmlelement.unwrap().upcast::<Node>());
if let Some(data) = result {
let event = ScriptMsg::SendMicrodata(data);
self.send_to_constellation(event);
}
}

// https://html.spec.whatwg.org/multipage/#pending-parsing-blocking-script
@@ -3947,7 +3956,7 @@ impl DocumentMethods for Document {
}

fn update_with_current_time_ms(marker: &Cell<u64>) {
if marker.get() == Default::default() {
if marker.get() == 0 {
let time = time::get_time();
let current_time_ms = time.sec * 1000 + time.nsec as i64 / 1000000;
marker.set(current_time_ms as u64);
@@ -85,6 +85,8 @@ extern crate script_layout_interface;
extern crate script_traits;
extern crate selectors;
extern crate serde;
extern crate serde_derive;
extern crate serde_json;
extern crate servo_allocator;
extern crate servo_arc;
#[macro_use] extern crate servo_atoms;
@@ -120,6 +122,7 @@ mod dom;
pub mod fetch;
mod layout_image;
mod mem;
pub mod microdata;
mod microtask;
mod network_listener;
pub mod script_runtime;
@@ -0,0 +1,158 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use dom::bindings::codegen::Bindings::DocumentBinding::DocumentBinding::DocumentMethods;
use dom::bindings::codegen::Bindings::ElementBinding::ElementBinding::ElementMethods;
use dom::bindings::inheritance::Castable;
use dom::bindings::root::DomRoot;
use dom::document::Document;
use dom::element::Element;
use dom::htmlelement::HTMLElement;
use dom::node::Node;
use script_traits::Microdata;
use std::borrow::Cow;
use std::collections::HashMap;

/// Extracts microdata from a document, preferring vCard over JSON.
///
/// `doc` is scanned for hCard microdata by `parse_vcard`; if that yields a
/// non-empty string it is returned as `Microdata::VCardData`. Otherwise
/// `node` is handed to `parse_json`, and a non-empty result is returned as
/// `Microdata::JSONData`. Returns `None` when neither extractor produced
/// anything.
pub fn parse(doc: &Document, node: &Node) -> Option<Microdata> {
    let serialized_vcard = parse_vcard(doc);
    if !serialized_vcard.is_empty() {
        return Some(Microdata::VCardData(serialized_vcard));
    }

    // The JSON branch must test the JSON result: re-testing the vCard string
    // here would make this branch unreachable.
    let serialized_json = parse_json(node);
    if !serialized_json.is_empty() {
        return Some(Microdata::JSONData(serialized_json));
    }

    None
}

/// Joins the values of those `parts` that are present in `detail_map` with
/// `;`, in the order the parts are listed. Missing parts are skipped.
fn join_vcard_components(detail_map: &HashMap<String, String>, parts: &[&str]) -> String {
    parts.iter()
         .filter_map(|part| detail_map.get(*part))
         .map(|value| value.as_str())
         .collect::<Vec<_>>()
         .join(";")
}

/// Serializes the first hCard microdata item found in `doc` as vCard text.
///
/// The document is walked in pre-order. Collection starts at the first HTML
/// element whose first `itemtype` token is
/// `http://microformats.org/profile/hcard` and stops when a second such
/// element is met. While collecting, an element with `itemprop` and
/// `itemscope` opens a new group named after its first `itemprop` token; an
/// element with only `itemprop` stores its inner HTML under that token in the
/// most recently opened group.
///
/// Returns an empty string when no hCard item is found; otherwise a block
/// starting with `BEGIN:VCARD` and ending with `END:VCARD` that contains the
/// document URL, the trimmed title (if non-empty) and the `N`, `ORG`, `TEL`
/// and `ADR` lines for the groups that were present.
pub fn parse_vcard(doc: &Document) -> String {
    let root = doc.upcast::<Node>();
    let mut start_vcard = false;
    let mut master_map: HashMap<String, HashMap<String, String>> = HashMap::new();
    // Name of the group (itemscope'd property) that plain properties attach to.
    let mut master_key = String::new();

    for element in root.traverse_preorder().filter_map(DomRoot::downcast::<Element>) {
        if !element.is::<HTMLElement>() {
            continue;
        }

        if element.has_attribute(&local_name!("itemtype")) {
            let is_hcard = element.get_tokenlist_attribute(&local_name!("itemtype"))
                                  .first()
                                  .map_or(false, |token| {
                                      token.trim() == "http://microformats.org/profile/hcard"
                                  });
            if is_hcard {
                // Only the first hCard is serialized; a second one ends the scan.
                if start_vcard {
                    break;
                }
                start_vcard = true;
            }
        }

        if !start_vcard {
            continue;
        }

        // Only the first itemprop token is considered.
        let prop = match element.get_tokenlist_attribute(&local_name!("itemprop")).first() {
            Some(token) => token.trim().to_owned(),
            None => continue,
        };

        if element.has_attribute(&local_name!("itemscope")) {
            master_map.entry(prop.clone()).or_insert_with(HashMap::new);
            master_key = prop;
        } else if let Ok(inner_html) = element.GetInnerHTML() {
            // A property whose markup cannot be serialized is skipped rather
            // than panicking in the middle of page load.
            master_map.entry(master_key.clone())
                      .or_insert_with(HashMap::new)
                      .insert(prop, String::from(inner_html));
        }
    }

    if !start_vcard {
        return String::new();
    }

    let mut result = String::from("BEGIN:VCARD\nPROFILE:VCARD\nVERSION:4.0\nSOURCE:");
    result.push_str(doc.url().as_str());

    let title = doc.Title();
    let trimmed_title = title.trim();
    if !trimmed_title.is_empty() {
        result.push_str("\nNAME:");
        result.push_str(trimmed_title);
    }
    result.push('\n');

    for info_type in ["n", "org", "tel", "adr"].iter() {
        let detail_map = match master_map.get(*info_type) {
            Some(map) => map,
            None => continue,
        };

        // NOTE(review): absent components are dropped rather than emitted as
        // empty fields, and the ADR order below differs from the positional
        // layout in RFC 6350 -- confirm whether consumers rely on positions.
        let parts: &[&str] = match *info_type {
            "n" => &["family-name", "given-name",
                     "additional-name", "honorific-prefix", "honorific-suffix"],
            "org" => &["organization-name", "organization-unit"],
            "tel" => &["value"],
            "adr" => &["street-address", "locality", "region", "postal-code",
                       "country-name", "post-office-box", "extended-address"],
            _ => continue,
        };

        result.push_str(&info_type.to_ascii_uppercase());
        result.push(':');
        result.push_str(&join_vcard_components(detail_map, parts));
        result.push('\n');
    }

    result.push_str("END:VCARD");
    result
}

/// Placeholder for JSON microdata extraction.
///
/// Currently ignores `node` and always yields an empty string.
pub fn parse_json(node: &Node) -> String {
    // TODO Write the logic for JSON Parsing
    String::new()
}
@@ -161,6 +161,15 @@ pub enum JsEvalResult {
Ok(Vec<u8>)
}

/// The result of parsing microdata from a webpage.
///
/// Each variant wraps the extracted data as a plain `String`; the variant
/// records which of the two supported formats that string is in.
#[derive(Debug, Deserialize, Serialize)]
pub enum Microdata {
/// The String that has the vCard information
VCardData(String),
/// The String that has the JSON information
JSONData(String),
}

impl LoadData {
/// Create a new `LoadData` object.
pub fn new(url: ServoUrl,
@@ -9,6 +9,7 @@ use IFrameLoadInfo;
use IFrameLoadInfoWithData;
use LayoutControlMsg;
use LoadData;
use Microdata;
use MozBrowserEvent;
use WorkerGlobalScopeInit;
use WorkerScriptLoadOrigin;
@@ -163,6 +164,10 @@ pub enum ScriptMsg {
GetScreenAvailSize(IpcSender<(Size2D<u32>)>),
/// Requests that the compositor shut down.
Exit,
/// Sends the extracted microdata from webpage.
/// The parameter is an enum containing either VCardData or JSONData.
/// These entries have a String that represents the actual microdata.
SendMicrodata(Microdata),
}

/// Entities required to spawn service workers
@@ -461,6 +461,10 @@ impl<Window> Servo<Window> where Window: WindowMethods + 'static {
// TODO(pcwalton): Specify which frame's load completed.
self.compositor.window.load_end(top_level_browsing_context);
},

(EmbedderMsg::SendMicrodata(result), ShutdownState::NotShuttingDown) => {
self.compositor.window.write_microdata(result);
},
}
}
}
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.