Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

美团编程题:编写一个算法解析以下符号,转换为json树的结构 #111

Open
sisterAn opened this issue Sep 21, 2020 · 8 comments
Labels

Comments

@sisterAn
Copy link
Owner

sisterAn commented Sep 21, 2020

// 编写一个算法解析以下符号,转换为json树的结构
let str = `<xml><div><p><a/></p><p></p></div></xml>`
@oxyzhg
Copy link

oxyzhg commented Sep 22, 2020

interface TNode {
  name: string;
  children: TNode[] | null;
}

const startTagReg: RegExp = /\<(\w+)\>/; // 匹配开始标签
const endTagReg: RegExp = /\<\/(\w+)\>/; // 匹配结束标签
const closeSelfTagReg: RegExp = /\<(\w+)\/\>/; // 匹配自闭合标签
const textNodeReg: RegExp = /\>(.*?)\<\//; // 匹配文本内容
const tagReg: RegExp = /\<\/?(\w+)\/?\>/g; // 全局匹配标签

let matchedTags: string[] = str.match(tagReg); // 在字符串中匹配到的标签数组

let htmlTree: TNode = null; // 保存生成的节点树
let nodeStacks: TNode[] = []; // 保存遍历过程中的节点栈
let currentNode: TNode | undefined = undefined;

// 根据标签创建新节点
function createNode(tag: string): TNode {
  const tagName = tag.replace(tagReg, "$1");
  return {
    name: tagName,
    children: null
  };
}
// 将节点插入到当前操作栈的节点中
function insertNode(node: TNode) {
  if (htmlTree === null) {
    htmlTree = node;
  } else {
    if (currentNode.children === null) {
      currentNode.children = [node];
    } else {
      currentNode.children.push(node);
    }
  }
}

for (let tag of matchedTags) {
  if (startTagReg.test(tag)) {
    // 创建新节点
    const node = createNode(tag);
    // 向树插入新节点
    insertNode(node);
    // 压入栈,并进入该栈
    nodeStacks.push(node);
    currentNode = nodeStacks[nodeStacks.length - 1];
  } else if (endTagReg.test(tag)) {
    // 找栈中的相对应的节点索引
    let index = nodeStacks
      .reverse()
      .findIndex(el => el.name === tag.replace(tagReg, "$1"));
    index = nodeStacks.length - index;
    nodeStacks.reverse();
    // 删除索引外的栈
    nodeStacks.splice(index - 1);
    // 设置删除后的最后一项为当前节点
    currentNode = nodeStacks[nodeStacks.length - 1];
  } else if (closeSelfTagReg.test(tag)) {
    // 创建新节点
    const node = createNode(tag);
    // 插入新节点
    insertNode(node);
    // 自闭合不需要进栈出栈
  }
}

console.log(matchedTags);
console.log(htmlTree);
{
  "name": "xml",
  "children": [
    {
      "name": "div",
      "children": [
        {
          "name": "p",
          "children": [
            {
              "name": "a",
              "children": null
            }
          ]
        },
        {
          "name": "p",
          "children": null
        }
      ]
    }
  ]
}

早上在地铁看到这题给我整懵了,趁快要吃饭的时间赶紧记录下思考过程,结合前些天看源码,就想到利用模拟进栈出栈的思路,并保留最后可操作节点供插入新值。开始标签就创建节点,并进入新栈;结束标签就找最后一个对应开始标签栈,其余移除栈;自闭合标签不需要进栈出栈,直接更新节点就好。

@dinjufen
Copy link

比较简陋的版本,假设给的字符串一定是标准的,没有容错处理,有不好的地方望指出。
思路:类似于常见的括号处理的题,

  1. 遇到起始标签则入栈,
  2. 遇到结束标签则查找最近的起始标签,新建node,将首尾间的所有node存为children,并将中间的所有node和标签出栈;
  3. 遇到单闭合标签则直接生成node,入栈。

后续可以逐渐完善,增加各种错误处理

function createNode(name) {
  return {
    name: name,
    children: null
  }
}

function parse(str) {
  const stack = [];
  let i = 0, j = 0;
  while (j < str.length) {
  while (str[i] !== '<') {
    i++;
  }
  i++;
  j = i;

  while (str[j] !== '>') {
    j++;
  }

  let ss = str.substring(i, j); // 去除<>之后的部分
  if (ss.startsWith('/')) {
    const name = ss.substring(1, ss.length);
    if (stack.length > 0) {
      let k = stack.length - 1;
      while (k >= 0) {
        if (stack[k] === name) {
          let node = createNode(stack[k]);
          let length = stack.length-1;
          while (length > k) {
            if (node.children === null) {
              node.children = []
            }
            node.children.push(stack[length]);
            stack.pop();
            length--;
          }
          stack.pop();
          stack.push(node)
          break;
        }
        k--;
      }
    } else {
      return null;
    }
  } else if (ss.endsWith('/')) {
    const name = ss.substring(0, ss.length - 1)
    let node = createNode(name)
    stack.push(node)
  } else {
    stack.push(ss)
  }

  i = j;
  j++;
  }
  return stack[0];
}

@sisterAn
Copy link
Owner Author

sisterAn commented Sep 23, 2020

interface TNode {
  name: string;
  children: TNode[] | null;
}

const startTagReg: RegExp = /\<(\w+)\>/; // 匹配开始标签
const endTagReg: RegExp = /\<\/(\w+)\>/; // 匹配结束标签
const closeSelfTagReg: RegExp = /\<(\w+)\/\>/; // 匹配自闭合标签
const textNodeReg: RegExp = /\>(.*?)\<\//; // 匹配文本内容
const tagReg: RegExp = /\<\/?(\w+)\/?\>/g; // 全局匹配标签

let matchedTags: string[] = str.match(tagReg); // 在字符串中匹配到的标签数组

let htmlTree: TNode = null; // 保存生成的节点树
let nodeStacks: TNode[] = []; // 保存遍历过程中的节点栈
let currentNode: TNode | undefined = undefined;

// 根据标签创建新节点
function createNode(tag: string): TNode {
  const tagName = tag.replace(tagReg, "$1");
  return {
    name: tagName,
    children: null
  };
}
// 将节点插入到当前操作栈的节点中
function insertNode(node: TNode) {
  if (htmlTree === null) {
    htmlTree = node;
  } else {
    if (currentNode.children === null) {
      currentNode.children = [node];
    } else {
      currentNode.children.push(node);
    }
  }
}

for (let tag of matchedTags) {
  if (startTagReg.test(tag)) {
    // 创建新节点
    const node = createNode(tag);
    // 向树插入新节点
    insertNode(node);
    // 压入栈,并进入该栈
    nodeStacks.push(node);
    currentNode = nodeStacks[nodeStacks.length - 1];
  } else if (endTagReg.test(tag)) {
    // 找栈中的相对应的节点索引
    let index = nodeStacks
      .reverse()
      .findIndex(el => el.name === tag.replace(tagReg, "$1"));
    index = nodeStacks.length - index;
    nodeStacks.reverse();
    // 删除索引外的栈
    nodeStacks.splice(index - 1);
    // 设置删除后的最后一项为当前节点
    currentNode = nodeStacks[nodeStacks.length - 1];
  } else if (closeSelfTagReg.test(tag)) {
    // 创建新节点
    const node = createNode(tag);
    // 插入新节点
    insertNode(node);
    // 自闭合不需要进栈出栈
  }
}

console.log(matchedTags);
console.log(htmlTree);
{
  "name": "xml",
  "children": [
    {
      "name": "div",
      "children": [
        {
          "name": "p",
          "children": [
            {
              "name": "a",
              "children": null
            }
          ]
        },
        {
          "name": "p",
          "children": null
        }
      ]
    }
  ]
}

早上在地铁看到这题给我整懵了,趁快要吃饭的时间赶紧记录下思考过程,结合前些天看源码,就想到利用模拟进栈出栈的思路,并保留最后可操作节点供插入新值。开始标签就创建节点,并进入新栈;结束标签就找最后一个对应开始标签栈,其余移除栈;自闭合标签不需要进栈出栈,直接更新节点就好。

@oxyzhg 这部分主要是用来判断什么的?直接出栈不就可以吗?

 // 找栈中的相对应的节点索引
let index = nodeStacks
  .reverse()
  .findIndex(el => el.name === tag.replace(tagReg, "$1"));
index = nodeStacks.length - index;
nodeStacks.reverse();
// 删除索引外的栈
nodeStacks.splice(index - 1);
// 设置删除后的最后一项为当前节点
currentNode = nodeStacks[nodeStacks.length - 1];

@oxyzhg
Copy link

oxyzhg commented Sep 23, 2020

@sisterAn 正常的话就是直接出栈的哈,这里当时考虑了栈尾元素不一定是当前闭合标签相匹配,可能想多了。

@LOVEwitch
Copy link

比较简陋的版本,假设给的字符串一定是标准的,没有容错处理,有不好的地方望指出。
思路:类似于常见的括号处理的题,用一个栈保存所有形如

的起始标签,遇到

则查找最近的

新建name为p的node,将首尾间的所有node存为p的children,并将中间的所有node和p标签出栈;
遇到

这种标签则直接生成node,入栈。
后续可以逐渐完善,增加各种错误处理
javascript let str =

`
function createNode(name) {
return {
name: name,
children: null
}
}

function parse(str) {
const stack = []
let i = 0, j = 0;
while (j < str.length) {
while (str[i] !== '<') {
i++;
}
i++;
j = i;

while (str[j] !== '>') {
  j++;
}

let ss = str.substring(i, j); // 去除<>之后的部分
if (ss.startsWith('/')) {
  const name = ss.substring(1, ss.length);
  if (stack.length > 0) {
    let k = stack.length - 1;
    while (k >= 0) {
      if (stack[k] === name) {
        let node = createNode(stack[k]);
        let length = stack.length-1;
        while (length > k) {
          if (node.children === null) {
            node.children = []
          }
          node.children.push(stack[length]);
          stack.pop();
          length--;
        }
        stack.pop();
        stack.push(node)
        break;
      }
      k--;
    }
  } else {
    return null;
  }
} else if (ss.endsWith('/')) {
  const name = ss.substring(0, ss.length - 1)
  let node = createNode(name)
  stack.push(node)
} else {
  stack.push(ss)
}

i = j;
j++;

}
return stack[0];
}
`

let index = nodeStacks .reverse() .findIndex(el => el.name === tag.replace(tagReg, "$1")); index = nodeStacks.length - index; nodeStacks.reverse();这段可以用lastIndexOf代替IndexOf吧,就不用来回reverse了

@mingju0421
Copy link

const DOM解析 = str => {
    let 标签开始 = /^<(\w+)>/
    let 标签结束 = /^<\/(\w+)>/
    let 自闭合标签 = /^<(\w+)\/>/
    let DOMList = []
    while (str) {
        let 临时标签
        if (标签开始.test(str)) {
            let dom = {}
            临时标签 = str.match(标签开始)
            dom.name = 临时标签[1]
            str = str.slice(临时标签[0].length)
            DOMList.push(dom)
        } else if (标签结束.test(str)) {
            临时标签 = str.match(标签结束)
            str = str.slice(临时标签[0].length)
            let dom = DOMList.pop()
            if (!str) return dom
            let parentDOM = DOMList[DOMList.length - 1]
            parentDOM.children && parentDOM.children.push(dom) || (parentDOM.children = [dom])
        } else {
            let dom = {}
            临时标签 = str.match(自闭合标签)
            dom.name = 临时标签[1]
            str = str.slice(临时标签[0].length)
            let parentDOM = DOMList[DOMList.length - 1]
            parentDOM.children && parentDOM.children.push(dom) || (parentDOM.children = [dom])
        }
    }
}
const str = '<div><ul><li><a></a><input/></li><li><a></a><input/></li><li><a></a><input/></li><li><a></a><input/></li></ul></div>'

console.log(JSON.stringify(DOM解析(str)))

@xllpiupiu
Copy link

function domParse(str) {

    const startTagReg = /\<(\w+)\>/;//匹配开始标签
    const endTagReg = /\<\/(\w+)\>/;//匹配结束标签
    const closeSelfTagReg = /\<(\w+)\/\>/;//匹配自闭合标签
    const textNodeReg = /\>(.*?)\<\//;//匹配文本内容
    const tagReg = /\<\/?(\w+)\/?\>/g;//全局匹配

    let htmlTree = null;
    let nodeStacks = [];
    let currentNode = null;
    //根据标签创建新的节点
    class CreateNode {
        constructor(tag) {
            const tagName = tag.replace(tagReg, "$1");//去掉<>/
            this.name = tagName
            this.children = null
        }
    }
    //将节点插入到当前操作栈的节点中
    function insertNode(node) {
        if (htmlTree === null) {
            htmlTree = node;
        } else {
            if (currentNode.children === null) {
                currentNode.children = [node];
            } else {
                currentNode.children.push(node);
            }
        }
    }
    const matchedTags = str.match(tagReg);//字符串中匹配到的标签数组
    console.log('matchedTags: ', matchedTags);
    for (let tag of matchedTags) {
        if (startTagReg.test(tag)) {
            //创建新节点
            const node = new CreateNode(tag);
            //向树插入新节点
            insertNode(node);
            //压入栈
            nodeStacks.push(node);
            //更新当前节点
            currentNode = nodeStacks[nodeStacks.length - 1];
        } else if (endTagReg.test(tag)) {
            nodeStacks.pop();
            //更新当前节点
            currentNode = nodeStacks[nodeStacks.length - 1]
        } else if (closeSelfTagReg.test(tag)) {
            const node =new CreateNode(tag)
            insertNode(node);
        }
    }
    return htmlTree;
}

@XfLoops
Copy link

XfLoops commented Oct 26, 2021

// 编写一个算法解析以下符号,转换为json树的结构
let str = `<xml><div><p><a/><a/></p><p></p></div><div><a/></div></xml>`

function parseXML(str) {
    const openTagRegex = /^<(\w+)>/
    const closeTagRegex = /^<\/(\w+)>/
    const selfTagRegex = /^<(\w+)\/>/
    const regs = [openTagRegex, closeTagRegex, selfTagRegex]
        
    const matchTag = (str) => {
        let newStr, tag, reg, match
        for(const _reg of regs) {
            if (_reg.test(str)) {
                const [_match, _tag] = _reg.exec(str)
                match = _match
                tag = _tag
                reg = _reg
                break
            }
        }
        newStr = match ? str.replace(match, '') : str
        return [reg, tag, newStr]
    }
    
    const createNode = (tag) => ({name: tag, children: []})
    const stack = []

    while(str) {
        const [reg, tag, newStr] = matchTag(str)
        switch (reg) {
            case openTagRegex:
                stack.push(createNode(tag))
                break
            case closeTagRegex:
                if (stack.length > 1) {
                    const top = stack.pop()
                    stack[stack.length - 1].children.push(top)
                }
                break
            case selfTagRegex:
                stack[stack.length - 1].children.push(createNode(tag))
                break
            default:
                throw new Error(`invalid tag string`)
        }
        str = newStr
    }
    return stack.pop()
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

7 participants