Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/#84/list entity plugin double aggregation #85

Merged
merged 3 commits into from
Sep 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 7 additions & 15 deletions dialogy/plugins/text/list_entity_plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from typing import Any, Dict, List, Optional, Tuple

import pandas as pd
import pydash as py_
from tqdm import tqdm

from dialogy import constants as const
Expand Down Expand Up @@ -183,12 +182,11 @@ def get_entities(self, transcripts: List[str]) -> List[BaseEntity]:
"""
matches_on_transcripts = self._search(transcripts)
logger.debug(matches_on_transcripts)
entity_metadata = []
entities: List[BaseEntity] = []

for i, matches_on_transcript in enumerate(matches_on_transcripts):
for text, label, value, span in matches_on_transcript:
entity = {
entity_dict = {
"start": span[0],
"end": span[1],
"body": text,
Expand All @@ -204,18 +202,12 @@ def get_entities(self, transcripts: List[str]) -> List[BaseEntity]:
"values": [{"value": value}],
},
}
entity_metadata.append(entity)
entity_groups = py_.group_by(entity_metadata, lambda e: e["__group"])
logger.debug("entity groups:")
logger.debug(pformat(entity_groups))

for _, grouped_entities in entity_groups.items():
entity = sorted(grouped_entities, key=lambda e: e["alternative_index"])[0]
del entity["__group"]
entity["score"] = round(len(grouped_entities) / len(transcripts), 4)
entity_ = KeywordEntity.from_dict(entity)
entity_.add_parser(self).set_value()
entities.append(entity_)

del entity_dict["__group"]
entity_ = KeywordEntity.from_dict(entity_dict)
entity_.add_parser(self).set_value()
entities.append(entity_)

logger.debug("Parsed entities")
logger.debug(entities)

Expand Down
2 changes: 1 addition & 1 deletion docs/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 4ff607b2d55930797f1169943f8bc6fe
config: 6295a05aa3ec70c95f888807ce0d283a
tags: 645f666f9bcd5a90fca523b33c5a78b7
5 changes: 3 additions & 2 deletions docs/_modules/dialogy/base/entity_extractor.html
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,8 @@ <h1>Source code for dialogy.base.entity_extractor</h1><div class="highlight"><pr
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">apply_filters</span><span class="p">(</span><span class="n">aggregate_entities</span><span class="p">)</span></div>

<span class="k">def</span> <span class="nf">_make_transform_values</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">transcript</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<div class="viewcode-block" id="EntityExtractor.make_transform_values"><a class="viewcode-back" href="../../../source/dialogy.base.html#dialogy.base.entity_extractor.EntityExtractor.make_transform_values">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">make_transform_values</span><span class="p">(</span><span class="n">transcript</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="sd">"""</span>
<span class="sd"> Make transcripts from a string/json-string.</span>

Expand All @@ -322,7 +323,7 @@ <h1>Source code for dialogy.base.entity_extractor</h1><div class="highlight"><pr
<span class="n">transcript</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">transcript</span><span class="p">)</span>
<span class="k">return</span> <span class="n">normalize</span><span class="p">(</span><span class="n">transcript</span><span class="p">)</span>
<span class="k">except</span> <span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">JSONDecodeError</span><span class="p">,</span> <span class="ne">TypeError</span><span class="p">):</span>
<span class="k">return</span> <span class="n">normalize</span><span class="p">(</span><span class="n">transcript</span><span class="p">)</span></div>
<span class="k">return</span> <span class="n">normalize</span><span class="p">(</span><span class="n">transcript</span><span class="p">)</span></div></div>
</pre></div>
</article>
<footer>
Expand Down
9 changes: 3 additions & 6 deletions docs/_modules/dialogy/cli.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<meta name="color-scheme" content="light dark"><link rel="index" title="Index" href="../../genindex.html" /><link rel="search" title="Search" href="../../search.html" />

<meta name="generator" content="sphinx-3.5.4, furo 2021.04.11.beta34"/>
<title>dialogy.cli - dialogy 0.8.2 documentation</title>
<title>dialogy.cli - dialogy 0.8.1 documentation</title>
<link rel="stylesheet" href="../../_static/styles/furo.css?digest=59ab60ac09ea94ccfe6deddff6d715cce948a6fc">
<link rel="stylesheet" href="../../_static/pygments.css">
<link media="(prefers-color-scheme: dark)" rel="stylesheet" href="../../_static/pygments_dark.css">
Expand Down Expand Up @@ -87,7 +87,7 @@
</label>
</div>
<div class="header-center">
<a href="../../index.html"><div class="brand">dialogy 0.8.2 documentation</div></a>
<a href="../../index.html"><div class="brand">dialogy 0.8.1 documentation</div></a>
</div>
<div class="header-right">
<label class="toc-overlay-icon toc-header-icon no-toc" for="__toc">
Expand All @@ -101,7 +101,7 @@
<div class="sidebar-sticky"><a class="sidebar-brand" href="../../index.html">


<span class="sidebar-brand-text">dialogy 0.8.2 documentation</span>
<span class="sidebar-brand-text">dialogy 0.8.1 documentation</span>

</a><form class="sidebar-search-container" method="get" action="../../search.html">
<input class="sidebar-search" placeholder=Search name="q">
Expand All @@ -115,7 +115,6 @@
<li class="toctree-l1 has-children"><a class="reference internal" href="../../source/dialogy.plugins.html">dialogy.plugins package</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label for="toctree-checkbox-1"><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2 has-children"><a class="reference internal" href="../../source/dialogy.plugins.text.html">dialogy.plugins.text package</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label for="toctree-checkbox-2"><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l3"><a class="reference internal" href="../../source/dialogy.plugins.text.calibration.html">dialogy.plugins.text.calibration package</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../source/dialogy.plugins.text.canonicalization.html">dialogy.plugins.text.canonicalization package</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../source/dialogy.plugins.text.classification.html">dialogy.plugins.text.classification package</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../source/dialogy.plugins.text.duckling_plugin.html">dialogy.plugins.text.duckling_plugin package</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../source/dialogy.plugins.text.list_entity_plugin.html">dialogy.plugins.text.list_entity_plugin package</a></li>
Expand All @@ -128,7 +127,6 @@
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../../source/dialogy.plugins.text.html">dialogy.plugins.text package</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label for="toctree-checkbox-3"><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="../../source/dialogy.plugins.text.calibration.html">dialogy.plugins.text.calibration package</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../source/dialogy.plugins.text.canonicalization.html">dialogy.plugins.text.canonicalization package</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../source/dialogy.plugins.text.classification.html">dialogy.plugins.text.classification package</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../source/dialogy.plugins.text.duckling_plugin.html">dialogy.plugins.text.duckling_plugin package</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../source/dialogy.plugins.text.list_entity_plugin.html">dialogy.plugins.text.list_entity_plugin package</a></li>
Expand All @@ -138,7 +136,6 @@
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../source/dialogy.plugins.text.calibration.html">dialogy.plugins.text.calibration package</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../source/dialogy.plugins.text.canonicalization.html">dialogy.plugins.text.canonicalization package</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../source/dialogy.plugins.text.classification.html">dialogy.plugins.text.classification package</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../source/dialogy.plugins.text.duckling_plugin.html">dialogy.plugins.text.duckling_plugin package</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../source/dialogy.plugins.text.list_entity_plugin.html">dialogy.plugins.text.list_entity_plugin package</a></li>
Expand Down
10 changes: 5 additions & 5 deletions docs/_modules/dialogy/parser/text/entity/duckling_parser.html
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ <h1>Source code for dialogy.parser.text.entity.duckling_parser</h1><div class="h
<span class="kn">import</span> <span class="nn">attr</span>
<span class="kn">import</span> <span class="nn">pytz</span>
<span class="kn">import</span> <span class="nn">requests</span>
<span class="kn">from</span> <span class="nn">pytz.tzinfo</span> <span class="kn">import</span> <span class="n">BaseTzInfo</span> <span class="c1"></span>
<span class="kn">from</span> <span class="nn">pytz.tzinfo</span> <span class="kn">import</span> <span class="n">BaseTzInfo</span> <span class="c1"># type: ignore</span>

<span class="kn">from</span> <span class="nn">dialogy.constants</span> <span class="kn">import</span> <span class="n">EntityKeys</span>
<span class="kn">from</span> <span class="nn">dialogy.plugin</span> <span class="kn">import</span> <span class="n">Plugin</span><span class="p">,</span> <span class="n">PluginFn</span>
Expand Down Expand Up @@ -402,12 +402,12 @@ <h1>Source code for dialogy.parser.text.entity.duckling_parser</h1><div class="h
<span class="c1"># 2. create an Entity object from the entity dict.</span>
<span class="k">for</span> <span class="n">entity</span> <span class="ow">in</span> <span class="n">entities_json</span><span class="p">:</span>
<span class="k">if</span> <span class="n">entity</span><span class="p">[</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">VALUE</span><span class="p">][</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">TYPE</span><span class="p">]</span> <span class="o">==</span> <span class="n">EntityKeys</span><span class="o">.</span><span class="n">INTERVAL</span><span class="p">:</span>
<span class="bp">cls</span> <span class="o">=</span> <span class="n">dimension_entity_map</span><span class="p">[</span><span class="n">entity</span><span class="p">[</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">DIM</span><span class="p">]][</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">INTERVAL</span><span class="p">]</span> <span class="c1"></span>
<span class="bp">cls</span> <span class="o">=</span> <span class="n">dimension_entity_map</span><span class="p">[</span><span class="n">entity</span><span class="p">[</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">DIM</span><span class="p">]][</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">INTERVAL</span><span class="p">]</span> <span class="c1"># type: ignore</span>
<span class="n">duckling_entity</span> <span class="o">=</span> <span class="bp">cls</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mutate_entity</span><span class="p">(</span><span class="n">entity</span><span class="p">))</span>
<span class="n">duckling_entity</span><span class="o">.</span><span class="n">set_value</span><span class="p">()</span>
<span class="n">entity_object_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">duckling_entity</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">entity</span><span class="p">[</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">VALUE</span><span class="p">][</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">TYPE</span><span class="p">]</span> <span class="o">==</span> <span class="n">EntityKeys</span><span class="o">.</span><span class="n">VALUE</span><span class="p">:</span>
<span class="bp">cls</span> <span class="o">=</span> <span class="n">dimension_entity_map</span><span class="p">[</span><span class="n">entity</span><span class="p">[</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">DIM</span><span class="p">]][</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">VALUE</span><span class="p">]</span> <span class="c1"></span>
<span class="bp">cls</span> <span class="o">=</span> <span class="n">dimension_entity_map</span><span class="p">[</span><span class="n">entity</span><span class="p">[</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">DIM</span><span class="p">]][</span><span class="n">EntityKeys</span><span class="o">.</span><span class="n">VALUE</span><span class="p">]</span> <span class="c1"># type: ignore</span>
<span class="n">duckling_entity</span> <span class="o">=</span> <span class="bp">cls</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mutate_entity</span><span class="p">(</span><span class="n">entity</span><span class="p">))</span>
<span class="n">duckling_entity</span><span class="o">.</span><span class="n">set_value</span><span class="p">()</span>
<span class="n">entity_object_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">duckling_entity</span><span class="p">)</span>
Expand Down Expand Up @@ -482,12 +482,12 @@ <h1>Source code for dialogy.parser.text.entity.duckling_parser</h1><div class="h
<span class="n">access</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">access</span>
<span class="n">mutate</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">mutate</span>
<span class="n">input_</span> <span class="o">=</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">access</span><span class="p">,</span> <span class="n">Callable</span><span class="p">):</span> <span class="c1"></span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">access</span><span class="p">,</span> <span class="n">Callable</span><span class="p">):</span> <span class="c1"># type: ignore</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">"Expected `access` to be Callable,"</span>
<span class="sa">f</span><span class="s2">" got access=</span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">access</span><span class="p">)</span><span class="si">}</span><span class="s2"> mutate=</span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">mutate</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span>
<span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">mutate</span><span class="p">,</span> <span class="n">Callable</span><span class="p">):</span> <span class="c1"></span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">mutate</span><span class="p">,</span> <span class="n">Callable</span><span class="p">):</span> <span class="c1"># type: ignore</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">"Expected `mutate` to be Callable,"</span>
<span class="sa">f</span><span class="s2">" got access=</span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">access</span><span class="p">)</span><span class="si">}</span><span class="s2"> mutate=</span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">mutate</span><span class="p">)</span><span class="si">}</span><span class="s2">"</span>
Expand Down
Loading