sociocom · shuntaroy · Jun 20, 2023 · Jun 19, 2023 · Jun 20, 2023
diff --git a/.gitignore b/.gitignore
@@ -164,4 +164,3 @@ cython_debug/
 #.idea/
 
 data/
-*.csv
diff --git a/EntityNormalizer/Dictionaries.py b/EntityNormalizer/Dictionaries.py
@@ -0,0 +1,25 @@
+import os
+
+from EntityNormalizer import EntityDictionary
+
+
+class MedDicCancerADE(EntityDictionary):
+    """EntityDictionary pointed at the bundled MedDic-CANCER-ADE-JA_202306.csv resource file."""
+    def __init__(self, source_column='出現形', target_column='[細分類]', index: bool = False):
+        super().__init__(  # the CSV path is anchored at this module's directory, not the working directory
+            path=os.path.join(os.path.dirname(__file__), "resources/MedDic-CANCER-ADE-JA_202306.csv"),
+            source_column=source_column,  # default '出現形' is the surface-form column
+            target_column=target_column,  # default '[細分類]' is the sub-classification column
+            index=index,  # forwarded unchanged; presumably selects integer column indexes - confirm in EntityDictionary
+        )
+
+
+class MedDicCancerDrug(EntityDictionary):
+    """EntityDictionary pointed at the bundled MedDic-CANCER-DRUG-JA_202306.csv resource file."""
+    def __init__(self, source_column='出現形', target_column='[細分類]', index: bool = False):
+        super().__init__(  # the CSV path is anchored at this module's directory, not the working directory
+            path=os.path.join(os.path.dirname(__file__), "resources/MedDic-CANCER-DRUG-JA_202306.csv"),
+            source_column=source_column,  # default '出現形' is the surface-form column
+            target_column=target_column,  # default '[細分類]' is the sub-classification column
+            index=index,  # forwarded unchanged; presumably selects integer column indexes - confirm in EntityDictionary
+        )
diff --git a/EntityNormalizer/resources/MedDic-CANCER-ADE-JA_202306.csv b/EntityNormalizer/resources/MedDic-CANCER-ADE-JA_202306.csv
diff --git a/EntityNormalizer/resources/MedDic-CANCER-DRUG-JA_202306.csv b/EntityNormalizer/resources/MedDic-CANCER-DRUG-JA_202306.csv
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1 @@
+include EntityNormalizer/resources/*
diff --git a/README.md b/README.md
@@ -5,14 +5,15 @@ Python tool for normalizing entities based on a dictionary.
 ## Usage
 
 This tool can be used as:
-- a *command line tool*, by cloning this repository and running `pip install .` under the root of this source; then you can run`main.py` with the required parameters to process your entity-listed file.
+- a *command line tool*, by cloning this repository and running `pip install .` under the root of this source; 
+then you can run `main.py` with the required parameters to process your entity-listed file.
 - a *Python package* , by installing the package using `pip install EntityNormalizer`
 
 ### Input and output
 
 The input file must contain one entity per line. 
 The output file will contain the normalized entities, again, one per line.  
-The dicitory file must be a comma-separated table file, i.e., `csv`.
+The dictionary file must be a comma-separated table file, i.e., `csv`.
 
 If the entity does not produce any match in the dictionary, it will be normalized to `[NO_MATCH]`. 
 If the entity is found in the dictionary but the normalization is empty, it will be normalized to `[NO_NORM_FOUND]`.  
@@ -36,23 +37,63 @@ If the entity is found in the dictionary but the normalization is empty, it will
 #### Example
 
 - With column names:  
+
     `python main.py data/input.txt data/output.txt data/dictionary.csv surface_form_col normalization_col --matching_threshold 50`
-- With integer column indexes: 
+- With integer column indexes:
+
     `python main.py data/input.txt data/output.txt data/dictionary.csv --index source 0 target 2 --matching_threshold 80`
 
 ---
 
  ### Python package usage
-After instalation,  the `normalized` function can be invoked with the dicitonary and a `list` of entities to produce a `list` of normalized entities.
+After installation, the `normalize` function can be invoked with the dictionary and a `list` of entities to produce a `list` of normalized entities.
 
 #### Example
 ```python
 from EntityNormalizer import EntityDictionary, normalize
 
-entities  =  [entity1, entity2, entity3]
+entities = ['entity1', 'entity2', 'entity3']
 
-normalization_dictionary  = EntityDictionary('data/dictionary.csv',  'surface_forms',  'normalizations')
-normalized  = normalize(entities,  normalization_dictionary,  matching_threshold=70)
+normalization_dictionary = EntityDictionary('data/dictionary.csv', 'surface_forms', 'normalizations')
+normalized = normalize(entities, normalization_dictionary, matching_threshold=70)
 
 print(normalized)
 ```
+
+## Bundled dictionaries
+
+This library comes with a set of bundled dictionaries, which can be found under the `resources` folder:
+
+- MedDic-CANCER-ADE-JA
+- MedDic-CANCER-DRUG-JA
+
+These are a set of Japanese medical dictionaries developed for normalizing concepts commonly found during the
+analysis of adverse events caused by anticancer drugs. Please refer to 
+[this page](https://sociocom.naist.jp/meddic-cancer-ja/) for more information.
+
+There are convenient classes for loading these dictionaries, which can be accessed with the `Dictionaries` module:
+
+```python
+from EntityNormalizer import Dictionaries, normalize
+
+entities = ['entity1', 'entity2', 'entity3']
+
+# Load the dictionaries
+cancer_ade = Dictionaries.MedDicCancerADE()
+cancer_drug = Dictionaries.MedDicCancerDrug()
+
+# Use the dictionaries
+normalized_ade = normalize(entities, cancer_ade, matching_threshold=70)
+normalized_drug = normalize(entities, cancer_drug, matching_threshold=70)
+```
+
+Both dictionaries use the columns `出現形` (Surface form) and `[細分類]` (Sub-classification) as source and target
+columns, respectively. 
+
+These can be overridden by passing the corresponding parameters when creating the dictionary:
+
+```python
+from EntityNormalizer import Dictionaries
+
+cancer_ade = Dictionaries.MedDicCancerADE(source_column='customColumn', target_column='customColumn2')
+```
diff --git a/setup.py b/setup.py
@@ -2,13 +2,14 @@
 setup(
     name='EntityNormalizer',
     packages=find_packages(),
-    version='0.1.0-2',
+    version='0.2.0',
     description='Library for normalizing entities based on a dictionary',
     author='Gabriel Herman Bernardim Andrade',
     license='MIT',
     readme='README.md',
     url='https://github.com/sociocom/EntityNormalizer',
-    download_url='https://github.com/sociocom/EntityNormalizer/archive/refs/tags/0.1.0.tar.gz',
+    download_url='https://github.com/sociocom/EntityNormalizer/archive/refs/tags/0.2.0.tar.gz',
+    include_package_data=True,  # also install the data files matched by MANIFEST.in (EntityNormalizer/resources/*)
     py_modules=['EntityNormalizer'],
     install_requires=['pandas', 'rapidfuzz', 'mojimoji'],
 )