diff --git a/examples/ndd_member_update.ipynb b/examples/ndd_member_update.ipynb
index 0daeb5d..4d98d45 100644
--- a/examples/ndd_member_update.ipynb
+++ b/examples/ndd_member_update.ipynb
@@ -12,10 +12,23 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'0.0.7'"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from pandarize.frame import Pandarizer\n",
- "import pandas as pd"
+ "import pandarize\n",
+ "import pandas as pd\n",
+ "pandarize.__version__"
]
},
{
@@ -84,6 +97,7 @@
"
\n",
@@ -292,112 +220,112 @@
" 6 | \n",
" OHBM, Rome Italy | \n",
" ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" \n",
" \n",
" 1 | \n",
" inproceedings | \n",
- " hayden_naisys_2020 | \n",
- " A Biological Implementation of Lifelong Learni... | \n",
- " Vogelstein, Joshua T. and Helm, Hayden and Ped... | \n",
- " 2=trainee;3=trainee;4=trainee;1=highlight | \n",
- " 2020 | \n",
+ " Allen2015synaptome | \n",
+ " The Open Synaptome Project: Toward a Microscop... | \n",
+ " Smith, Stephen J. and Burns, Randal and Chevil... | \n",
+ " 8=highlight | \n",
+ " 2015 | \n",
" abspos | \n",
- " | \n",
- " 11 | \n",
- " NAIsys, Cold Spring Harbor, NY, USA | \n",
+ " https://figshare.com/articles/Open_Synaptome_P... | \n",
+ " 10 | \n",
+ " Society for Neuroscience, Chicago, IL, USA | \n",
" ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
" 2 | \n",
" inproceedings | \n",
- " berlin_2017 | \n",
- " Processing and Analyzing Terascale Conjugate A... | \n",
- " Baden, Alex and Perlman, Eric and Collman, For... | \n",
- " 1=trainee;5=highlight | \n",
- " 2017 | \n",
+ " XBrain2015 | \n",
+ " X-Brain: Quantifying Mesoscale Neuroanatomy Us... | \n",
+ " Deyer, Eva L. and Fernandes, Hugo L. and Ronca... | \n",
+ " 5=highlight;3=trainee | \n",
+ " 2015 | \n",
" abspos | \n",
- " https://neurodata.io/talks/berlin_2017.pdf | \n",
- " | \n",
- " Berlin, Germany | \n",
+ " https://figshare.com/articles/X_Brain_Quantify... | \n",
+ " NaN | \n",
+ " Figshare | \n",
" ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
" 3 | \n",
" inproceedings | \n",
- " falk_open_data2019 | \n",
- " NeuroData's Open Data Cloud Ecosystem | \n",
- " Falk, Benjamin and Vogelstein, Joshua T. | \n",
- " 2=highlight | \n",
- " 2019 | \n",
+ " Design2015 | \n",
+ " Optimal Design for Discovery Science: Applicat... | \n",
+ " Wang, Shangsi and Yang, Zhi and Zuo, Xi-Nian a... | \n",
+ " 1=trainee;7=highlight | \n",
+ " 2015 | \n",
" abspos | \n",
- " https://neurodata.io/talks/25_NeuroDatas_Open_... | \n",
- " 7 | \n",
- " Harvard University, Cambridge, MA, USA | \n",
+ " https://figshare.com/articles/Optimal_Design_f... | \n",
+ " NaN | \n",
+ " Figshare | \n",
" ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
" 4 | \n",
" inproceedings | \n",
- " browneposter2019 | \n",
- " Forest Packing: Fast Parallel Decision Forests | \n",
- " Browne, James and Mhembere, Disa and Tomita, T... | \n",
- " 1=trainee;2=trainee;3=trainee;4=highlight | \n",
- " 2019 | \n",
+ " Sparse2015 | \n",
+ " A Sparse High Dimensional State-Space Model wi... | \n",
+ " Chen, Shaojie and Liu, Kai and Yuguang, Yang a... | \n",
+ " 1=trainee;7=highlight | \n",
+ " 2015 | \n",
" abspos | \n",
- " https://figshare.com/articles/Forest_Packing_F... | \n",
- " 5 | \n",
- " SIAM International Conference on Data Mining, ... | \n",
+ " https://figshare.com/articles/A_Sparse_High_Di... | \n",
+ " NaN | \n",
+ " Figshare | \n",
" ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
"\n",
@@ -407,52 +335,52 @@
"text/plain": [
" type alias \\\n",
"0 inproceedings j12019 \n",
- "1 inproceedings hayden_naisys_2020 \n",
- "2 inproceedings berlin_2017 \n",
- "3 inproceedings falk_open_data2019 \n",
- "4 inproceedings browneposter2019 \n",
+ "1 inproceedings Allen2015synaptome \n",
+ "2 inproceedings XBrain2015 \n",
+ "3 inproceedings Design2015 \n",
+ "4 inproceedings Sparse2015 \n",
"\n",
" title \\\n",
"0 Clustering Multi-Modal Connectomes \n",
- "1 A Biological Implementation of Lifelong Learni... \n",
- "2 Processing and Analyzing Terascale Conjugate A... \n",
- "3 NeuroData's Open Data Cloud Ecosystem \n",
- "4 Forest Packing: Fast Parallel Decision Forests \n",
+ "1 The Open Synaptome Project: Toward a Microscop... \n",
+ "2 X-Brain: Quantifying Mesoscale Neuroanatomy Us... \n",
+ "3 Optimal Design for Discovery Science: Applicat... \n",
+ "4 A Sparse High Dimensional State-Space Model wi... \n",
"\n",
" author \\\n",
"0 Chung, Jaewon and Pedigo, Benjamin D. and Prie... \n",
- "1 Vogelstein, Joshua T. and Helm, Hayden and Ped... \n",
- "2 Baden, Alex and Perlman, Eric and Collman, For... \n",
- "3 Falk, Benjamin and Vogelstein, Joshua T. \n",
- "4 Browne, James and Mhembere, Disa and Tomita, T... \n",
+ "1 Smith, Stephen J. and Burns, Randal and Chevil... \n",
+ "2 Deyer, Eva L. and Fernandes, Hugo L. and Ronca... \n",
+ "3 Wang, Shangsi and Yang, Zhi and Zuo, Xi-Nian a... \n",
+ "4 Chen, Shaojie and Liu, Kai and Yuguang, Yang a... \n",
"\n",
- " author+an year keywords \\\n",
- "0 1=trainee;2=trainee;4=highlight 2019 abspos \n",
- "1 2=trainee;3=trainee;4=trainee;1=highlight 2020 abspos \n",
- "2 1=trainee;5=highlight 2017 abspos \n",
- "3 2=highlight 2019 abspos \n",
- "4 1=trainee;2=trainee;3=trainee;4=highlight 2019 abspos \n",
+ " author+an year keywords \\\n",
+ "0 1=trainee;2=trainee;4=highlight 2019 abspos \n",
+ "1 8=highlight 2015 abspos \n",
+ "2 5=highlight;3=trainee 2015 abspos \n",
+ "3 1=trainee;7=highlight 2015 abspos \n",
+ "4 1=trainee;7=highlight 2015 abspos \n",
"\n",
" url month \\\n",
"0 https://figshare.com/articles/Clustering_Multi... 6 \n",
- "1 11 \n",
- "2 https://neurodata.io/talks/berlin_2017.pdf \n",
- "3 https://neurodata.io/talks/25_NeuroDatas_Open_... 7 \n",
- "4 https://figshare.com/articles/Forest_Packing_F... 5 \n",
+ "1 https://figshare.com/articles/Open_Synaptome_P... 10 \n",
+ "2 https://figshare.com/articles/X_Brain_Quantify... NaN \n",
+ "3 https://figshare.com/articles/Optimal_Design_f... NaN \n",
+ "4 https://figshare.com/articles/A_Sparse_High_Di... NaN \n",
"\n",
- " address ... elocation-id \\\n",
- "0 OHBM, Rome Italy ... \n",
- "1 NAIsys, Cold Spring Harbor, NY, USA ... \n",
- "2 Berlin, Germany ... \n",
- "3 Harvard University, Cambridge, MA, USA ... \n",
- "4 SIAM International Conference on Data Mining, ... ... \n",
+ " address ... adsnote urldate tag \\\n",
+ "0 OHBM, Rome Italy ... NaN NaN NaN \n",
+ "1 Society for Neuroscience, Chicago, IL, USA ... NaN NaN NaN \n",
+ "2 Figshare ... NaN NaN NaN \n",
+ "3 Figshare ... NaN NaN NaN \n",
+ "4 Figshare ... NaN NaN NaN \n",
"\n",
- " urldate tag howpublished note day annotation acmid articleno issue_date \n",
- "0 \n",
- "1 \n",
- "2 \n",
- "3 \n",
- "4 \n",
+ " howpublished annotation acmid articleno issue_date note day \n",
+ "0 NaN NaN NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN NaN NaN \n",
+ "3 NaN NaN NaN NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN NaN NaN NaN \n",
"\n",
"[5 rows x 51 columns]"
]
@@ -513,16 +441,16 @@
" \n",
@@ -539,112 +467,112 @@
" 6 | \n",
" OHBM, Rome Italy | \n",
" ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" \n",
" \n",
" 1 | \n",
" inproceedings | \n",
- " Allen2015synaptome | \n",
- " The Open Synaptome Project: Toward a Microscop... | \n",
- " Smith, Stephen J. and Burns, Randal and Chevil... | \n",
- " 8=highlight | \n",
- " 2015 | \n",
+ " ThomasNeuro2020 | \n",
+ " Automated Neuron Tracing of Sparse Fluorescent... | \n",
+ " Athey, Thomas L and Sulam, Jeremias and Vogels... | \n",
+ " 3=highlight; 1=trainee | \n",
+ " 2020 | \n",
" abspos | \n",
- " https://figshare.com/articles/Open_Synaptome_P... | \n",
- " 10 | \n",
- " Society for Neuroscience, Chicago, IL, USA | \n",
+ " NaN | \n",
+ " 11 | \n",
+ " Neuromatch 3 | \n",
" ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
" 2 | \n",
" inproceedings | \n",
- " XBrain2015 | \n",
- " X-Brain: Quantifying Mesoscale Neuroanatomy Us... | \n",
- " Deyer, Eva L. and Fernandes, Hugo L. and Ronca... | \n",
- " 5=highlight;3=trainee | \n",
- " 2015 | \n",
+ " hayden_naisys_2020 | \n",
+ " A Biological Implementation of Lifelong Learni... | \n",
+ " Vogelstein, Joshua T. and Helm, Hayden and Ped... | \n",
+ " 2=trainee;3=trainee;4=trainee;1=highlight | \n",
+ " 2020 | \n",
" abspos | \n",
- " https://figshare.com/articles/X_Brain_Quantify... | \n",
- " | \n",
- " Figshare | \n",
+ " NaN | \n",
+ " 11 | \n",
+ " NAIsys, Cold Spring Harbor, NY, USA | \n",
" ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
" 3 | \n",
" inproceedings | \n",
- " Design2015 | \n",
- " Optimal Design for Discovery Science: Applicat... | \n",
- " Wang, Shangsi and Yang, Zhi and Zuo, Xi-Nian a... | \n",
- " 1=trainee;7=highlight | \n",
- " 2015 | \n",
+ " berlin_2017 | \n",
+ " Processing and Analyzing Terascale Conjugate A... | \n",
+ " Baden, Alex and Perlman, Eric and Collman, For... | \n",
+ " 1=trainee;5=highlight | \n",
+ " 2017 | \n",
" abspos | \n",
- " https://figshare.com/articles/Optimal_Design_f... | \n",
- " | \n",
- " Figshare | \n",
+ " https://neurodata.io/talks/berlin_2017.pdf | \n",
+ " NaN | \n",
+ " Berlin, Germany | \n",
" ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
" 4 | \n",
" inproceedings | \n",
- " Sparse2015 | \n",
- " A Sparse High Dimensional State-Space Model wi... | \n",
- " Chen, Shaojie and Liu, Kai and Yuguang, Yang a... | \n",
- " 1=trainee;7=highlight | \n",
- " 2015 | \n",
+ " falk_open_data2019 | \n",
+ " NeuroData's Open Data Cloud Ecosystem | \n",
+ " Falk, Benjamin and Vogelstein, Joshua T. | \n",
+ " 2=highlight | \n",
+ " 2019 | \n",
" abspos | \n",
- " https://figshare.com/articles/A_Sparse_High_Di... | \n",
- " | \n",
- " Figshare | \n",
+ " https://neurodata.io/talks/25_NeuroDatas_Open_... | \n",
+ " 7 | \n",
+ " Harvard University, Cambridge, MA, USA | \n",
" ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
"
\n",
" \n",
"\n",
@@ -654,52 +582,52 @@
"text/plain": [
" type alias \\\n",
"0 inproceedings j12019 \n",
- "1 inproceedings Allen2015synaptome \n",
- "2 inproceedings XBrain2015 \n",
- "3 inproceedings Design2015 \n",
- "4 inproceedings Sparse2015 \n",
+ "1 inproceedings ThomasNeuro2020 \n",
+ "2 inproceedings hayden_naisys_2020 \n",
+ "3 inproceedings berlin_2017 \n",
+ "4 inproceedings falk_open_data2019 \n",
"\n",
" title \\\n",
"0 Clustering Multi-Modal Connectomes \n",
- "1 The Open Synaptome Project: Toward a Microscop... \n",
- "2 X-Brain: Quantifying Mesoscale Neuroanatomy Us... \n",
- "3 Optimal Design for Discovery Science: Applicat... \n",
- "4 A Sparse High Dimensional State-Space Model wi... \n",
+ "1 Automated Neuron Tracing of Sparse Fluorescent... \n",
+ "2 A Biological Implementation of Lifelong Learni... \n",
+ "3 Processing and Analyzing Terascale Conjugate A... \n",
+ "4 NeuroData's Open Data Cloud Ecosystem \n",
"\n",
" author \\\n",
"0 Chung, Jaewon and Pedigo, Benjamin D. and Prie... \n",
- "1 Smith, Stephen J. and Burns, Randal and Chevil... \n",
- "2 Deyer, Eva L. and Fernandes, Hugo L. and Ronca... \n",
- "3 Wang, Shangsi and Yang, Zhi and Zuo, Xi-Nian a... \n",
- "4 Chen, Shaojie and Liu, Kai and Yuguang, Yang a... \n",
+ "1 Athey, Thomas L and Sulam, Jeremias and Vogels... \n",
+ "2 Vogelstein, Joshua T. and Helm, Hayden and Ped... \n",
+ "3 Baden, Alex and Perlman, Eric and Collman, For... \n",
+ "4 Falk, Benjamin and Vogelstein, Joshua T. \n",
"\n",
- " author+an year keywords \\\n",
- "0 1=trainee;2=trainee;4=highlight 2019 abspos \n",
- "1 8=highlight 2015 abspos \n",
- "2 5=highlight;3=trainee 2015 abspos \n",
- "3 1=trainee;7=highlight 2015 abspos \n",
- "4 1=trainee;7=highlight 2015 abspos \n",
+ " author+an year keywords \\\n",
+ "0 1=trainee;2=trainee;4=highlight 2019 abspos \n",
+ "1 3=highlight; 1=trainee 2020 abspos \n",
+ "2 2=trainee;3=trainee;4=trainee;1=highlight 2020 abspos \n",
+ "3 1=trainee;5=highlight 2017 abspos \n",
+ "4 2=highlight 2019 abspos \n",
"\n",
" url month \\\n",
"0 https://figshare.com/articles/Clustering_Multi... 6 \n",
- "1 https://figshare.com/articles/Open_Synaptome_P... 10 \n",
- "2 https://figshare.com/articles/X_Brain_Quantify... \n",
- "3 https://figshare.com/articles/Optimal_Design_f... \n",
- "4 https://figshare.com/articles/A_Sparse_High_Di... \n",
+ "1 NaN 11 \n",
+ "2 NaN 11 \n",
+ "3 https://neurodata.io/talks/berlin_2017.pdf NaN \n",
+ "4 https://neurodata.io/talks/25_NeuroDatas_Open_... 7 \n",
"\n",
- " address ... elocation-id urldate tag \\\n",
- "0 OHBM, Rome Italy ... \n",
- "1 Society for Neuroscience, Chicago, IL, USA ... \n",
- "2 Figshare ... \n",
- "3 Figshare ... \n",
- "4 Figshare ... \n",
+ " address ... adsnote urldate tag \\\n",
+ "0 OHBM, Rome Italy ... NaN NaN NaN \n",
+ "1 Neuromatch 3 ... NaN NaN NaN \n",
+ "2 NAIsys, Cold Spring Harbor, NY, USA ... NaN NaN NaN \n",
+ "3 Berlin, Germany ... NaN NaN NaN \n",
+ "4 Harvard University, Cambridge, MA, USA ... NaN NaN NaN \n",
"\n",
- " howpublished note day annotation acmid articleno issue_date \n",
- "0 \n",
- "1 \n",
- "2 \n",
- "3 \n",
- "4 \n",
+ " howpublished annotation acmid articleno issue_date note day \n",
+ "0 NaN NaN NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN NaN NaN \n",
+ "3 NaN NaN NaN NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN NaN NaN NaN \n",
"\n",
"[5 rows x 51 columns]"
]
diff --git a/pandarize/__init__.py b/pandarize/__init__.py
index d85b28e..7d53ea3 100644
--- a/pandarize/__init__.py
+++ b/pandarize/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.6"
\ No newline at end of file
+__version__ = "0.0.7"
\ No newline at end of file
diff --git a/pandarize/_util.py b/pandarize/_util.py
index 0e26a18..fa83f29 100644
--- a/pandarize/_util.py
+++ b/pandarize/_util.py
@@ -44,18 +44,46 @@ def rfindall(string, pattern):
return indexes
-def bib_parser(raw):
+def rfindall_matched(string, pattern, key):
+ '''Find all indices of the match pattern w.r.t. the key value
+
+ E.g., the function returns [5] when searching for the
+ pattern ({abc}) in the string '123{abc}def' w.r.t. the key (b)
+
+ Args:
+ -----
+ string : string; string to be searched
+ pattern : regex; regex pattern to be searched in string
+ key : string; a character from the string
+
+ Returns:
+ out : list; returns a list of integers for each index
+ '''
+ match_index = []
+ for match in re.finditer(pattern, string):
+ match_index.append(match.start() + match.group().rfind(key))
+ return match_index
+
+def bib_preprocessing(raw):
+ '''Pre-processes raw bib file'''
+
+ raw = raw.replace('\n', '').replace('\r', '') #remove linebreaks and linefeed
+ raw = re.sub(' +', ' ', raw) #contract whitespace
+
+ return raw
+
+def bib_parser(raw, idxkey):
'''Main bib parsing logic'''
all_lst = []
lst = []
start = None
standby = None
- raw = raw.replace('\n', '').replace('\r', '') #remove linebreaks and linefeed
- raw = re.sub(' +', ' ', raw) #contract whitespace
-
for i, c in enumerate(raw):
if c == '@':
+ if not i in idxkey: #skip if not true start
+ continue
+
if lst:
# fixes cases when extra comma is added to the last key:value item
fix = raw[curr_idx:last_pair-2] + raw[last_pair-2:last_pair+1].replace(',', '')
@@ -111,12 +139,14 @@ def _itemize_bib(lst):
dic['type'] = s[ii:jj].replace('@', '')
dic['alias'] = s[jj:kk].replace('{', '')
else:
- ii = sorted(rfindall(s, '='))[0]
- if s[-1] == ',':
- s = s[:-1]
- out = LatexNodes2Text().latex_to_text(s[ii+1:]).strip()
- dic[s[:ii].strip()] = out
-
+ if s:
+ # print(s, sorted(rfindall(s, '=')))
+ ii = sorted(rfindall(s, '='))[0]
+ if s[-1] == ',':
+ s = s[:-1]
+ out = LatexNodes2Text().latex_to_text(s[ii+1:]).strip()
+ dic[s[:ii].strip()] = out
+
for i in lst:
new_lst.append(LatexNodes2Text().latex_to_text(i))
@@ -276,7 +306,7 @@ def parse(row, types=types, alias=alias):
for i in items:
out_text += i
out_text = out_text[:-2] #remove last comma
- out_text += '\n}\n'
+ out_text += '\n},\n'
return out_text
@@ -286,7 +316,10 @@ def parse(row, types=types, alias=alias):
out = stamper(target='bib')
for i in range(N):
- out += parse(df.iloc[i,:]) + '\n'
+ if i == N-1: #remove the very last comma
+ out += parse(df.iloc[i,:])[:-3] + parse(df.iloc[i,:])[-3:].replace(',', '') + '\n'
+ else:
+ out += parse(df.iloc[i,:]) + '\n'
if not os.path.exists(path=dirs):
os.mkdir(path=dirs)
diff --git a/pandarize/frame.py b/pandarize/frame.py
index 106aa1a..cfa42e8 100644
--- a/pandarize/frame.py
+++ b/pandarize/frame.py
@@ -5,17 +5,20 @@ class Pandarizer:
def __init__(self):
self.raw = None
self.df = None
+ self.idxkey = None
def load(self, source=None, savefile=None):
'''Loads raw data from either local file or the url
'''
self.raw = source_loader(source=source, savefile=savefile)
+ self.raw = bib_preprocessing(raw=self.raw)
+ self.idxkey = rfindall_matched(self.raw, r'[.*]?@[^}]*{*[,]', '@')
def fit(self, kind='bib'):
'''Method that infers data structure (in the future)
'''
if kind == 'bib':
- self.df = bib_parser(raw=self.raw)
+ self.df = bib_parser(raw=self.raw, idxkey=self.idxkey)
def transform(self, formats='bib', types=None, alias=None, dirs=None):
'''Transform loaded data into a specified data type