From 1320c227825c72a182ab3936a33d58ddae979553 Mon Sep 17 00:00:00 2001
From: Christopher Burr <christopher.burr@cern.ch>
Date: Mon, 30 Jul 2018 11:12:47 +0100
Subject: [PATCH 1/7] Add support for serialising categorical columns

---
 root_pandas/readwrite.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/root_pandas/readwrite.py b/root_pandas/readwrite.py
index c54604b..3acf4bf 100644
--- a/root_pandas/readwrite.py
+++ b/root_pandas/readwrite.py
@@ -353,12 +353,24 @@ def to_root(df, path, key='my_ttree', mode='w', store_index=True, *args, **kwarg
     from root_numpy import array2root
     # We don't want to modify the user's DataFrame here, so we make a shallow copy
     df_ = df.copy(deep=False)
+
     if store_index:
         name = df_.index.name
         if name is None:
             # Handle the case where the index has no name
             name = ''
         df_['__index__' + name] = df_.index
+
+    # Convert categorical columns into something root_numpy can serialise
+    for col in df.select_dtypes(['category']).columns:
+        name_components = ['__rpCaT', col, str(df[col].cat.ordered)] + df[col].cat.categories
+        if ['*' not in c for c in name_components]:
+            sep = '*'
+        else:
+            raise ValueError('Unable to find suitable separator for columns')
+        df_[sep.join(name_components)] = df[col].cat.codes
+        del df[col]
+
     arr = df_.to_records(index=False)
     array2root(arr, path, key, mode=mode, *args, **kwargs)
 

From fbc9c72cf0f85affa4e3e35aed7ea9f86c8c8e5f Mon Sep 17 00:00:00 2001
From: Christopher Burr <christopher.burr@cern.ch>
Date: Mon, 30 Jul 2018 11:24:04 +0100
Subject: [PATCH 2/7] Bugfix to previous commit

---
 root_pandas/readwrite.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/root_pandas/readwrite.py b/root_pandas/readwrite.py
index 3acf4bf..5e6a6ce 100644
--- a/root_pandas/readwrite.py
+++ b/root_pandas/readwrite.py
@@ -363,7 +363,8 @@ def to_root(df, path, key='my_ttree', mode='w', store_index=True, *args, **kwarg
 
     # Convert categorical columns into something root_numpy can serialise
     for col in df.select_dtypes(['category']).columns:
-        name_components = ['__rpCaT', col, str(df[col].cat.ordered)] + df[col].cat.categories
+        name_components = ['__rpCaT', col, str(df[col].cat.ordered)]
+        name_components.extend(df[col].cat.categories)
         if ['*' not in c for c in name_components]:
             sep = '*'
         else:

From d4f2ef4c193bbaf9e9b0a248d12ad34decf2d03f Mon Sep 17 00:00:00 2001
From: Christopher Burr <christopher.burr@cern.ch>
Date: Mon, 30 Jul 2018 11:24:42 +0100
Subject: [PATCH 3/7] Add support for loading categorical data

---
 root_pandas/readwrite.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/root_pandas/readwrite.py b/root_pandas/readwrite.py
index 5e6a6ce..2f5621e 100644
--- a/root_pandas/readwrite.py
+++ b/root_pandas/readwrite.py
@@ -6,6 +6,7 @@
 import numpy as np
 from numpy.lib.recfunctions import append_fields
 from pandas import DataFrame, RangeIndex
+import pandas as pd
 from root_numpy import root2array, list_trees
 import fnmatch
 from root_numpy import list_branches
@@ -312,6 +313,14 @@ def convert_to_dataframe(array, start_index=None):
         assert len(columns) == len(df.columns), (columns, df.columns)
         df = df.reindex_axis(columns, axis=1, copy=False)
 
+    # Convert categorical columns back to categories
+    for c in df.columns:
+        match = re.match(r'__rpCaT\*([^\*]\*(True|False)\*)', c)
+        if match:
+            real_name, ordered = match.groups
+            categories = c.split('*')[3:]
+            df[c] = pd.Categorical.from_codes(df[c], categories, ordered={'True': True, 'False': False}[ordered])
+
     return df
 
 

From 70bef7ab34a505aa8437f352b2ae4a238536e89a Mon Sep 17 00:00:00 2001
From: Christopher Burr <christopher.burr@cern.ch>
Date: Mon, 30 Jul 2018 11:30:40 +0100
Subject: [PATCH 4/7] Fix typos in variable names

---
 root_pandas/readwrite.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/root_pandas/readwrite.py b/root_pandas/readwrite.py
index 2f5621e..1f2e098 100644
--- a/root_pandas/readwrite.py
+++ b/root_pandas/readwrite.py
@@ -371,15 +371,15 @@ def to_root(df, path, key='my_ttree', mode='w', store_index=True, *args, **kwarg
         df_['__index__' + name] = df_.index
 
     # Convert categorical columns into something root_numpy can serialise
-    for col in df.select_dtypes(['category']).columns:
-        name_components = ['__rpCaT', col, str(df[col].cat.ordered)]
-        name_components.extend(df[col].cat.categories)
+    for col in df_.select_dtypes(['category']).columns:
+        name_components = ['__rpCaT', col, str(df_[col].cat.ordered)]
+        name_components.extend(df_[col].cat.categories)
         if ['*' not in c for c in name_components]:
             sep = '*'
         else:
             raise ValueError('Unable to find suitable separator for columns')
-        df_[sep.join(name_components)] = df[col].cat.codes
-        del df[col]
+        df_[sep.join(name_components)] = df_[col].cat.codes
+        del df_[col]
 
     arr = df_.to_records(index=False)
     array2root(arr, path, key, mode=mode, *args, **kwargs)

From ed96faafcf2663ccce3d8098360aebc95c6a0432 Mon Sep 17 00:00:00 2001
From: Christopher Burr <christopher.burr@cern.ch>
Date: Mon, 30 Jul 2018 11:41:40 +0100
Subject: [PATCH 5/7] Fix regex and maintain column order

---
 root_pandas/readwrite.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/root_pandas/readwrite.py b/root_pandas/readwrite.py
index 1f2e098..a6d2525 100644
--- a/root_pandas/readwrite.py
+++ b/root_pandas/readwrite.py
@@ -315,11 +315,12 @@ def convert_to_dataframe(array, start_index=None):
 
     # Convert categorical columns back to categories
     for c in df.columns:
-        match = re.match(r'__rpCaT\*([^\*]\*(True|False)\*)', c)
+        match = re.match(r'^__rpCaT\*([^\*]+\*(True|False)\*)', c)
         if match:
             real_name, ordered = match.groups
             categories = c.split('*')[3:]
             df[c] = pd.Categorical.from_codes(df[c], categories, ordered={'True': True, 'False': False}[ordered])
+            df.rename(index=str, columns={c: real_name}, inplace=True)
 
     return df
 
@@ -378,8 +379,8 @@ def to_root(df, path, key='my_ttree', mode='w', store_index=True, *args, **kwarg
             sep = '*'
         else:
             raise ValueError('Unable to find suitable separator for columns')
-        df_[sep.join(name_components)] = df_[col].cat.codes
-        del df_[col]
+        df_[col] = df_[col].cat.codes
+        df.rename(index=str, columns={col: sep.join(name_components)}, inplace=True)
 
     arr = df_.to_records(index=False)
     array2root(arr, path, key, mode=mode, *args, **kwargs)

From 22aaa45fed22aba0b5af49972841dbba3578ecca Mon Sep 17 00:00:00 2001
From: Christopher Burr <christopher.burr@cern.ch>
Date: Mon, 30 Jul 2018 11:56:32 +0100
Subject: [PATCH 6/7] And another typo

---
 root_pandas/readwrite.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/root_pandas/readwrite.py b/root_pandas/readwrite.py
index a6d2525..f453912 100644
--- a/root_pandas/readwrite.py
+++ b/root_pandas/readwrite.py
@@ -317,7 +317,7 @@ def convert_to_dataframe(array, start_index=None):
     for c in df.columns:
         match = re.match(r'^__rpCaT\*([^\*]+\*(True|False)\*)', c)
         if match:
-            real_name, ordered = match.groups
+            real_name, ordered = match.groups()
             categories = c.split('*')[3:]
             df[c] = pd.Categorical.from_codes(df[c], categories, ordered={'True': True, 'False': False}[ordered])
             df.rename(index=str, columns={c: real_name}, inplace=True)
@@ -380,7 +380,7 @@ def to_root(df, path, key='my_ttree', mode='w', store_index=True, *args, **kwarg
         else:
             raise ValueError('Unable to find suitable separator for columns')
         df_[col] = df_[col].cat.codes
-        df.rename(index=str, columns={col: sep.join(name_components)}, inplace=True)
+        df_.rename(index=str, columns={col: sep.join(name_components)}, inplace=True)
 
     arr = df_.to_records(index=False)
     array2root(arr, path, key, mode=mode, *args, **kwargs)

From 7546bfc46935a6ba6c497c2fce6a9aee542afeb9 Mon Sep 17 00:00:00 2001
From: Christopher Burr <christopher.burr@cern.ch>
Date: Mon, 30 Jul 2018 12:01:08 +0100
Subject: [PATCH 7/7] And another regex fix

---
 root_pandas/readwrite.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/root_pandas/readwrite.py b/root_pandas/readwrite.py
index f453912..bdd0966 100644
--- a/root_pandas/readwrite.py
+++ b/root_pandas/readwrite.py
@@ -315,7 +315,7 @@ def convert_to_dataframe(array, start_index=None):
 
     # Convert categorical columns back to categories
     for c in df.columns:
-        match = re.match(r'^__rpCaT\*([^\*]+\*(True|False)\*)', c)
+        match = re.match(r'^__rpCaT\*([^\*]+)\*(True|False)\*', c)
         if match:
             real_name, ordered = match.groups()
             categories = c.split('*')[3:]