diff --git a/hespi/main.py b/hespi/main.py
index 1e12677..960373a 100644
--- a/hespi/main.py
+++ b/hespi/main.py
@@ -32,7 +32,7 @@ def detect(
         help="Whether or not to do handwritten text recognition using Microsoft's TrOCR.",
     ),
     trocr_size: TrOCRSize = typer.Option(
-        TrOCRSize.BASE.value,
+        TrOCRSize.LARGE.value,
         help="The size of the TrOCR model to use for handwritten text recognition.",
         case_sensitive=False,
     ),
diff --git a/hespi/util.py b/hespi/util.py
index 207d8d4..33cfbc1 100644
--- a/hespi/util.py
+++ b/hespi/util.py
@@ -104,6 +104,7 @@ def ocr_data_df(data: dict, output_path: Path=None) -> pd.DataFrame:
         pd.DataFrame: The text recognition data as a Pandas dataframe
     """
     df = pd.DataFrame.from_dict(data, orient="index")
+    df = df.fillna(value="")
     df = df.reset_index().rename(columns={"index": "institutional label"})
     
     # Splitting the ocr_results columns into seperate original text, adjusted, and score