Merge pull request #160 from pydanny/issue-142/implement-csv_to_df-fu…

…nction Add csv_to_df function
pydanny · Dec 1, 2023 · cffc5ec · cffc5ec
2 parents c545c64 + 7871ff8
commit cffc5ec
Show file tree

Hide file tree

Showing 5 changed files with 126 additions and 5 deletions.
diff --git a/docs/reference/plus.md b/docs/reference/plus.md
@@ -6,9 +6,12 @@ Here's the reference for the `Plus`  class, with all its parameters, attributes,
     options:
         show_source: true
         members:
+            - csv_to_df
             - diagram    
             - mermaid                            
             - model_graph
+            - model_graph_schema            
             - print
             - read_frame
-            - model_graph_schema
+
+
diff --git a/docs/usage.ipynb b/docs/usage.ipynb
@@ -13,7 +13,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {
     "tags": [
      "remove-input",
@@ -43,7 +43,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -164,7 +164,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Data frames from QuerySets\n",
+    "## Dataframes from QuerySets\n",
     "\n",
     "_New in dj-notebook 0.3.0_\n",
     "\n",
@@ -409,7 +409,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -459,6 +459,83 @@
    "source": [
     "plus.print()"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Dataframes from CSVs\n",
+    "\n",
+    "_New in dj-notebook 0.7.0_\n",
+    "\n",
+    "This turns CSV strings, or CSV files at given paths, into Dataframes.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Name</th>\n",
+       "      <th>FirstLetter</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Daniel</td>\n",
+       "      <td>D</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Audrey</td>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     Name FirstLetter\n",
+       "0  Daniel           D\n",
+       "1  Audrey           A"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "csv_string = \"\"\"Name,FirstLetter\n",
+    "Daniel,D\n",
+    "Audrey,A\"\"\"\n",
+    "\n",
+    "# Also works with plus.csv_to_df(pathlib.Path('path/to/data.csv'))\n",
+    "plus.csv_to_df(csv_string)"
+   ]
   }
  ],
  "metadata": {

diff --git a/src/dj_notebook/shell_plus.py b/src/dj_notebook/shell_plus.py
@@ -13,6 +13,8 @@
 
 
 import base64
+import io
+import pathlib
 import typing
 
 import IPython
@@ -143,6 +145,16 @@ def model_graph(self, model: django_models.Model, max_nodes: int = 20) -> None:
             )
         display_mermaid(output)
 
+    def csv_to_df(self, filepath_or_string: pathlib.Path | str) -> pd.DataFrame:
+        """Read CSV data into a Pandas DataFrame.
+
+        Args:
+            filepath_or_string: Either a `pathlib.Path` pointing at a CSV
+                file, or a string holding the CSV content itself. A plain
+                string is always treated as CSV content, never as a path,
+                so wrap file paths in `pathlib.Path`.
+
+        Returns:
+            The DataFrame produced by `pd.read_csv`.
+        """
+        # A Path is handed to pandas as-is so that it reads the file itself
+        if isinstance(filepath_or_string, pathlib.Path):
+            return pd.read_csv(filepath_or_string)
+
+        # Anything else is taken to be CSV text; wrap it in an in-memory
+        # text buffer so pandas parses the content instead of opening a file
+        buffer = io.StringIO(filepath_or_string)
+        return pd.read_csv(buffer)
+
 
 def get_node_for_model(graph, model: django_models.Model):
     try:

diff --git a/tests/sample.csv b/tests/sample.csv
@@ -0,0 +1,3 @@
+Name,Age,Weight
+A,1,100
+B,2,200
diff --git a/tests/test_dj_notebook.py b/tests/test_dj_notebook.py
@@ -4,6 +4,7 @@
 from unittest.mock import patch
 
 import django.conf
+import pandas
 import pytest
 from dj_notebook import Plus, activate
 from dj_notebook.shell_plus import DiagramClass
@@ -165,6 +166,31 @@ class to ensure it properly delegates to the
     assert result == "Mocked DataFrame"
 
 
+def test_csv_to_df():
+    """
+    Tests the `csv_to_df` method of the `Plus`
+    class to ensure it returns a DataFrame.
+
+    The test passes `tests/sample.csv` both as a `Path` and as a string
+    of its contents, and checks that each call yields a DataFrame whose
+    first row has the expected `Name` value. Nothing is mocked.
+    """
+    plus_instance = Plus(helpers={})
+    # Relative path: resolved against the current working directory
+    csv_path = Path("tests/sample.csv")
+    with open(csv_path) as f:
+        csv_string = f.read()
+
+    result_from_string = plus_instance.csv_to_df(csv_string)
+    result_from_path = plus_instance.csv_to_df(csv_path)
+
+    # assert results are dataframes
+    assert isinstance(result_from_string, pandas.DataFrame)
+    assert isinstance(result_from_path, pandas.DataFrame)
+
+    # assert content is correct
+    assert result_from_string.at[0, "Name"] == "A"
+    assert result_from_path.at[0, "Name"] == "A"
+
+
 def test_warning_when_debug_false(capfd):
     """
     Test if the correct warning and message are displayed when DEBUG is False.