From dc611733e0a6a37214613acc4d71ac3a0d4764f8 Mon Sep 17 00:00:00 2001
From: Rich Chiodo <rchiodo@microsoft.com>
Date: Mon, 24 Feb 2020 16:18:50 -0800
Subject: [PATCH 1/2] Support opening Spark DataFrames in the data viewer

---
 news/2 Fixes/9959.md                          |  1 +
 .../getJupyterVariableDataFrameInfo.py        | 32 ++++++++++++-------
 .../getJupyterVariableDataFrameRows.py        |  2 ++
 3 files changed, 23 insertions(+), 12 deletions(-)
 create mode 100644 news/2 Fixes/9959.md

diff --git a/news/2 Fixes/9959.md b/news/2 Fixes/9959.md
new file mode 100644
index 000000000000..40f534911697
--- /dev/null
+++ b/news/2 Fixes/9959.md	
@@ -0,0 +1 @@
+Support opening Spark DataFrames in the data viewer.
\ No newline at end of file
diff --git a/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py b/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py
index 2447132ab8bd..a3cf90a0d4a0 100644
--- a/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py
+++ b/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py
@@ -10,6 +10,22 @@
 # Indexes off of _VSCODE_targetVariable need to index types that are part of IJupyterVariable
 _VSCODE_targetVariable = _VSCODE_json.loads("""_VSCode_JupyterTestValue""")
 
+# Function to compute row count for a value
+def getRowCount(var):
+    if hasattr(var, "shape"):
+        try:
+            # Get a bit more restrictive with exactly what we want to count as a shape, since anything can define it
+            if isinstance(var.shape, tuple):
+                return var.shape[0]
+        except TypeError:
+            return 0
+    elif hasattr(var, "__len__"):
+        try:
+            return len(var)
+        except TypeError:
+            return 0
+
+
 # First check to see if we are a supported type, this prevents us from adding types that are not supported
 # and also keeps our types in sync with what the variable explorer says that we support
 if _VSCODE_targetVariable["type"] not in _VSCode_supportsDataExplorer:
@@ -21,18 +37,7 @@
     _VSCODE_evalResult = eval(_VSCODE_targetVariable["name"])
 
     # Figure out shape if not already there. Use the shape to compute the row count
-    if hasattr(_VSCODE_evalResult, "shape"):
-        try:
-            # Get a bit more restrictive with exactly what we want to count as a shape, since anything can define it
-            if isinstance(_VSCODE_evalResult.shape, tuple):
-                _VSCODE_targetVariable["rowCount"] = _VSCODE_evalResult.shape[0]
-        except TypeError:
-            _VSCODE_targetVariable["rowCount"] = 0
-    elif hasattr(_VSCODE_evalResult, "__len__"):
-        try:
-            _VSCODE_targetVariable["rowCount"] = len(_VSCODE_evalResult)
-        except TypeError:
-            _VSCODE_targetVariable["rowCount"] = 0
+    _VSCODE_targetVariable["rowCount"] = getRowCount(_VSCODE_evalResult)
 
     # Turn the eval result into a df
     _VSCODE_df = _VSCODE_evalResult
@@ -45,6 +50,9 @@
         _VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
     elif _VSCODE_targetVariable["type"] == "ndarray":
         _VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)
+    elif hasattr(_VSCODE_df, "toPandas"):
+        _VSCODE_df = _VSCODE_df.toPandas()
+        _VSCODE_targetVariable["rowCount"] = getRowCount(_VSCODE_df)
 
     # If any rows, use pandas json to convert a single row to json. Extract
     # the column names and types from the json so we match what we'll fetch when
diff --git a/pythonFiles/datascience/getJupyterVariableDataFrameRows.py b/pythonFiles/datascience/getJupyterVariableDataFrameRows.py
index 7bf647f652ae..697cc14ad1b6 100644
--- a/pythonFiles/datascience/getJupyterVariableDataFrameRows.py
+++ b/pythonFiles/datascience/getJupyterVariableDataFrameRows.py
@@ -24,6 +24,8 @@
     _VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
 elif _VSCODE_targetVariable["type"] == "ndarray":
     _VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)
+elif hasattr(_VSCODE_df, "toPandas"):
+    _VSCODE_df = _VSCODE_df.toPandas()
 # If not a known type, then just let pandas handle it.
 elif not (hasattr(_VSCODE_df, "iloc")):
     _VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)

From 99d0599d9b8cd069a180d5fd0e15560e6ae2591a Mon Sep 17 00:00:00 2001
From: Rich Chiodo <rchiodo@microsoft.com>
Date: Mon, 24 Feb 2020 16:33:03 -0800
Subject: [PATCH 2/2] Review feedback

---
 pythonFiles/datascience/getJupyterVariableDataFrameInfo.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py b/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py
index a3cf90a0d4a0..6a2a2bba9a0f 100644
--- a/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py
+++ b/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py
@@ -11,7 +11,7 @@
 _VSCODE_targetVariable = _VSCODE_json.loads("""_VSCode_JupyterTestValue""")
 
 # Function to compute row count for a value
-def getRowCount(var):
+def _VSCODE_getRowCount(var):
     if hasattr(var, "shape"):
         try:
             # Get a bit more restrictive with exactly what we want to count as a shape, since anything can define it
@@ -37,7 +37,7 @@ def getRowCount(var):
     _VSCODE_evalResult = eval(_VSCODE_targetVariable["name"])
 
     # Figure out shape if not already there. Use the shape to compute the row count
-    _VSCODE_targetVariable["rowCount"] = getRowCount(_VSCODE_evalResult)
+    _VSCODE_targetVariable["rowCount"] = _VSCODE_getRowCount(_VSCODE_evalResult)
 
     # Turn the eval result into a df
     _VSCODE_df = _VSCODE_evalResult
@@ -52,7 +52,7 @@ def getRowCount(var):
         _VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)
     elif hasattr(_VSCODE_df, "toPandas"):
         _VSCODE_df = _VSCODE_df.toPandas()
-        _VSCODE_targetVariable["rowCount"] = getRowCount(_VSCODE_df)
+        _VSCODE_targetVariable["rowCount"] = _VSCODE_getRowCount(_VSCODE_df)
 
     # If any rows, use pandas json to convert a single row to json. Extract
     # the column names and types from the json so we match what we'll fetch when