From dc611733e0a6a37214613acc4d71ac3a0d4764f8 Mon Sep 17 00:00:00 2001 From: Rich Chiodo Date: Mon, 24 Feb 2020 16:18:50 -0800 Subject: [PATCH 1/2] Support opening spark data frames in the data viewer --- news/2 Fixes/9959.md | 1 + .../getJupyterVariableDataFrameInfo.py | 32 ++++++++++++------- .../getJupyterVariableDataFrameRows.py | 2 ++ 3 files changed, 23 insertions(+), 12 deletions(-) create mode 100644 news/2 Fixes/9959.md diff --git a/news/2 Fixes/9959.md b/news/2 Fixes/9959.md new file mode 100644 index 000000000000..40f534911697 --- /dev/null +++ b/news/2 Fixes/9959.md @@ -0,0 +1 @@ +Support opening spark dataframes in the data viewer. \ No newline at end of file diff --git a/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py b/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py index 2447132ab8bd..a3cf90a0d4a0 100644 --- a/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py +++ b/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py @@ -10,6 +10,22 @@ # Indexes off of _VSCODE_targetVariable need to index types that are part of IJupyterVariable _VSCODE_targetVariable = _VSCODE_json.loads("""_VSCode_JupyterTestValue""") +# Function to compute row count for a value +def getRowCount(var): + if hasattr(var, "shape"): + try: + # Get a bit more restrictive with exactly what we want to count as a shape, since anything can define it + if isinstance(var.shape, tuple): + return var.shape[0] + except TypeError: + return 0 + elif hasattr(var, "__len__"): + try: + return len(var) + except TypeError: + return 0 + + # First check to see if we are a supported type, this prevents us from adding types that are not supported # and also keeps our types in sync with what the variable explorer says that we support if _VSCODE_targetVariable["type"] not in _VSCode_supportsDataExplorer: @@ -21,18 +37,7 @@ _VSCODE_evalResult = eval(_VSCODE_targetVariable["name"]) # Figure out shape if not already there. Use the shape to compute the row count - if hasattr(_VSCODE_evalResult, "shape"): - try: - # Get a bit more restrictive with exactly what we want to count as a shape, since anything can define it - if isinstance(_VSCODE_evalResult.shape, tuple): - _VSCODE_targetVariable["rowCount"] = _VSCODE_evalResult.shape[0] - except TypeError: - _VSCODE_targetVariable["rowCount"] = 0 - elif hasattr(_VSCODE_evalResult, "__len__"): - try: - _VSCODE_targetVariable["rowCount"] = len(_VSCODE_evalResult) - except TypeError: - _VSCODE_targetVariable["rowCount"] = 0 + _VSCODE_targetVariable["rowCount"] = getRowCount(_VSCODE_evalResult) # Turn the eval result into a df _VSCODE_df = _VSCODE_evalResult @@ -45,6 +50,9 @@ _VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult) elif _VSCODE_targetVariable["type"] == "ndarray": _VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult) + elif hasattr(_VSCODE_df, "toPandas"): + _VSCODE_df = _VSCODE_df.toPandas() + _VSCODE_targetVariable["rowCount"] = getRowCount(_VSCODE_df) # If any rows, use pandas json to convert a single row to json. Extract # the column names and types from the json so we match what we'll fetch when diff --git a/pythonFiles/datascience/getJupyterVariableDataFrameRows.py b/pythonFiles/datascience/getJupyterVariableDataFrameRows.py index 7bf647f652ae..697cc14ad1b6 100644 --- a/pythonFiles/datascience/getJupyterVariableDataFrameRows.py +++ b/pythonFiles/datascience/getJupyterVariableDataFrameRows.py @@ -24,6 +24,8 @@ _VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult) elif _VSCODE_targetVariable["type"] == "ndarray": _VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult) +elif hasattr(_VSCODE_df, "toPandas"): + _VSCODE_df = _VSCODE_df.toPandas() # If not a known type, then just let pandas handle it. elif not (hasattr(_VSCODE_df, "iloc")): _VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult) From 99d0599d9b8cd069a180d5fd0e15560e6ae2591a Mon Sep 17 00:00:00 2001 From: Rich Chiodo Date: Mon, 24 Feb 2020 16:33:03 -0800 Subject: [PATCH 2/2] Review feedback --- pythonFiles/datascience/getJupyterVariableDataFrameInfo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py b/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py index a3cf90a0d4a0..6a2a2bba9a0f 100644 --- a/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py +++ b/pythonFiles/datascience/getJupyterVariableDataFrameInfo.py @@ -11,7 +11,7 @@ _VSCODE_targetVariable = _VSCODE_json.loads("""_VSCode_JupyterTestValue""") # Function to compute row count for a value -def getRowCount(var): +def _VSCODE_getRowCount(var): if hasattr(var, "shape"): try: # Get a bit more restrictive with exactly what we want to count as a shape, since anything can define it @@ -37,7 +37,7 @@ def getRowCount(var): _VSCODE_evalResult = eval(_VSCODE_targetVariable["name"]) # Figure out shape if not already there. Use the shape to compute the row count - _VSCODE_targetVariable["rowCount"] = getRowCount(_VSCODE_evalResult) + _VSCODE_targetVariable["rowCount"] = _VSCODE_getRowCount(_VSCODE_evalResult) # Turn the eval result into a df _VSCODE_df = _VSCODE_evalResult @@ -52,7 +52,7 @@ def getRowCount(var): _VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult) elif hasattr(_VSCODE_df, "toPandas"): _VSCODE_df = _VSCODE_df.toPandas() - _VSCODE_targetVariable["rowCount"] = getRowCount(_VSCODE_df) + _VSCODE_targetVariable["rowCount"] = _VSCODE_getRowCount(_VSCODE_df) # If any rows, use pandas json to convert a single row to json. Extract # the column names and types from the json so we match what we'll fetch when