Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion news/1 Enhancements/4677.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Add preliminary support for viewing dataframes.
Add support for viewing dataframes, lists, dicts, nparrays.
9 changes: 9 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2397,6 +2397,7 @@
"mocha-junit-reporter": "^1.17.0",
"mocha-multi-reporters": "^1.1.7",
"node-has-native-dependencies": "^1.0.2",
"node-html-parser": "^1.1.13",
"nyc": "^13.3.0",
"raw-loader": "^0.5.1",
"react": "^16.5.2",
Expand Down
5 changes: 3 additions & 2 deletions package.nls.json
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,8 @@
"DataScience.dataExplorerInvalidVariableFormat" : "'{0}' is not an active variable.",
"DataScience.jupyterGetVariablesExecutionError" : "Failure during variable extraction:\r\n{0}",
"DataScience.loadingMessage" : "loading ...",
"DataScience.noRowsInDataExplorer" : "Fetching data ...",
"DataScience.noRowsInDataViewer" : "Fetching data ...",
"DataScience.pandasTooOldForViewingFormat" : "Python package 'pandas' is version {0}. Version 0.20 or greater is required for viewing data.",
"DataScience.pandasRequiredForViewing" : "Python package 'pandas' is required for viewing data."
"DataScience.pandasRequiredForViewing" : "Python package 'pandas' is required for viewing data.",
"DataScience.valuesColumn": "values"
}
54 changes: 34 additions & 20 deletions pythonFiles/datascience/getJupyterVariableDataFrameInfo.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,35 @@
# Query Jupyter server for the info about a dataframe
import json as _VSCODE_json
import pandas as _VSCODE_pd

# In IJupyterVariables.getValue this '_VSCode_JupyterTestValue' will be replaced with the json stringified value of the target variable
# Indexes off of _VSCODE_targetVariable need to index types that are part of IJupyterVariable
_VSCODE_targetVariable = _VSCODE_json.loads('_VSCode_JupyterTestValue')
_VSCODE_evalResult = eval(_VSCODE_targetVariable['name'])

# First list out the columns of the target variable (a DataFrame, or a list, Series, dict or ndarray viewed as a single column)
_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
_VSCODE_columnNames = list(_VSCODE_evalResult)
_VSCODE_columnTypes = []
_VSCODE_columnNames = []
if _VSCODE_targetVariable['type'] == 'list':
_VSCODE_columnTypes = ['string'] # Might be able to be more specific here?
_VSCODE_columnNames = ['_VSCode_JupyterValuesColumn']
elif _VSCODE_targetVariable['type'] == 'Series':
_VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
_VSCODE_columnNames = list(_VSCODE_evalResult)
elif _VSCODE_targetVariable['type'] == 'dict':
_VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
_VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
_VSCODE_columnNames = list(_VSCODE_evalResult)
elif _VSCODE_targetVariable['type'] == 'ndarray':
_VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
_VSCODE_evalResult = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
_VSCODE_columnNames = list(_VSCODE_evalResult)
elif _VSCODE_targetVariable['type'] == 'DataFrame':
_VSCODE_columnTypes = list(_VSCODE_evalResult.dtypes)
_VSCODE_columnNames = list(_VSCODE_evalResult)

# Make sure we have an index column (see code in getJupyterVariableDataFrameRows.py)
if 'index' not in _VSCODE_columnNames:
Expand All @@ -17,13 +38,13 @@

# Then loop and generate our output json
_VSCODE_columns = []
for n in range(0, len(_VSCODE_columnNames)):
c = _VSCODE_columnNames[n]
t = _VSCODE_columnTypes[n]
for _VSCODE_n in range(0, len(_VSCODE_columnNames)):
_VSCODE_column_name = _VSCODE_columnNames[_VSCODE_n]
_VSCODE_column_type = _VSCODE_columnTypes[_VSCODE_n]
_VSCODE_colobj = {}
_VSCODE_colobj['key'] = c
_VSCODE_colobj['name'] = c
_VSCODE_colobj['type'] = str(t)
_VSCODE_colobj['key'] = _VSCODE_column_name
_VSCODE_colobj['name'] = _VSCODE_column_name
_VSCODE_colobj['type'] = str(_VSCODE_column_type)
_VSCODE_columns.append(_VSCODE_colobj)

del _VSCODE_columnNames
Expand All @@ -33,20 +54,13 @@
_VSCODE_targetVariable['columns'] = _VSCODE_columns
del _VSCODE_columns

# Figure out shape if not already there
if 'shape' not in _VSCODE_targetVariable:
_VSCODE_targetVariable['shape'] = str(_VSCODE_evalResult.shape)

# Row count is actually embedded in shape. It is the first number (capture group 1 of the regex below)
import re as _VSCODE_re
_VSCODE_regex = r"\(\s*(\d+),\s*(\d+)\s*\)"
_VSCODE_matches = _VSCODE_re.search(_VSCODE_regex, _VSCODE_targetVariable['shape'])
if (_VSCODE_matches):
_VSCODE_targetVariable['rowCount'] = int(_VSCODE_matches[1])
del _VSCODE_matches
# Compute the row count: use shape[0] when the value has a shape, len() for a list, otherwise 0
if (hasattr(_VSCODE_evalResult, "shape")):
_VSCODE_targetVariable['rowCount'] = _VSCODE_evalResult.shape[0]
elif _VSCODE_targetVariable['type'] == 'list':
_VSCODE_targetVariable['rowCount'] = len(_VSCODE_evalResult)
else:
_VSCODE_targetVariable['rowCount'] = 0
del _VSCODE_regex

# Transform this back into a string
print(_VSCODE_json.dumps(_VSCODE_targetVariable))
20 changes: 19 additions & 1 deletion pythonFiles/datascience/getJupyterVariableDataFrameRows.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Query Jupyter server for the rows of a data frame
import json as _VSCODE_json
import pandas as _VSCODE_pd
import pandas.io.json as _VSCODE_pd_json

# In IJupyterVariables.getValue this '_VSCode_JupyterTestValue' will be replaced with the json stringified value of the target variable
Expand All @@ -12,12 +13,29 @@
_VSCODE_startRow = max(_VSCode_JupyterStartRow, 0)
_VSCODE_endRow = min(_VSCode_JupyterEndRow, _VSCODE_targetVariable['rowCount'])

# Assume we have a dataframe. If not, turn our eval result into a dataframe
_VSCODE_df = _VSCODE_evalResult
if (_VSCODE_targetVariable['type'] == 'list'):
_VSCODE_df = _VSCODE_pd.DataFrame({'_VSCode_JupyterValuesColumn':_VSCODE_evalResult})
elif (_VSCODE_targetVariable['type'] == 'Series'):
_VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
elif _VSCODE_targetVariable['type'] == 'dict':
_VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
_VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
elif _VSCODE_targetVariable['type'] == 'ndarray':
_VSCODE_evalResult = _VSCODE_pd.Series(_VSCODE_evalResult)
_VSCODE_df = _VSCODE_pd.Series.to_frame(_VSCODE_evalResult)
# If not a known type, then just let pandas handle it.
elif not (hasattr(_VSCODE_df, 'iloc')):
_VSCODE_df = _VSCODE_pd.DataFrame(_VSCODE_evalResult)

# Turn into JSON using pandas. We use pandas because it's about 3 orders of magnitude faster to turn into JSON
_VSCODE_rows = df.iloc[_VSCODE_startRow:_VSCODE_endRow]
_VSCODE_rows = _VSCODE_df.iloc[_VSCODE_startRow:_VSCODE_endRow]
_VSCODE_result = _VSCODE_pd_json.to_json(None, _VSCODE_rows, orient='table', date_format='iso')
print(_VSCODE_result)

# Cleanup our variables
del _VSCODE_df
del _VSCODE_endRow
del _VSCODE_startRow
del _VSCODE_rows
Expand Down
Loading