From 576dfd8058012aa9df5fe23f41bcfcb6975f1a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaan=20L=C4=B1?= Date: Sun, 7 Apr 2024 07:07:34 -0400 Subject: [PATCH] feat: add notebook example --- notebooks/load_csv_with_data_types.ipynb | 1027 ++++++++++++++++++++++ 1 file changed, 1027 insertions(+) create mode 100644 notebooks/load_csv_with_data_types.ipynb diff --git a/notebooks/load_csv_with_data_types.ipynb b/notebooks/load_csv_with_data_types.ipynb new file mode 100644 index 0000000..04155be --- /dev/null +++ b/notebooks/load_csv_with_data_types.ipynb @@ -0,0 +1,1027 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/me/jaanli/synthetic-healthcare-data/.venv/lib/python3.12/site-packages/sql/traits.py:20: FutureWarning: named_parameters: boolean values are now deprecated. Value True will be treated as \"enabled\". \n", + "Please use a valid option: \"warn\", \"enabled\", or \"disabled\". \n", + "For more information, see the docs: https://jupysql.ploomber.io/en/latest/api/configuration.html#named-parameters\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "# Load duckdb, which lets us efficiently load large files\n", + "import duckdb\n", + "\n", + "# Load pandas, which lets us manipulate dataframes\n", + "import pandas as pd\n", + "\n", + "# Import jupysql Jupyter extension to create SQL cells\n", + "%load_ext sql\n", + "\n", + "# Set configurations on jupysql to directly output data to Pandas and to simplify the output that is printed to the notebook.\n", + "%config SqlMagic.autopandas = True\n", + "\n", + "%config SqlMagic.feedback = False\n", + "%config SqlMagic.displaycon = False\n", + "\n", + "# Allow named parameters (python variables) in SQL cells\n", + "%config SqlMagic.named_parameters=True\n", + "\n", + "# Connect jupysql to DuckDB using a SQLAlchemy-style connection string. 
Either connect to an in-memory DuckDB, or a file-backed db.\n", + "%sql duckdb:///:memory:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Success
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [Success]\n", + "Index: []" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql \n", + "\n", + "COPY (\n", + " SELECT\n", + " PERSON_ID::NUMERIC,\n", + " MCAID_BENE_ID::NUMERIC,\n", + " PERSON_WGHT::NUMERIC,\n", + " AGE_LOW::NUMERIC,\n", + " AGE_HIGH::NUMERIC,\n", + " SEX_IDENT_CD::VARCHAR,\n", + " RACE_CD::VARCHAR,\n", + " MCAID_SBMTTG_ST_CD::VARCHAR,\n", + " STATE_CD::VARCHAR,\n", + " COUNTY_FIPS_CD::VARCHAR,\n", + " ZIP_CD::VARCHAR,\n", + " RSN_ENRLMT_CD::VARCHAR,\n", + " MDCD_ENRLMT_1::NUMERIC,\n", + " MDCD_ENRLMT_2::VARCHAR,\n", + " MDCD_ENRLMT_3::VARCHAR,\n", + " MDCD_ENRLMT_4::VARCHAR,\n", + " MDCD_ENRLMT_5::VARCHAR,\n", + " MDCD_ENRLMT_6::VARCHAR,\n", + " MDCD_ENRLMT_7::VARCHAR,\n", + " MDCD_ENRLMT_8::VARCHAR,\n", + " MDCD_ENRLMT_9::VARCHAR,\n", + " MDCD_ENRLMT_10::VARCHAR,\n", + " MDCD_ENRLMT_11::VARCHAR,\n", + " MDCD_ENRLMT_12::NUMERIC,\n", + " MDCD_MCO_ENRLMT_1::NUMERIC,\n", + " MDCD_MCO_ENRLMT_2::VARCHAR,\n", + " MDCD_MCO_ENRLMT_3::VARCHAR,\n", + " MDCD_MCO_ENRLMT_4::VARCHAR,\n", + " MDCD_MCO_ENRLMT_5::VARCHAR,\n", + " MDCD_MCO_ENRLMT_6::VARCHAR,\n", + " MDCD_MCO_ENRLMT_7::VARCHAR,\n", + " MDCD_MCO_ENRLMT_8::VARCHAR,\n", + " MDCD_MCO_ENRLMT_9::VARCHAR,\n", + " MDCD_MCO_ENRLMT_10::VARCHAR,\n", + " MDCD_MCO_ENRLMT_11::VARCHAR,\n", + " MDCD_MCO_ENRLMT_12::NUMERIC,\n", + " MDCD_CHIP_ENRLMT::NUMERIC,\n", + " RSTRCTD_BNFTS_IND::VARCHAR,\n", + " DUAL_ELGBL_1::NUMERIC,\n", + " DUAL_ELGBL_2::VARCHAR,\n", + " DUAL_ELGBL_3::VARCHAR,\n", + " DUAL_ELGBL_4::VARCHAR,\n", + " DUAL_ELGBL_5::VARCHAR,\n", + " DUAL_ELGBL_6::VARCHAR,\n", + " DUAL_ELGBL_7::VARCHAR,\n", + " DUAL_ELGBL_8::VARCHAR,\n", + " DUAL_ELGBL_9::VARCHAR,\n", + " DUAL_ELGBL_10::VARCHAR,\n", + " DUAL_ELGBL_11::VARCHAR,\n", + " DUAL_ELGBL_12::NUMERIC\n", + " FROM read_csv_auto('/Users/me/data/syh_dr/syhdr_medicaid_person_2016.CSV')\n", + ") TO 
'/Users/me/data/syh_dr/syhdr_medicaid_person_2016.parquet' (FORMAT 'parquet');" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CAST(PERSON_ID AS DECIMAL(18,3))CAST(MCAID_BENE_ID AS DECIMAL(18,3))CAST(PERSON_WGHT AS DECIMAL(18,3))CAST(AGE_LOW AS DECIMAL(18,3))CAST(AGE_HIGH AS DECIMAL(18,3))CAST(SEX_IDENT_CD AS VARCHAR)CAST(RACE_CD AS VARCHAR)CAST(MCAID_SBMTTG_ST_CD AS VARCHAR)CAST(STATE_CD AS VARCHAR)CAST(COUNTY_FIPS_CD AS VARCHAR)...CAST(DUAL_ELGBL_3 AS VARCHAR)CAST(DUAL_ELGBL_4 AS VARCHAR)CAST(DUAL_ELGBL_5 AS VARCHAR)CAST(DUAL_ELGBL_6 AS VARCHAR)CAST(DUAL_ELGBL_7 AS VARCHAR)CAST(DUAL_ELGBL_8 AS VARCHAR)CAST(DUAL_ELGBL_9 AS VARCHAR)CAST(DUAL_ELGBL_10 AS VARCHAR)CAST(DUAL_ELGBL_11 AS VARCHAR)CAST(DUAL_ELGBL_12 AS DECIMAL(18,3))
0500000001.0511701142.08.7745.054.0M2NYNY103...1111111111.0
1500000002.0511589747.013.4765.074.0F1AZAZ007...1111111111.0
2500000003.0511623768.012.516.017.0F3NCNC183...000000000NaN
3500000004.0510156090.07.9818.024.0F0SCSC019...None000000000.0
4500000005.0511858221.09.2025.034.0F0NYNY059...00000000NoneNaN
..................................................................
5771388505771389.0513686691.011.576.017.0F1FLCO031...000000NoneNoneNoneNaN
5771389505771390.0512039932.012.526.017.0F1KSKS057...0000000NoneNoneNaN
5771390505771391.0515359234.010.2525.034.0M1ILIL073...0000000000.0
5771391505771392.0511986556.011.4718.024.0M0TNTN157...00000NoneNoneNoneNoneNaN
5771392505771393.0510936005.012.5525.034.0F1WANoneNone...0000000000.0
\n", + "

5771393 rows × 50 columns

\n", + "
" + ], + "text/plain": [ + " CAST(PERSON_ID AS DECIMAL(18,3)) \\\n", + "0 500000001.0 \n", + "1 500000002.0 \n", + "2 500000003.0 \n", + "3 500000004.0 \n", + "4 500000005.0 \n", + "... ... \n", + "5771388 505771389.0 \n", + "5771389 505771390.0 \n", + "5771390 505771391.0 \n", + "5771391 505771392.0 \n", + "5771392 505771393.0 \n", + "\n", + " CAST(MCAID_BENE_ID AS DECIMAL(18,3)) \\\n", + "0 511701142.0 \n", + "1 511589747.0 \n", + "2 511623768.0 \n", + "3 510156090.0 \n", + "4 511858221.0 \n", + "... ... \n", + "5771388 513686691.0 \n", + "5771389 512039932.0 \n", + "5771390 515359234.0 \n", + "5771391 511986556.0 \n", + "5771392 510936005.0 \n", + "\n", + " CAST(PERSON_WGHT AS DECIMAL(18,3)) CAST(AGE_LOW AS DECIMAL(18,3)) \\\n", + "0 8.77 45.0 \n", + "1 13.47 65.0 \n", + "2 12.51 6.0 \n", + "3 7.98 18.0 \n", + "4 9.20 25.0 \n", + "... ... ... \n", + "5771388 11.57 6.0 \n", + "5771389 12.52 6.0 \n", + "5771390 10.25 25.0 \n", + "5771391 11.47 18.0 \n", + "5771392 12.55 25.0 \n", + "\n", + " CAST(AGE_HIGH AS DECIMAL(18,3)) CAST(SEX_IDENT_CD AS VARCHAR) \\\n", + "0 54.0 M \n", + "1 74.0 F \n", + "2 17.0 F \n", + "3 24.0 F \n", + "4 34.0 F \n", + "... ... ... \n", + "5771388 17.0 F \n", + "5771389 17.0 F \n", + "5771390 34.0 M \n", + "5771391 24.0 M \n", + "5771392 34.0 F \n", + "\n", + " CAST(RACE_CD AS VARCHAR) CAST(MCAID_SBMTTG_ST_CD AS VARCHAR) \\\n", + "0 2 NY \n", + "1 1 AZ \n", + "2 3 NC \n", + "3 0 SC \n", + "4 0 NY \n", + "... ... ... \n", + "5771388 1 FL \n", + "5771389 1 KS \n", + "5771390 1 IL \n", + "5771391 0 TN \n", + "5771392 1 WA \n", + "\n", + " CAST(STATE_CD AS VARCHAR) CAST(COUNTY_FIPS_CD AS VARCHAR) ... \\\n", + "0 NY 103 ... \n", + "1 AZ 007 ... \n", + "2 NC 183 ... \n", + "3 SC 019 ... \n", + "4 NY 059 ... \n", + "... ... ... ... \n", + "5771388 CO 031 ... \n", + "5771389 KS 057 ... \n", + "5771390 IL 073 ... \n", + "5771391 TN 157 ... \n", + "5771392 None None ... 
\n", + "\n", + " CAST(DUAL_ELGBL_3 AS VARCHAR) CAST(DUAL_ELGBL_4 AS VARCHAR) \\\n", + "0 1 1 \n", + "1 1 1 \n", + "2 0 0 \n", + "3 None 0 \n", + "4 0 0 \n", + "... ... ... \n", + "5771388 0 0 \n", + "5771389 0 0 \n", + "5771390 0 0 \n", + "5771391 0 0 \n", + "5771392 0 0 \n", + "\n", + " CAST(DUAL_ELGBL_5 AS VARCHAR) CAST(DUAL_ELGBL_6 AS VARCHAR) \\\n", + "0 1 1 \n", + "1 1 1 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + "... ... ... \n", + "5771388 0 0 \n", + "5771389 0 0 \n", + "5771390 0 0 \n", + "5771391 0 0 \n", + "5771392 0 0 \n", + "\n", + " CAST(DUAL_ELGBL_7 AS VARCHAR) CAST(DUAL_ELGBL_8 AS VARCHAR) \\\n", + "0 1 1 \n", + "1 1 1 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + "... ... ... \n", + "5771388 0 0 \n", + "5771389 0 0 \n", + "5771390 0 0 \n", + "5771391 0 None \n", + "5771392 0 0 \n", + "\n", + " CAST(DUAL_ELGBL_9 AS VARCHAR) CAST(DUAL_ELGBL_10 AS VARCHAR) \\\n", + "0 1 1 \n", + "1 1 1 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + "... ... ... \n", + "5771388 None None \n", + "5771389 0 None \n", + "5771390 0 0 \n", + "5771391 None None \n", + "5771392 0 0 \n", + "\n", + " CAST(DUAL_ELGBL_11 AS VARCHAR) CAST(DUAL_ELGBL_12 AS DECIMAL(18,3)) \n", + "0 1 1.0 \n", + "1 1 1.0 \n", + "2 0 NaN \n", + "3 0 0.0 \n", + "4 None NaN \n", + "... ... ... \n", + "5771388 None NaN \n", + "5771389 None NaN \n", + "5771390 0 0.0 \n", + "5771391 None NaN \n", + "5771392 0 0.0 \n", + "\n", + "[5771393 rows x 50 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql \n", + "\n", + "SELECT * FROM '/Users/me/data/syh_dr/syhdr_medicaid_person_2016.parquet'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "df = _" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CAST(PERSON_ID AS DECIMAL(18,3))CAST(MCAID_BENE_ID AS DECIMAL(18,3))CAST(PERSON_WGHT AS DECIMAL(18,3))CAST(AGE_LOW AS DECIMAL(18,3))CAST(AGE_HIGH AS DECIMAL(18,3))CAST(SEX_IDENT_CD AS VARCHAR)CAST(RACE_CD AS VARCHAR)CAST(MCAID_SBMTTG_ST_CD AS VARCHAR)CAST(STATE_CD AS VARCHAR)CAST(COUNTY_FIPS_CD AS VARCHAR)...CAST(DUAL_ELGBL_3 AS VARCHAR)CAST(DUAL_ELGBL_4 AS VARCHAR)CAST(DUAL_ELGBL_5 AS VARCHAR)CAST(DUAL_ELGBL_6 AS VARCHAR)CAST(DUAL_ELGBL_7 AS VARCHAR)CAST(DUAL_ELGBL_8 AS VARCHAR)CAST(DUAL_ELGBL_9 AS VARCHAR)CAST(DUAL_ELGBL_10 AS VARCHAR)CAST(DUAL_ELGBL_11 AS VARCHAR)CAST(DUAL_ELGBL_12 AS DECIMAL(18,3))
0500000001.0511701142.08.7745.054.0M2NYNY103...1111111111.0
1500000002.0511589747.013.4765.074.0F1AZAZ007...1111111111.0
2500000003.0511623768.012.516.017.0F3NCNC183...000000000NaN
3500000004.0510156090.07.9818.024.0F0SCSC019...None000000000.0
4500000005.0511858221.09.2025.034.0F0NYNY059...00000000NoneNaN
\n", + "

5 rows × 50 columns

\n", + "
" + ], + "text/plain": [ + " CAST(PERSON_ID AS DECIMAL(18,3)) CAST(MCAID_BENE_ID AS DECIMAL(18,3)) \\\n", + "0 500000001.0 511701142.0 \n", + "1 500000002.0 511589747.0 \n", + "2 500000003.0 511623768.0 \n", + "3 500000004.0 510156090.0 \n", + "4 500000005.0 511858221.0 \n", + "\n", + " CAST(PERSON_WGHT AS DECIMAL(18,3)) CAST(AGE_LOW AS DECIMAL(18,3)) \\\n", + "0 8.77 45.0 \n", + "1 13.47 65.0 \n", + "2 12.51 6.0 \n", + "3 7.98 18.0 \n", + "4 9.20 25.0 \n", + "\n", + " CAST(AGE_HIGH AS DECIMAL(18,3)) CAST(SEX_IDENT_CD AS VARCHAR) \\\n", + "0 54.0 M \n", + "1 74.0 F \n", + "2 17.0 F \n", + "3 24.0 F \n", + "4 34.0 F \n", + "\n", + " CAST(RACE_CD AS VARCHAR) CAST(MCAID_SBMTTG_ST_CD AS VARCHAR) \\\n", + "0 2 NY \n", + "1 1 AZ \n", + "2 3 NC \n", + "3 0 SC \n", + "4 0 NY \n", + "\n", + " CAST(STATE_CD AS VARCHAR) CAST(COUNTY_FIPS_CD AS VARCHAR) ... \\\n", + "0 NY 103 ... \n", + "1 AZ 007 ... \n", + "2 NC 183 ... \n", + "3 SC 019 ... \n", + "4 NY 059 ... \n", + "\n", + " CAST(DUAL_ELGBL_3 AS VARCHAR) CAST(DUAL_ELGBL_4 AS VARCHAR) \\\n", + "0 1 1 \n", + "1 1 1 \n", + "2 0 0 \n", + "3 None 0 \n", + "4 0 0 \n", + "\n", + " CAST(DUAL_ELGBL_5 AS VARCHAR) CAST(DUAL_ELGBL_6 AS VARCHAR) \\\n", + "0 1 1 \n", + "1 1 1 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + "\n", + " CAST(DUAL_ELGBL_7 AS VARCHAR) CAST(DUAL_ELGBL_8 AS VARCHAR) \\\n", + "0 1 1 \n", + "1 1 1 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + "\n", + " CAST(DUAL_ELGBL_9 AS VARCHAR) CAST(DUAL_ELGBL_10 AS VARCHAR) \\\n", + "0 1 1 \n", + "1 1 1 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + "\n", + " CAST(DUAL_ELGBL_11 AS VARCHAR) CAST(DUAL_ELGBL_12 AS DECIMAL(18,3)) \n", + "0 1 1.0 \n", + "1 1 1.0 \n", + "2 0 NaN \n", + "3 0 0.0 \n", + "4 None NaN \n", + "\n", + "[5 rows x 50 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": 
{ + "text/plain": [ + "Index(['CAST(PERSON_ID AS DECIMAL(18,3))',\n", + " 'CAST(MCAID_BENE_ID AS DECIMAL(18,3))',\n", + " 'CAST(PERSON_WGHT AS DECIMAL(18,3))', 'CAST(AGE_LOW AS DECIMAL(18,3))',\n", + " 'CAST(AGE_HIGH AS DECIMAL(18,3))', 'CAST(SEX_IDENT_CD AS VARCHAR)',\n", + " 'CAST(RACE_CD AS VARCHAR)', 'CAST(MCAID_SBMTTG_ST_CD AS VARCHAR)',\n", + " 'CAST(STATE_CD AS VARCHAR)', 'CAST(COUNTY_FIPS_CD AS VARCHAR)',\n", + " 'CAST(ZIP_CD AS VARCHAR)', 'CAST(RSN_ENRLMT_CD AS VARCHAR)',\n", + " 'CAST(MDCD_ENRLMT_1 AS DECIMAL(18,3))',\n", + " 'CAST(MDCD_ENRLMT_2 AS VARCHAR)', 'CAST(MDCD_ENRLMT_3 AS VARCHAR)',\n", + " 'CAST(MDCD_ENRLMT_4 AS VARCHAR)', 'CAST(MDCD_ENRLMT_5 AS VARCHAR)',\n", + " 'CAST(MDCD_ENRLMT_6 AS VARCHAR)', 'CAST(MDCD_ENRLMT_7 AS VARCHAR)',\n", + " 'CAST(MDCD_ENRLMT_8 AS VARCHAR)', 'CAST(MDCD_ENRLMT_9 AS VARCHAR)',\n", + " 'CAST(MDCD_ENRLMT_10 AS VARCHAR)', 'CAST(MDCD_ENRLMT_11 AS VARCHAR)',\n", + " 'CAST(MDCD_ENRLMT_12 AS DECIMAL(18,3))',\n", + " 'CAST(MDCD_MCO_ENRLMT_1 AS DECIMAL(18,3))',\n", + " 'CAST(MDCD_MCO_ENRLMT_2 AS VARCHAR)',\n", + " 'CAST(MDCD_MCO_ENRLMT_3 AS VARCHAR)',\n", + " 'CAST(MDCD_MCO_ENRLMT_4 AS VARCHAR)',\n", + " 'CAST(MDCD_MCO_ENRLMT_5 AS VARCHAR)',\n", + " 'CAST(MDCD_MCO_ENRLMT_6 AS VARCHAR)',\n", + " 'CAST(MDCD_MCO_ENRLMT_7 AS VARCHAR)',\n", + " 'CAST(MDCD_MCO_ENRLMT_8 AS VARCHAR)',\n", + " 'CAST(MDCD_MCO_ENRLMT_9 AS VARCHAR)',\n", + " 'CAST(MDCD_MCO_ENRLMT_10 AS VARCHAR)',\n", + " 'CAST(MDCD_MCO_ENRLMT_11 AS VARCHAR)',\n", + " 'CAST(MDCD_MCO_ENRLMT_12 AS DECIMAL(18,3))',\n", + " 'CAST(MDCD_CHIP_ENRLMT AS DECIMAL(18,3))',\n", + " 'CAST(RSTRCTD_BNFTS_IND AS VARCHAR)',\n", + " 'CAST(DUAL_ELGBL_1 AS DECIMAL(18,3))', 'CAST(DUAL_ELGBL_2 AS VARCHAR)',\n", + " 'CAST(DUAL_ELGBL_3 AS VARCHAR)', 'CAST(DUAL_ELGBL_4 AS VARCHAR)',\n", + " 'CAST(DUAL_ELGBL_5 AS VARCHAR)', 'CAST(DUAL_ELGBL_6 AS VARCHAR)',\n", + " 'CAST(DUAL_ELGBL_7 AS VARCHAR)', 'CAST(DUAL_ELGBL_8 AS VARCHAR)',\n", + " 'CAST(DUAL_ELGBL_9 AS VARCHAR)', 'CAST(DUAL_ELGBL_10 AS 
VARCHAR)',\n", + " 'CAST(DUAL_ELGBL_11 AS VARCHAR)',\n", + " 'CAST(DUAL_ELGBL_12 AS DECIMAL(18,3))'],\n", + " dtype='object')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df['CAST(AGE_HIGH AS DECIMAL(18,3))'].hist()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df['CAST(PERSON_WGHT AS DECIMAL(18,3))'].hist()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}