# Working with specific Datatable (use **`"WaterSystem"`** as example)

In [1]:
from sdwis_drink_water import WaterSystem
# Due to the width limitation, I wrote some functions for outputting some results using an interactive scrollbar
from sdwis_drink_water.utils_for_jupyter_print import print_columns, print_column_description, print_result_data

water_system_api = WaterSystem(print_url=False)

## fetch table column name

In [2]:
water_system_columns = water_system_api.get_table_column_name(print_to_console=False)
print_columns(water_system_columns)

Output(layout=Layout(width='100%'))

## fetch table column names with descriptions

In [3]:
water_system_columns_description_dict = water_system_api.get_table_columns_description(print_to_console=False, multi_threads=True)
print_column_description(water_system_columns_description_dict)

Fetching column descriptions for WATER_SYSTEM from SDWIS Official Website by multi_threads_mode: 100%|██████████| 45/45 [00:05<00:00,  8.30it/s]


Output(layout=Layout(width='100%'))

## fetch first data from Water_System table

In [4]:
# fetch first data from LcrSampleResult table
first_water_system_data = water_system_api.get_table_first_data(print_to_console=False)

print_result_data(first_water_system_data)

Output(layout=Layout(width='100%'))

## fetch first n data from Water_System table

In [9]:
first_20_water_system_data = water_system_api.get_table_first_n_data(n=20, print_to_console=False)

print_result_data(first_20_water_system_data)

Output(layout=Layout(width='100%'))

### We can use this method to quickly access the full datasheet, then export to a file

In [6]:
water_system_data_number = water_system_api.get_table_data_number()
all_water_system = water_system_api.get_table_first_n_data(n=water_system_data_number, multi_threads=True, print_to_console=False)
# You can also just set n to a big number
# all_water_system = water_system_api.get_table_first_n_data(n=99999999, multi_threads=True, print_to_console=False)
all_water_system.export_data("./output_files/all_water_system.xlsx", format_type="xlsx")

WATER_SYSTEM
The data number in the database provided by the API is 428808. Your request number exceeds this limit, please note.


Fetching Data by multi_threads_mode:   0%|          | 0/43 [00:00<?, ?it/s]
  0%|          | 0/43 [00:00<?, ?it/s][A
 28%|██▊       | 12/43 [00:02<00:05,  5.97it/s][A
 30%|███       | 13/43 [00:04<00:10,  2.75it/s][A
 33%|███▎      | 14/43 [00:06<00:16,  1.73it/s][A
 35%|███▍      | 15/43 [00:08<00:22,  1.24it/s][A
 37%|███▋      | 16/43 [00:10<00:27,  1.03s/it][A
 40%|███▉      | 17/43 [00:12<00:32,  1.23s/it][A
 42%|████▏     | 18/43 [00:14<00:35,  1.41s/it][A
 44%|████▍     | 19/43 [00:16<00:37,  1.56s/it][A
 47%|████▋     | 20/43 [00:18<00:38,  1.68s/it][A
 49%|████▉     | 21/43 [00:20<00:38,  1.77s/it][A
 51%|█████     | 22/43 [00:22<00:38,  1.83s/it][A
 53%|█████▎    | 23/43 [00:24<00:37,  1.88s/it][A
 56%|█████▌    | 24/43 [00:26<00:36,  1.93s/it][A
 58%|█████▊    | 25/43 [00:28<00:35,  1.95s/it][A
 60%|██████    | 26/43 [00:30<00:33,  1.97s/it][A
 63%|██████▎   | 27/43 [00:32<00:31,  1.98s/it][A
 65%|██████▌   | 28/43 [00:34<00:29,  1.99s/it][A
 67%|██████▋   

Data is successfully exported to ./output_files/all_water_system.xlsx!


## fetch data from Water_System table by conditions

In [11]:
columbia_water_systems = water_system_api.get_water_system_data_by_conditions("city_name=columbia", print_to_console=False)
columbia_water_systems.count()

487

## get summarize data number according to epa_region

In [12]:
water_system_api.summarize_water_system_data_by_epa_region(multi_threads=True, print_to_console=True)

Fetching the total amount of data by EPA region from the WATER_SYSTEM data table: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]

>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
┌─────────────────┬─────────────────────────────┐
│   EPA_REGION_ID │   WATER_SYSTEM_TOTAL_NUMBER │
├─────────────────┼─────────────────────────────┤
│    EPA_REGION_1 │                       45508 │
├─────────────────┼─────────────────────────────┤
│    EPA_REGION_2 │                       67410 │
├─────────────────┼─────────────────────────────┤
│    EPA_REGION_3 │                       24074 │
├─────────────────┼─────────────────────────────┤
│    EPA_REGION_4 │                       33898 │
├─────────────────┼─────────────────────────────┤
│    EPA_REGION_5 │                       30931 │
├─────────────────┼─────────────────────────────┤
│    EPA_REGION_6 │                       15799 │
├─────────────────┼─────────────────────────────┤
│    EPA_REGION_7 │                       43325 │
├─────────────────┼─────────────────────────────┤
│    EPA_REGION_8 │                       25563 │




## get data according to epa_region and export

In [18]:
water_system_epa_1 = water_system_api.get_water_system_by_epa_region(epa_region=1, print_to_console=True)
# export data to xlsx
water_system_epa_1.export_data("./output_files/Water System of EPA region 1.xlsx", format_type="xlsx")

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Data is successfully exported to ./output_files/Water System of EPA region 1.xlsx!


## Data Handle

In [24]:
# print all keys to determine how to use this data
print_columns(list(water_system_epa_1.get_all_keys()))

# find by condition "is_grant_eligible_ind==Y"
is_grant_eligible_ind = water_system_epa_1.find_by_condition("is_grant_eligible_ind=Y")
print(is_grant_eligible_ind.count())

# find by condition "is_wholesaler_ind==Y"
is_wholesaler_ind = water_system_epa_1.find_by_condition("is_wholesaler_ind=Y")
print(is_wholesaler_ind.count())

# find by condition "is_grant_eligible_ind==Y" and "is_wholesaler_ind==Y"
is_grant_eligible_ind_and_is_wholesaler_ind = is_grant_eligible_ind.find_by_condition("is_wholesaler_ind=Y")
print(is_grant_eligible_ind_and_is_wholesaler_ind.count())

# export data to csv
is_grant_eligible_ind_and_is_wholesaler_ind.export_data("./output_files/water_system_epa_1_filtered_result.xlsx",
                                                        format_type="xlsx")

Output(layout=Layout(width='100%'))

10112
41
25
Data is successfully exported to ./output_files/water_system_epa_1_filtered_result.xlsx!
