In [None]:
import micropip
await micropip.install(['pandas', 'shapely', 'pyproj', 'pydeck', 'dubo'])

<h1 style='text-align:left;font-size:6rem;'><span style='color:cornflowerblue;'>dubo</span> makes data analysis simple </h1>

<p style="text-align:left;">Use <a href="https://e2eml.school/transformers.html">GPTs</a> tuned with SQL queries to make EDA fast and easy.</p>

<section style='background:cornflowerblue;height:100px;color:white;font-family:monospace;white-space:pre;padding-top:1rem;margin-top:1rem;'>
    <span style="color:white;font-weight:bold;">dubo</span>.<span style="color:white;font-weight:bold;">ask</span>("What's the most populous ZIP code in the United States?", df)
    > 77449
</section>

To install, simply run `pip install dubo`

In [3]:
import pandas as pd
import dubo


# Grab a subset of data grouped by ZIP code from the 2021 American Community Survey
DATA_URL = (
    'https://raw.githubusercontent.com/ajduberstein/'
    'geo_datasets/master/2021_5_yr_acs.csv'
)
census_df = pd.read_csv(DATA_URL)
census_df.head()

Unnamed: 0,tot_pop,elderly_pop,male_pop,female_pop,white_pop,black_pop,native_american_pop,asian_pop,two_or_more_pop,hispanic_pop,...,pop_35_to_44_years,pop_45_to_54_years,pop_55_to_59_years,pop_60_to_64_years,pop_65_to_74_years,pop_75_to_84_years,pop_85_years_and_over,per_capita_income,median_income_for_workers,zip_code
0,17126.0,3478.0,8451.0,8675.0,15249.0,358.0,111.0,2.0,888.0,17038.0,...,1967.0,2350.0,1237.0,1282.0,1986.0,1088.0,404.0,7587.0,12541.0,601
1,37895.0,7768.0,18588.0,19307.0,35571.0,10754.0,9157.0,46.0,12405.0,35649.0,...,4680.0,5082.0,2736.0,3130.0,4605.0,2349.0,814.0,10699.0,14180.0,602
2,49136.0,11025.0,23817.0,25319.0,39975.0,2621.0,669.0,61.0,3750.0,48121.0,...,5962.0,6312.0,3259.0,3467.0,6225.0,3774.0,1026.0,12280.0,17449.0,603
3,5751.0,1309.0,2817.0,2934.0,3488.0,137.0,21.0,0.0,261.0,5710.0,...,691.0,731.0,385.0,442.0,760.0,273.0,276.0,8574.0,15565.0,606
4,26153.0,5423.0,12678.0,13475.0,24015.0,6882.0,5659.0,30.0,8216.0,25053.0,...,3295.0,3688.0,1649.0,1944.0,3010.0,1952.0,461.0,11638.0,16262.0,610


In [5]:
dubo.ask(
    "What's the most populous ZIP code in the United States?",
    census_df,
    verbose=True)

SELECT zip_code FROM tbl 
    ORDER BY tot_pop DESC 
    LIMIT 1;


[(8701,)]

In [22]:
from dubo import MSG
MSG

'Dubo | Examples: https://dubo.mercator.tech/ | Feedback? https://forms.gle/KvPm6niv9oUGRZhh8 | Privacy policy: https://mercator.tech/privacy'

In [7]:
dubo.ask(
    "What are the ten ZIP codes with the largest Hispanic "
    "populations in the United States?", census_df, verbose=True)

SELECT zip_code, hispanic_pop
    FROM tbl
    ORDER BY hispanic_pop DESC
    LIMIT 10;


[(90011, 100502.0),
 (79936, 93807.0),
 (926, 93311.0),
 (90201, 90673.0),
 (91331, 89439.0),
 (90280, 88872.0),
 (11368, 88079.0),
 (78521, 85910.0),
 (92335, 82412.0),
 (78542, 80178.0)]

In [17]:
# Try it yourself!
dubo.ask("Where is the wealthiest place in the US that is not majority white?", census_df, verbose=True)

SELECT zip_code FROM tbl
    WHERE white_pop < (tot_pop / 2)
    ORDER BY median_income_for_workers DESC
    LIMIT 1;


[(11973,)]

In [None]:
## COMING SOON - Joins
# 
# dubo.ask(
#     "Where are the wealthiest under-65 ZIP codes in the United States?",
#     [census_df, zip_code_locations_df])
#
## COMING SOON - Follow-up queries
# 
# dubo.followup("Which of those areas are majority non-white?")
#
## COMING SOON - Direct SQL connections
## COMING SOON - Fine-tune to your company's database 

In [13]:
import ipywidgets as widgets
from IPython.display import display
button = widgets.Button(description="Click Me!")
inp = widgets.Text(description='Path:')
Box = widgets.HBox([button,inp])
Box

HBox(children=(Button(description='Click Me!', style=ButtonStyle()), Text(value='', description='Path:')))

Interested in accelerating your analytics? Apply for our pilot program: support@mercator.tech