In [1]:
%load_ext autoreload
%autoreload 2
import bwypy

## BW Query object


## Initialize from JSON

In [41]:
bwypy.BWQuery

bwypy.core.BWQuery

In [40]:
jsonq = '''{
   "database": "hathipd",
   "method": "return_json", 
   "search_limits": {
       "date_year": {"$gt": 1790, "$lt": 1923 }
   },
   "counttype": ["TextCount"],
   "groups": ["date_year"]
   }'''
# Build the query object from the JSON string above, passing the Bookworm
# endpoint explicitly rather than relying on a globally configured one.
bw = bwypy.BWQuery(
    json=jsonq,
    endpoint='https://bookworm.htrc.illinois.edu/cgi-bin/dbbindings.py',
)



In [9]:
bw.json

{'counttype': ['TextCount'],
 'database': 'hathipd',
 'groups': ['date_year'],
 'method': 'return_json',
 'search_limits': {'date_year': {'$gt': 1790, '$lt': 1923}}}

In [10]:
bw.groups

['date_year']

In [11]:
bw.search_limits

{'date_year': {'$gt': 1790, '$lt': 1923}}

In [12]:
bw.database

'hathipd'

## Run a query

Query results are returned as a `BWResults` object.

In [13]:
# Replace the grouping of the existing query with two fields (this mutates
# `bw`, so later cells see the new groups).
bw.groups = ['page_count_bin', 'is_gov_doc']
# Execute the query; the result object is inspected in the following cells.
bw_results = bw.run()



In [14]:
bw_results.json()

{'L - Between 350 and 550': {'': [563222], 'No': [30973]},
 'M - Between 150 and 350': {'': [549374], 'No': [30020]},
 'S - Less than 150': {'': [466445], 'No': [25737]},
 'XL - Greater than 550': {'': [529501], 'No': [28435]},
 'unknown': {'': [1325704], 'No': [73659]}}

In [15]:
# FIXME: in the recorded run this raised IndexError (list index out of range)
# for the two-group result, even though bw_results.json() succeeded.
bw_results.dataframe()

IndexError: list index out of range

In [16]:
print(bw_results.csv())

IndexError: list index out of range

In [17]:
bw_results.tolist()

IndexError: list index out of range

In [18]:
bw_results.tuples()

IndexError: list index out of range

## Initialize blank BW

Rather than passing in an already-constructed JSON query, you can use `BWQuery` to build a query from scratch.

In [19]:
# Demonstrate the failure mode: with no endpoint passed in and none set
# globally, construction is expected to raise NameError. Catch and print the
# error (as "Type: message") so the notebook still runs from top to bottom.
try:
    newq = bwypy.BWQuery()
except NameError as err:
    print('{}: {}'.format(type(err).__name__, err))

NameError: No endpoint. Provide to BWQuery on initialization or set globally.

In [20]:
newq = bwypy.BWQuery(database='hathipd', endpoint='https://bookworm.htrc.illinois.edu/cgi-bin/dbbindings.py')



In [21]:
newq.json

{'compare_limits': [],
 'counttype': ['TextCount', 'WordCount'],
 'database': 'hathipd',
 'groups': [],
 'method': 'return_json',
 'search_limits': {},
 'words_collation': 'Case_Sensitive'}

In [22]:
newq.run().dataframe()



AttributeError: 'list' object has no attribute 'items'

In [None]:
newq.groups

In [None]:
newq.groups = ['foo']

In [23]:
newq.groups = ['publication_country']
newq.run().dataframe()



TypeError: unhashable type: 'list'

### Global settings

Since you are unlikely to be switching databases or endpoints often, these settings can be set globally with `set_options`:

In [24]:
# Register the endpoint and database as global defaults.
bwypy.set_options(
    endpoint='https://bookworm.htrc.illinois.edu/cgi-bin/dbbindings.py',
    database='global',
)

# A query created without an explicit database reports the globally set one.
bwypy.BWQuery(verify_fields=False).database

'global'

Or in a `with` block:

In [25]:
# Use set_options as a context manager; the query created inside the block
# picks up the database given here (see the displayed value).
with bwypy.set_options(
    endpoint='https://bookworm.htrc.illinois.edu/cgi-bin/dbbindings.py',
    database='with_block',
):
    bw = bwypy.BWQuery(verify_fields=False)

bw.database

'with_block'

The priority for variables is:

- set with an _init_ argument
- set within the query json (for database)
- set within a `with` block with `set_options`
- set globally with `set_options`

## More BWQuery functions


Parser for `getAvailableFields`, used internally on initialization if `verify_fields=True`:

In [26]:
bw = bwypy.BWQuery(json=jsonq, endpoint='https://bookworm.htrc.illinois.edu/cgi-bin/dbbindings.py')
bw.fields()



Unnamed: 0,anchor,dbname,description,name,tablename,type
0,bookid,lc_classes,,lc_classes,lc_classesLookup,character
1,bookid,lc_subclasses,,lc_subclasses,lc_subclassesLookup,character
2,bookid,fiction_nonfiction,,fiction_nonfiction,fiction_nonfictionLookup,character
3,bookid,genres,,genres,genresLookup,character
4,bookid,languages,,languages,languagesLookup,character
5,bookid,format,,format,formatLookup,character
6,bookid,is_gov_doc,,is_gov_doc,is_gov_docLookup,character
7,bookid,page_count_bin,,page_count_bin,page_count_binLookup,character
8,bookid,word_count_bin,,word_count_bin,word_count_binLookup,character
9,bookid,publication_country,,publication_country,publication_countryLookup,character


*This was a property before (e.g. `bw.fields` instead of `bw.fields()`), but this was not appropriate because it does processing, and there is no sensible setter.*

Return all possible values for the field.

In [27]:
bw.field_values(field='lc_classes')



['unknown',
 'Language and Literature',
 'General and Old World History',
 'Social Sciences',
 'Science',
 'Philosophy, Psychology, and Religion',
 'Law',
 'Technology',
 'General Works',
 'History of the United States and British, Dutch, French, and Latin America',
 'Political Science',
 'Agriculture',
 'History of America',
 'Education',
 'Bibliography, Library Science, and General Information Resources',
 'Medicine',
 'Fine Arts',
 'Geography, Anthropology, and Recreation',
 'Music',
 'Auxiliary Sciences of History',
 'Military Science',
 'Naval Science']

In [28]:
bw.field_values(field='is_gov_doc')



['', 'No']

Return possible values for the field, within the current search limit. 

**TODO** Need better name.
```
bw.limited_field_values(field='lc_classes')
```

In [29]:
# NOTE(review): the recorded output shows BWQuery has no `limit_field_values`
# attribute, while the text above refers to `limited_field_values` -- confirm
# which name is intended (and whether the method exists yet).
bw.limit_field_values(field='lc_classes')

AttributeError: 'BWQuery' object has no attribute 'limit_field_values'

## Testing validation

In [30]:
bw = bwypy.BWQuery(json=jsonq, endpoint='https://bookworm.htrc.illinois.edu/cgi-bin/dbbindings.py')
bw



<bwypy.bwypy.BWQuery at 0x196d4874518>

In [31]:
# Mutate the search_limits dict in place with an unsupported field. The
# recorded run shows no error at this point; the bad key is only reported by
# the explicit bw._validate() call further down.
bw.search_limits['date_year_wrong'] = 1

In [32]:
bw._last_good

{'counttype': ['TextCount'],
 'database': 'hathipd',
 'groups': ['date_year'],
 'method': 'return_json',
 'search_limits': {'date_year': {'$gt': 1790, '$lt': 1923}}}

In [33]:
# Validation is expected to reject the unsupported `date_year_wrong` limit
# with a KeyError. Catch and print it (as "Type: message") so the
# demonstration does not abort a top-to-bottom run of the notebook.
try:
    bw._validate()
except KeyError as err:
    print('{}: {}'.format(type(err).__name__, err))

KeyError: 'The following search_limit fields are not supported in this BW: date_year_wrong'

In [34]:
bw.search_limits

{'date_year': {'$gt': 1790, '$lt': 1923}}

In [35]:
# Assigning an unsupported group is expected to raise KeyError immediately.
# Catch and print it (as "Type: message") so the demonstration does not abort
# a top-to-bottom run of the notebook.
try:
    bw.groups = ['date_year_wrong']
except KeyError as err:
    print('{}: {}'.format(type(err).__name__, err))

KeyError: 'The following groups are not supported in this BW: date_year_wrong'

In [36]:
a = bw.search_limits

In [37]:
a.keys()

dict_keys(['date_year'])

### Turning off validation

In [39]:
%%time
bwypy.BWQuery(json=jsonq, endpoint='https://bookworm.htrc.illinois.edu/cgi-bin/dbbindings.py', verify_fields=False)

Wall time: 0 ns


<bwypy.bwypy.BWQuery at 0x196d4893ba8>