# Explore Python PEP data using littletable

Load and examine a zipped JSON extract of data on Python PEPs.
 - access data by unique `id` key
 - access table using normal slice notation
 - present table in tabular, JSON, or CSV output
 - create a full-text search index on the abstract field

In [3]:
import littletable as lt

In [23]:
# import the zipped JSON data as-is (no field transforms) and display summary Table info
peek = lt.Table().json_import("peps.json.zip")
peek.info()

{'len': 642,
 'name': 'peps.json.zip',
 'fields': ['id',
  'title',
  'authors',
  'discussions_to',
  'status',
  'type',
  'topic',
  'created',
  'python_version',
  'post_history',
  'resolution',
  'requires',
  'replaces',
  'superseded_by',
  'url',
  'abstract'],
 'indexes': [],
 'created': datetime.datetime(2024, 3, 25, 20, 6, 51, 891354, tzinfo=datetime.timezone.utc),
 'modified': datetime.datetime(2024, 3, 25, 20, 6, 51, 898138, tzinfo=datetime.timezone.utc),
 'last_import': datetime.datetime(2024, 3, 25, 20, 6, 51, 898138, tzinfo=datetime.timezone.utc)}

In [25]:
# look at the first 3 rows, omitting the "abstract" field
peek.select("-abstract")[:3].present()



In [24]:
# import PEP data from zipped JSON, converting ids to ints and "created" date strings to Python dates
peps = lt.Table().json_import(
    "peps.json.zip",
    transforms={
        "id": int,
        "created": lt.Table.parse_date("%d-%b-%Y"),
    }
)

In [7]:
# display summary Table info
peps.info()

{'len': 642,
 'name': 'peps.json.zip',
 'fields': ['id',
  'title',
  'authors',
  'discussions_to',
  'status',
  'type',
  'topic',
  'created',
  'python_version',
  'post_history',
  'resolution',
  'requires',
  'replaces',
  'superseded_by',
  'url',
  'abstract'],
 'indexes': [],
 'created': datetime.datetime(2024, 3, 25, 19, 59, 19, 196974, tzinfo=datetime.timezone.utc),
 'modified': datetime.datetime(2024, 3, 25, 19, 59, 19, 210519, tzinfo=datetime.timezone.utc),
 'last_import': datetime.datetime(2024, 3, 25, 19, 59, 19, 210519, tzinfo=datetime.timezone.utc)}

In [8]:
# access records by unique PEP id
peps.create_index("id", unique=True)
print("PEP20:", peps.by.id[20].title)

PEP20: The Zen of Python


In [9]:
# add a numeric "year" field, and index it (non-unique index, since there are multiple PEPs in any given year)
peps.add_field("year", lambda pep: pep.created.year)
peps.create_index("year")

<littletable.Table at 0x23341412f50>

In [11]:
# present PEPs created in 2016 - access index by indexed field name
peps.by.year[2016]("PEPs Created in 2016").select("id python_version title status url").present()



In [12]:
# how many PEPs since 2020? use slice notation for ">="
print(f"Number of PEPs since 2020: {len(peps.by.year[2020:])}")

Number of PEPs since 2020: 130


In [13]:
# pivot by year and dump counts, or present as nice table
peps.pivot("year").dump_counts()

Pivot: year
2000         41
2001         52
2002         30
2003         27
2004         11
2005         16
2006         23
2007         32
2008         10
2009         25
2010          7
2011         16
2012         28
2013         27
2014         24
2015         29
2016         30
2017         26
2018         26
2019         32
2020         36
2021         30
2022         25
2023         35
2024          4


In [14]:
# convert the year pivot into a table labeled "PEPs by Year Submitted", add an
# unnamed column holding one "o" per counted PEP (a simple text bar chart),
# and present it with a left-justified caption
peps_by_year = peps.pivot("year").as_table()("PEPs by Year Submitted")
peps_by_year.add_field("", lambda rec: "o" * rec.count)
peps_by_year.present(caption="as of 21 March 2024", caption_justify="left")



In [15]:
# create full text search on PEP abstracts
peps.create_search_index("abstract")

<littletable.Table at 0x23341412f50>

In [21]:
# search by search field name
walrus_pep = peps.search.abstract("walrus", as_table=True)("'walrus' Search Results")
walrus_pep.select("id title year authors").present()



In [18]:
# display as JSON (json_export also accepts an output file)
print(walrus_pep.select("id title year authors").json_export())

[
{"id": 572, "title": "Assignment Expressions", "year": 2018, "authors": "Angelico, Peters, GvR"}
]


In [19]:
# search for PEPs referring to GvR or Guido or BDFL
bdfl_peps = peps.search.abstract("gvr guido bdfl", as_table=True)("GvR PEPs")
# order the search results by PEP id before presenting them
bdfl_peps.orderby("id")
bdfl_peps.select("id title year url authors").present()



In [26]:
# define a custom JSON encoder for datetime.date field
import json
class JsonDateEncoder(json.JSONEncoder):
    """JSON encoder that writes ``datetime.date`` values as their ``str()`` form.

    Objects of any other type that reach ``default`` are handed to
    ``json.JSONEncoder.default``.
    """

    def default(self, o):
        from datetime import date

        if not isinstance(o, date):
            # not a date: let the base encoder deal with it
            return super().default(o)
        # datetime.datetime is a subclass of date, so it is stringified here too
        return str(o)

print(bdfl_peps.select("id title created").json_export(json_encoder=(JsonDateEncoder,)))

[
{"id": 8, "title": "Style Guide for Python Code", "created": "2001-07-05"},
{"id": 20, "title": "The Zen of Python", "created": "2004-08-19"},
{"id": 42, "title": "Feature Requests", "created": "2000-09-12"},
{"id": 102, "title": "Doing Python Micro Releases", "created": "2002-01-09"},
{"id": 246, "title": "Object Adaptation", "created": "2001-03-21"},
{"id": 298, "title": "The Locked Buffer Interface", "created": "2002-07-26"},
{"id": 308, "title": "Conditional Expressions", "created": "2003-02-07"},
{"id": 346, "title": "User Defined (\"``with``\") Statements", "created": "2005-05-06"},
{"id": 366, "title": "Main module explicit relative imports", "created": "2007-05-01"},
{"id": 401, "title": "BDFL Retirement", "created": "2009-04-01"},
{"id": 568, "title": "Generator-sensitivity for Context Variables", "created": "2018-01-04"},
{"id": 3099, "title": "Things that will Not Change in Python 3000", "created": "2006-04-04"},
{"id": 3100, "title": "Miscellaneous Python 3.0 Plans", "cre

In [29]:
# CSV import/export also supported
print(bdfl_peps.select("id title created").csv_export())

id,title,created
8,Style Guide for Python Code,2001-07-05
20,The Zen of Python,2004-08-19
42,Feature Requests,2000-09-12
102,Doing Python Micro Releases,2002-01-09
246,Object Adaptation,2001-03-21
298,The Locked Buffer Interface,2002-07-26
308,Conditional Expressions,2003-02-07
346,"User Defined (""``with``"") Statements",2005-05-06
366,Main module explicit relative imports,2007-05-01
401,BDFL Retirement,2009-04-01
568,Generator-sensitivity for Context Variables,2018-01-04
3099,Things that will Not Change in Python 3000,2006-04-04
3100,Miscellaneous Python 3.0 Plans,2004-08-20
3111,Simple input built-in in Python 3000,2006-09-13
3146,Merging Unladen Swallow into CPython,2010-01-01
8000,Python Language Governance Proposal Overview,2018-08-24
8001,Python Governance Voting Process,2018-08-24
8002,Open Source Governance Survey,2018-08-24
8010,The Technical Leader Governance Model,2018-08-24
8014,The Commons Governance Model,2018-09-16
8015,Organization of the Python co