# Airtable Wardrobe Analytics

In [1]:
import os

%matplotlib inline
import matplotlib
import pandas as pd
import matplotlib.pyplot as plt

from airtable import airtable

## Setup
Set your Airtable API key and base ID as the environment variables `AIRTABLE_KEY` and `AIRTABLE_BASE_ID`; the next cell reads them. You can find your key on your Airtable 'Account' page.

In [2]:
# Credentials come from the environment; either value is None when its
# variable is not set.
AIRTABLE_KEY = os.getenv('AIRTABLE_KEY')
BASE_ID = os.getenv('AIRTABLE_BASE_ID')

In [3]:
# Airtable client built from the base id and key read above; the loading
# cell calls at.get() on it once per table.
at = airtable.Airtable(BASE_ID, AIRTABLE_KEY)

## Get tables
These are my tables. They should all have mostly identical schemas. They were not generated programmatically, so I still need to clean them up.

In [4]:
# Table names, kept here for reference.
# OUTFITS is not used by any other cell visible in this notebook.
OUTFITS = 'Outfits'
# One table per item category; each name is passed to at.get() when
# TABLES is built.
CATEGORIES = [
    'Tops',
    'Bottoms',
    'Dresses',
    'Shoes',
    'Outerwear',
    'Layering',
    'Accessories',
    'Scarves',
    'Bags',
    'Swimwear'
]

In [None]:
def records_to_df(table):
    """Build a DataFrame with one row per entry in ``table['records']``.

    Each row is that record's ``'fields'`` mapping; anything else stored on
    the record is ignored.
    """
    rows = []
    for entry in table['records']:
        rows.append(entry['fields'])
    return pd.DataFrame(rows)

In [None]:
def _fetch_all_records(table_name):
    """Return a table-shaped dict holding the records from every page.

    The Airtable API returns records one page at a time. While more pages
    remain, the response carries an 'offset' token that must be passed to
    the next request. A single at.get() call would therefore silently drop
    everything after the first page of a large table.
    """
    records = []
    offset = None
    while True:
        page = at.get(table_name, offset=offset)
        records.extend(page['records'])
        offset = page.get('offset')
        if not offset:
            # No token in the response means this was the last page.
            return {'records': records}


# This takes a couple of seconds (at least one request per table)
TABLES = {category: records_to_df(_fetch_all_records(category)) for category in CATEGORIES}

## Functions for analytics & visualization

Just a few simple helpers to start with.

In [None]:
def sort_by_wears(df):
    """Return ``df`` ordered by its '# Outfits' column, highest first.

    The input frame is left untouched; a sorted copy is returned.
    """
    wears_column = '# Outfits'
    return df.sort_values(by=wears_column, ascending=False)

In [None]:
def plot_wears(table, name=None):
    """Draw a bar chart of '# Outfits' per 'Name', most-worn item first.

    ``name`` is used as the chart title; x tick labels are rotated 90 degrees.
    """
    sort_by_wears(table).plot(
        kind='bar',
        x='Name',
        y='# Outfits',
        rot=90,
        title=name,
    )

In [None]:
def top_n_items(df, n=3):
    """Return 'Name' and '# Outfits' for the first ``n`` rows by wears.

    This is a plain cut-off after sorting: when several items share the
    same '# Outfits' value as the last kept row, the extras are dropped
    rather than included.
    """
    columns = ['Name', '# Outfits']
    ranked = sort_by_wears(df)
    return ranked[columns].head(n)

In [None]:
def plot_each_by_wears():
    """Draw the wears bar chart for every table in TABLES.

    A table whose plot raises KeyError is skipped; its name and fields
    are printed instead so the remaining tables still get plotted.
    """
    for category, frame in TABLES.items():
        try:
            plot_wears(frame, name=category)
        except KeyError:
            print("Could not plot {0}. Fields are {1}".format(category, frame.keys()))

In [None]:
def print_each_top_4():
    """Print the four most-worn items of every table in TABLES.

    Each table's name is printed first, then its top four rows, then an
    80-character separator line. A table for which top_n_items raises
    KeyError is reported with its fields rather than aborting the whole
    listing.
    """
    for tablename, table in TABLES.items():
        print(tablename)
        try:
            print(top_n_items(table, 4))
        except KeyError:
            # Same best-effort fallback as plot_each_by_wears: one table
            # with a divergent schema should not hide the others.
            print("Could not rank {0}. Fields are {1}".format(tablename, table.keys()))
        print('-'*80)

## Number of wears for each type of item

In [None]:
# One bar chart per category; tables whose plot raises KeyError are reported instead.
plot_each_by_wears()

## Top Items

The same rankings as text, because the labels are hard to read in the graphs.

In [None]:
# Text listing of the four most-worn items in each category.
print_each_top_4()