In [1]:
# First make YAML from dataset

# Then maybe use jinja or something to render as HTML



In [2]:
import pandas as pd
from datetime import datetime as dt
import re

YEAR_RANGE = (1900, 1950)

In [3]:
def remove(row):
    try:
        date = dt.strptime(row.Date, '%Y-%m-%d')
    except:
        return True
    
    if YEAR_RANGE and date.year >= YEAR_RANGE[0] and date.year <= YEAR_RANGE[1]:
        pass # in the range!
    else:
        return True

    if row['Exclude from visualization'] or row['Unsure whether drag artist']:
        return True
    
    no_city = row['City'] == ''
    no_performer = row['Performer'] == ''
    no_venue = row['Venue'] == ''
    unnamed_performer = 'unnamed' in row['Performer'].lower()
    
    if no_city and no_performer and no_venue:
        return True
    
    if unnamed_performer:
        return True

    return False

def extract_addresses_dict(normalized_df):
    addresses = {}
    rows_with_addresses = normalized_df[normalized_df['Address']!='']
    warnings = []
    for x in zip(rows_with_addresses['Date'], rows_with_addresses['Source'], rows_with_addresses['Venue'], rows_with_addresses['Address']):
        date, source, venue, address = x
        if venue == '':
            warnings.append(address)
        else:
            if not venue in addresses:
                addresses[venue] = {}
            if not source in addresses[venue]:
                addresses[venue][source] = address
    if len(warnings):
        print(f'Warning: {len(warnings)} Venues with no names have addresses:')
        print('- ' + '- '.join(warnings))

    return addresses

def reverse_comment_dict(comment_dict):
    comments_reverse = {}
    for performer, comments in comment_dict.items():
        if not performer in comments_reverse:
            comments_reverse[performer] = {}
        for source, comment in comments.items():
            if not comment in comments_reverse[performer]:
                comments_reverse[performer][comment] = []
            comments_reverse[performer][comment].append(source)
    return comments_reverse
        
def get_comments(df, comment_field='Comment on edge: revue', match_field='Revue', transform=None):
    comments = {}
    rows_with_comments = df[df[comment_field]!='']
    warnings = []
    for x in zip(rows_with_comments['Date'], rows_with_comments['Source'], rows_with_comments[match_field], rows_with_comments[comment_field]):
        date, source, match, comment = x
        comment = str(comment).strip()
        if transform:
            comment = transform(comment)
        if match == '':
            warnings.append(str(comment)[:40]+'...')
        else:
            if not match in comments:
                comments[match] = {}
            if not source in comments[match]:
                comments[match][source] = comment
    if len(warnings):
        print(f'Warning: {len(warnings)} mentions in `{comment_field}` with no value have comments:')
        print('- ' + '\n- '.join(warnings))

    return comments

def get_revue_comments_dict(df):
    return get_comments(df, 'Comment on edge: revue', 'Revue')

def get_performer_comments_dict(df):
    return get_comments(df, 'Comment on node: performer', 'Performer')

def get_venue_comments_dict(df):
    return get_comments(df, 'Comment on node: venue', 'Venue')

def get_city_comments_dict(df):
    return get_comments(df, 'Comment on node: city', 'City')

def get_true_value(row, type):
    if type == 'source':
        if row['Source clean'] != '':
            return row['Source clean']
        return row['Source']
    if type == 'performer':
        if row['Normalized performer'] != '':
            return row['Normalized performer']
        if row['Performer first-name'] != '' and row['Performer last-name'] != '':
            return row['Normalized performer']
        return row['Performer']
    if type == 'city':
        if row['Normalized City'] != '':
            return row['Normalized City']
        return row['City']
    if type == 'revue':
        if row['Normalized Revue Name'] != '':
            return row['Normalized Revue Name']
        return row['Revue name']
    if type == 'venue':
        if row['Normalized Venue'] != '':
            return row['Normalized Venue']
        return row['Venue']
    raise NotImplementedError(f'type `{type}` is not yet implemented')

def find_ref(row, eima=True):
    source = row['Source']
    source += ' ' + row['EIMA']
    source += ' ' + row['Search (newspapers.com)']
    source += ' ' + row['Source clean']
    
    is_eima = 'eima' in source.lower() or 'variety' in source.lower() or 'billboard' in source.lower()
    has_ref = re.search(r'(\d{7,10})', source)
    refs = list(set(re.findall(r'(\d{7,10})', source)))
    if has_ref and eima and is_eima:
        return '|'.join(refs)
    
    if has_ref and not eima and not is_eima:
        return '|'.join(refs)

    return ''
    

df = pd.read_csv('https://docs.google.com/spreadsheets/d/e/2PACX-1vT0E0Y7txIa2pfBuusA1cd8X5OVhQ_D0qZC8D40KhTU3xB7McsPR2kuB7GH6ncmNT3nfjEYGbscOPp0/pub?gid=0&single=true&output=csv')
df = df.fillna('')
df = df.replace('—', '')
df = df.replace('—*', '')

# First, set up our references to EIMA and newspapers.com
df['EIMA'] = df.apply(lambda row: find_ref(row), axis=1)
df['Newspapers.com'] = df.apply(lambda row: find_ref(row, False), axis=1)

# Normalize dataframe
df['Source'] = df.apply(lambda row: get_true_value(row, 'source'), axis=1)
df['Venue'] = df.apply(lambda row: get_true_value(row, 'venue'), axis=1)
df['Performer'] = df.apply(lambda row: get_true_value(row, 'performer'), axis=1)
df['City'] = df.apply(lambda row: get_true_value(row, 'city'), axis=1)
df['Revue'] = df.apply(lambda row: get_true_value(row, 'revue'), axis=1)

# Extract "node" information
addresses = extract_addresses_dict(df)
revue_comments = get_revue_comments_dict(df)
performer_comments = get_performer_comments_dict(df)
venue_comments = get_venue_comments_dict(df)
city_comments = get_city_comments_dict(df)
edge_comments = get_comments(df, 'Edge Comment', 'Source')
legal_names = get_comments(df, 'Legal name', 'Performer')
ages = get_comments(df, 'Alleged age', 'Performer', lambda x: int(float(x)))
birth_years = get_comments(df, 'Assumed birth year', 'Performer', lambda x: int(float(x)))
eima_links = get_comments(df, 'EIMA', 'Source')
newspaper_links = get_comments(df, 'Newspapers.com', 'Source')

# Edge booleans
blackface_performers = get_comments(df, 'Blackface', 'Performer', lambda x: bool(x))
sepia_performers = get_comments(df, 'Sepia', 'Performer', lambda x: bool(x))
fan_dance_performers = get_comments(df, 'Fan dancer/Sally Rand', 'Performer', lambda x: bool(x))
exotic_dancers = get_comments(df, 'Exotic/erotic/oriental dancer/Gypsy', 'Performer', lambda x: bool(x))
has_image = get_comments(df, 'Has image', 'Performer', lambda x: bool(x))

# Drop filtered data
df['remove'] = df.apply(lambda row: remove(row), axis=1)
df = df.drop(df[df['remove']==True].index)

# Drop columns
df = df.drop(['Category', 'EIMA', 'Newspapers.com', 'Search (newspapers.com)', 'Blackface', 'Sepia', 'Fan dancer/Sally Rand', 'Exotic/erotic/oriental dancer/Gypsy', 'Has image', 'Legal name', 'Alleged age', 'Assumed birth year', 'Search (fulton)', 'Imported from former archive', 'Edge Comment', 'Comment on node: performer', 'Comment on node: venue', 'Comment on node: city', 'Comment on edge: revue', 'Exclude from visualization', 'Address', 'Unsure whether drag artist', 'Source clean', 'Normalized performer', 'Performer first-name', 'Performer last-name', 'Normalized Venue', 'Normalized City', 'Normalized Revue Name', 'Revue name', 'remove'], axis=1)

- 925 East Main Street
- "3 Complete Floor Shows Nightly"...
- "3 Complete Floor Shows Nightly"...
- "3 Complete Floor Shows Nightly"...
- "3 Complete Floor Shows Nightly"...
- "3 Complete Floor Shows Nightly"...
- "3 Complete Floor Shows Nightly"...
- "3 Complete Floor Shows Nightly"...
- "Big Floor Show", "Positively the Funnie...
- "Big Floor Show", "Positively the Funnie...
- "Big Floor Show", "Positively the Funnie...
- "Big Floor Show", "Positively the Funnie...
- "Big Floor Show", "Positively the Funnie...
- "Big Floor Show", "Positively the Funnie...
- "Big Floor Show", "Positively the Funnie...
- "Big Floor Show", "Positively the Funnie...
- "Wednesday night is a gay night . . . wi...
- "San Francisco's Sensational Show"...
- "San Francisco's Sensational Show"...
- "San Francisco's Sensational Show"...
- "San Francisco's Sensational Show"...
- "San Francisco's Sensational Show"...
- "San Francisco's Sensational Show"...
- "San Francisco's Sensational Show"...
- "San Francisco'

In [4]:
# Time to render some files

from jinja2 import Environment, FileSystemLoader
from slugify import slugify
from pathlib import Path
import os

In [5]:
def keyshift(dictionary, key, diff):
    if key in dictionary:
        token = object()
        keys = [token]*(diff*-1) + sorted(dictionary) + [token]*diff
        newkey = keys[keys.index(key)+diff]
        if newkey is token:
            return None
        else:
            return newkey
    else:
        return None


def slugify_column(df, column='Performer'):
    if not column == 'Performer':
        all_values = list(sorted(set([x for x in df[column] if x and not x.startswith('—')])))
    else:
        all_values = list(sorted(set([x for x in df[column] if x]))) # we have to include the ones that start with — here
    values_dict = {}
    for value in all_values:
        done = False
        i = 0
        while not done:
            if i == 0:
                if not slugify(value) in values_dict:
                    values_dict[slugify(value)] = value
                    done = True
                else:
                    i += 1
            else:
                print('Warning: Multiple values with the same value. This should not happen:', value)
                if not f'{slugify(value)}-{i}' in values_dict:
                    values_dict[f'{slugify(value)}-{i}'] = value
                    done = True
                else:
                    i += 1
    return {v: k for k, v in values_dict.items()} # reversed
    

In [29]:
def get_venue_slug(venue):
    return ALL_VENUES[venue]

def get_performer_slug(performer):
    return ALL_PERFORMERS[performer]


def make_calendar(df):
    from collections import OrderedDict

    calendar = OrderedDict()

    years = [pd.to_datetime(x).year for x in df.Date]
    min_year = min(years)
    max_year = max(years)

    years = range(min_year, max_year+1)
    months = range(1,13)
    for year in years:
        if not year in calendar:
            calendar[year] = OrderedDict()
        for month in months:
            # 31 = jan, mar, may, jul, aug, oct, dec
            # 30 = apr, jun, sep, nov
            # 28 = feb
            if not month in calendar[year]:
                calendar[year][month] = OrderedDict()
            if month == 2:
                days = range(1,30) #adding the 29th day despite it not always existing
            elif month in [4, 6, 9, 11]:
                days = range(1,31)
            elif month in [1,3,5,7,8,10,12]:
                days = range(1,32)
            else:
                raise RuntimeError('error')

            for day in days:
                if not day in calendar[year][month]:
                    calendar[year][month][day] = 0

    for date in [pd.to_datetime(date) for date in df.sort_values('Date').Date]:
        calendar[date.year][date.month][date.day] += 1
        
    return calendar


TEMPLATE_DIR = '/Users/kallewesterling/Repositories/kallewesterling/dissertation/drag-data-browser/templates/'
OUTPUT_DIR = '/Users/kallewesterling/Repositories/kallewesterling/dissertation/drag-data-browser/docs/'
BASE_URL = '/drag-data-browser/' # '/docs/' or '/drag-data-browser/'

e = Environment(loader=FileSystemLoader(TEMPLATE_DIR))

ALL_YEARS = list(range(YEAR_RANGE[0], YEAR_RANGE[1]))
ALL_PERFORMERS = slugify_column(df, 'Performer')
ALL_VENUES = slugify_column(df, 'Unique venue')
ALL_CITIES = slugify_column(df, 'City')

e.globals['slugify'] = slugify
e.globals['get_venue_slug'] = get_venue_slug
e.globals['get_performer_slug'] = get_performer_slug
e.globals['ALL_YEARS'] = ALL_YEARS
e.globals['ALL_PERFORMERS'] = ALL_PERFORMERS
e.globals['ALL_VENUES'] = ALL_VENUES
e.globals['ALL_CITIES'] = ALL_CITIES
e.globals['BASE_URL'] = BASE_URL
e.globals['PERFORMER_COMMENTS'] = reverse_comment_dict(performer_comments)
e.globals['VENUE_COMMENTS'] = reverse_comment_dict(venue_comments)
e.globals['CITY_COMMENTS'] = reverse_comment_dict(city_comments)
e.globals['NEWSPAPERS_LINKS'] = newspaper_links
e.globals['EIMA_LINKS'] = eima_links
e.globals['CALENDAR'] = make_calendar(df)

In [37]:
e.globals['str'] = str

In [30]:
mappings = {
    ('Albert', 'Henry', 'Cook'): ['Albert Henry', 'Cook'],
    ('Baby', 'Jan', 'Ray'): ['Baby Jan', 'Ray'],
    ('Edward', 'Albert', 'Crawford'): ['Edward Albert', 'Crawford'],
    ('Frank', 'Barrett', 'Carman'): ['Frank', 'Barrett Carman'],
    ('J.', 'John', 'Howard'): ['J. John', 'Howard'],
    ('James', 'Ernest', 'Allen'): ['James Ernest', 'Allen'],
    ('La', 'Belle', 'Rose'): ['La Belle', 'Rose'],
    ('Nina', 'Mae', 'McKinney'): ['Nina Mae', 'McKinney'],
    ('Nora', 'Corona', 'Hancock'): ['Nora', 'Corona Hancock'],
    ('Ray', 'Erline', 'Garrison'): ['Ray', 'Erline Garrison'],
    ('Richard', 'Snooks', 'Davis'): ['Richard', 'Snooks Davis'],
    ('Sepia', 'Gloria', 'Swanson'): ['Gloria', 'Swanson'],
    ('Sepia', 'Greta', 'Garbo'): ['Greta', 'Garbo'],
    ('Sepia', 'Mae', 'West'): ['Sepia Mae', 'West'],
    ('Sweet', 'Mama', 'Sue'): ['Sweet Mama Sue'],
    ('Thompson', 'Twin', '1'): ['Thompson Twins'],
    ('Thompson', 'Twin', '2'): ['Thompson Twins'],
    ('Titanic', 'Kit', 'Russell'): ['Kit', 'Russell'],
    ('William', 'Lee', 'Becker'): ['William', 'Lee Becker'],
    ('Doran,', 'West,', 'and', 'Doran'): ['Doran, West, and Doran'],
    ('Elsie', 'the', 'Cobra', 'Woman'): ['Elsie the Cobra Woman'],
    ('F', '&', 'G', 'Doran'): ['F. and G.', 'Doran'],
    ('Lynn', 'and', 'De', 'Marco'): ['Lynn and De Marco'],
    ('May', 'West', 'of', 'the', 'East'): ['Sepia Mae', 'West'],
    ('Mother', 'Smother/Sepia', 'Marlene', 'Dietrich'): ['Marlene', 'Dietrich']
}

def make_performer_clippings(list_of_performers):
    _ = {}
    
    def get_comment(name):
        if FILE_COMMENT.search(name):
            return FILE_COMMENT.search(name).groups()[1]
        return ''

    import glob
    ARCHIVE_PNG_PATHS = [x.lower() for x in glob.glob('/Volumes/GoogleDrive/My Drive/Ongoing Projects/Dissertation - Archive/- My own clippings and photos/**/*.png', recursive=True)]
    ARCHIVE_FOLDERS = [x.lower() for x in glob.glob('/Volumes/GoogleDrive/My Drive/Ongoing Projects/Dissertation - Archive/- My own clippings and photos/*')]
    FILE_COMMENT = re.compile(r'(.*) ?\[(.*)\]')

    for performer in list_of_performers:
        found = False

        if not performer:
            continue

        names = performer.split(' ')
        if len(names) == 3:
            if names[1] == 'La' or names[1] == 'Le' or names[1] == 'De' or names[1] == 'Del' or names[1] == 'St.' or names[1] == 'Van' or names[1] == 'Val' or names[1] == 'the':
                names = [f'{names[0]}', f'{names[1]} {names[2]}']
            elif names[1] == '&' or names[1] == 'and':
                names = [f'{names[0]} {names[1]} {names[2]}']
            elif len(names[0]) < 3 and len(names[1]) < 3:
                names = [names[2], f'{names[0]} {names[1]}']
            elif len(names[1]) < 3:
                names = [f'{names[0]} {names[1]}', f'{names[2]}']
            elif names[0] == 'The':
                names = [f'{names[0]} {names[1]}', f'{names[2]}']
            elif names[0] == 'Miss':
                names = [f'{names[0]}', f'{names[1]}']
            elif names[1].startswith('"') or names[1].startswith('('):
                names = [f'{names[1]}', f'{names[2]}']
            else:
                if not mappings.get(tuple(names)):
                    print(names)
                else:
                    names = mappings.get(tuple(names))
        elif len(names) > 3:
            if not mappings.get(tuple(names)):
                print(names)
            else:
                names = mappings.get(tuple(names))

        names = [x.lower() for x in names]
        
        if len(names) == 2:
            search = [x for x in ARCHIVE_FOLDERS if (f'{names[1]}, {names[0]}' in x or f'{names[0]} {names[1]}' in x) and 'performer' in get_comment(x)]
            if len(search) == 1:
                found = search[0]
            else:
                pass # print(f'{names[1]}, {names[0]}', search)
                
        elif len(names) == 1:
            search = [x for x in ARCHIVE_FOLDERS if Path(x).stem.startswith(names[0])]
            if len(search) == 1:
                found = search[0]
            else:
                pass # print(names, search)
        elif len(names) == 3:
            search = [x for x in ARCHIVE_FOLDERS if (f'{names[2]}, {names[0]} {names[1]}' in x or f'{names[0]} {names[1]}' in x) and 'performer' in get_comment(x)]
            if len(search) == 1:
                found = search[0]
            else:
                pass # print(f'{names[2]}, {names[0]} {names[1]}', search)

        if found:
            if not 'performer' in get_comment(Path(found).name) and not 'producer' in get_comment(Path(found).name):
                print(f'Warning: Found matching clippings folder with wrong name, so decoupling: {performer} ≠ {Path(found).name}')
                found = False # if not a performer or producer
        else:
            print(f'Warning: {performer} does not exist in clippings')
            found = ''
            
        _[performer] = found
    
    return _



e.globals['PERFORMER_CLIPPINGS'] = make_performer_clippings([x for x, _ in df.groupby('Performer') if x])
e.globals['PERFORMER_CLIPPINGS'] = {x:y for x,y in e.globals['PERFORMER_CLIPPINGS'].items() if y}







In [31]:
def make_all_clippings():
    import glob
    
    try:
        from IPython.display import clear_output
        ipython = True
    except:
        ipython = False
    files = {}
    ARCHIVE_FOLDERS = [x for x in glob.glob('/Volumes/GoogleDrive/My Drive/Ongoing Projects/Dissertation - Archive/- My own clippings and photos/*')]
    FILE_COMMENT = re.compile(r'(.*) ?\[(.*)\]')
    categories = {None: {None: []}}
    for folder_count, name in enumerate(ARCHIVE_FOLDERS):
        if ipython:
            print(f'{folder_count}/{len(ARCHIVE_FOLDERS)}: {name}')
            clear_output(wait=True)
        if FILE_COMMENT.search(name):
            folder, category = FILE_COMMENT.search(name).groups()
            clean_folder_name = Path(folder).name.strip()
            print(clean_folder_name)
            folder_categories = [x.strip() for x in category.split(';')]
            primary_cat = folder_categories.pop(0)
            if not primary_cat in categories:
                categories[primary_cat] = {None: []}
            if not folder_categories:
                categories[primary_cat][None].append(clean_folder_name)
            else:
                if not '; '.join(folder_categories) in categories[primary_cat]:
                    categories[primary_cat]['; '.join(folder_categories)] = []
                categories[primary_cat]['; '.join(folder_categories)].append(clean_folder_name)
        else:
            categories[None].append(Path(name).name.strip())

        p = Path(name)
        files[p.name] = [x.name for x in p.glob('**/*') if x.is_file() and not x.name.startswith('.')]
        
    return categories, files
    
e.globals['ALL_CLIPPINGS'], e.globals['ALL_CLIPPINGS_FILES'] = make_all_clippings()

West, Sepia Mae


In [32]:
e.globals['ALL_CLIPPINGS']

{None: {None: []},
 'performer': {None: ['Dunn, Francis',
   'Byng, Douglas',
   'Lily of the Valley',
   'Miss Guy',
   'Twig the Wonderkid?',
   'Hilton, Spikey',
   'Lamar, Sophia',
   'DeBarge, Flotilla',
   'Afrodite',
   'Porkpie, Johnny',
   'Swan, Paul',
   'Lady Rizo',
   'Tish',
   'Isaacs Mencken, Adah',
   'Tigger!',
   'Bolts, Rusty',
   'Moon, Waxie',
   'de Ville, Louis',
   'Walker, Johnnie',
   'Boots, Twinky',
   'de Lune, Esquire',
   'Lucky Charming',
   'Thompson, Lydia',
   'Douglas, Lewd Alfred',
   'Hoover, Lou Henry',
   'Gosselin. Jon',
   'Panic, Johnny',
   'Fire, Indy',
   'Patrick the All-American Stripper',
   'Sabrina, Flawless',
   'Harder, Chris',
   'Knife, Matt',
   'Sweat, Brief',
   'Valentino, Broody',
   'Ferro, Chad',
   'Villain, Amoxie',
   'Zorita',
   'Pontani, Angie',
   'Bollocks, Castor',
   'Noir, Jet',
   'Finnell, Carrie',
   'Love, Bunny',
   'Appeal, Fem',
   'World Famous *BOB*',
   'Bois, Curt',
   'Dean, Joshua',
   'Smith, Kim Da

In [40]:
performer_template = e.get_template('performer.html')


performers_active_dates_overview = {}

for performer, row in df.groupby(['Performer']):
    if not performer:
        continue

    html_file = os.path.join(OUTPUT_DIR, 'performer', ALL_PERFORMERS[performer], 'index.html')
    if not Path(html_file).parent.exists():
        Path(html_file).parent.mkdir(parents=True)
    
    full_venues = list(set([x for x in row['Unique venue'] if x and not x.startswith('—')]))
    full_venues = {x: ALL_VENUES[x] for x in full_venues}

    cities = list(set([x for x in row['City'] if x]))
    years_active = list(set(([x.year for x in pd.to_datetime(row['Date'])])))
    appears_at = list(set([x for x in row['Venue'] if x]))
    
    performers_active_dates_overview[performer] = years_active

    next_performer = keyshift(ALL_PERFORMERS, performer, +1)
    prev_performer = keyshift(ALL_PERFORMERS, performer, -1)
    if next_performer:
        next_performer = {'label': next_performer, 'url': BASE_URL + 'performer/' + ALL_PERFORMERS[next_performer]}
    if prev_performer:
        prev_performer = {'label': prev_performer, 'url': BASE_URL + 'performer/' + ALL_PERFORMERS[prev_performer]}
    
    text = performer_template.render(data={
        'name': performer,
        'years_active': years_active,
        'full_venues': full_venues,
        'cities': cities,
        'in_blackface': blackface_performers.get(performer, {}),
        'sepia_performer': sepia_performers.get(performer, {}),
        'fan_dancer': fan_dance_performers.get(performer, {}),
        'exotic_dancer': exotic_dancers.get(performer, {}),
        'images': has_image.get(performer, {}),
        'comments': e.globals['PERFORMER_COMMENTS'].get(performer, {}),
        'legal_name': legal_names.get(performer, {}),
        'age': ages.get(performer, {}),
        'birth_year': birth_years.get(performer, {}),
        'relative': {
            'next': next_performer,
            'prev': prev_performer
        }
    })

    with open(html_file, 'w+') as f:
        f.write(text)
        
        
        
########################


venue_template = e.get_template('venue.html')


venues_active_dates_overview = {}

for venue, row in df.groupby(['Unique venue']):
    if not venue or venue.startswith('—'):
        continue

    html_file = os.path.join(OUTPUT_DIR, 'venue', ALL_VENUES[venue], 'index.html')
    if not Path(html_file).parent.exists():
        Path(html_file).parent.mkdir(parents=True)

    associated_performers = list(set([x for x in row['Performer'] if x]))
    associated_performers = {x: ALL_PERFORMERS[x] for x in associated_performers}
    years_active = list(set(([x.year for x in pd.to_datetime(row['Date'])])))
    
    venues_active_dates_overview[venue] = years_active
    
    next_venue = keyshift(ALL_VENUES, venue, +1)
    prev_venue = keyshift(ALL_VENUES, venue, -1)
    if next_venue:
        next_venue = {'label': next_venue, 'url': BASE_URL + 'venue/' + ALL_VENUES[next_venue]}
    if prev_venue:
        prev_venue = {'label': prev_venue, 'url': BASE_URL + 'venue/' + ALL_VENUES[prev_venue]}
    
    text = venue_template.render(data={
        'name': venue,
        'years_active': years_active,
        'associated_performers': associated_performers,
        'addresses': addresses.get(venue, {}),
        'comments': venue_comments.get(venue, {}),
        'relative': {
            'next': next_venue,
            'prev': prev_venue,
        }
    })
    
    with open(html_file, 'w+') as f:
        f.write(text)
        


home_template = e.get_template('home.html')

html_file = os.path.join(OUTPUT_DIR, 'index.html')
if not Path(html_file).parent.exists():
    Path(html_file).parent.mkdir(parents=True)

text = home_template.render(data={
    'xxx': 'xxx'
})

with open(html_file, 'w+') as f:
    f.write(text)







import itertools
MIN = min(list(itertools.chain.from_iterable(venues_active_dates_overview.values())))
MAX = max(list(itertools.chain.from_iterable(venues_active_dates_overview.values())))
        
venue_list_template = e.get_template('venue-list.html')

html_file = os.path.join(OUTPUT_DIR, 'venue', 'index.html')
if not Path(html_file).parent.exists():
    Path(html_file).parent.mkdir(parents=True)

text = venue_list_template.render(data={
    'venues_active_dates_overview': venues_active_dates_overview,
    'venues_years_range': [x for x in range(MIN,MAX)]
})

with open(html_file, 'w+') as f:
    f.write(text)







import itertools
MIN = min(list(itertools.chain.from_iterable(performers_active_dates_overview.values())))
MAX = max(list(itertools.chain.from_iterable(performers_active_dates_overview.values())))
        
performer_list_template = e.get_template('performer-list.html')

html_file = os.path.join(OUTPUT_DIR, 'performer', 'index.html')
if not Path(html_file).parent.exists():
    Path(html_file).parent.mkdir(parents=True)

text = performer_list_template.render(data={
    'performers_active_dates_overview': performers_active_dates_overview,
    'performer_years_range': [x for x in range(MIN,MAX)]
})

with open(html_file, 'w+') as f:
    f.write(text)

    
    
    

template = e.get_template('clippings.html')

html_file = os.path.join(OUTPUT_DIR, 'clippings', 'index.html')
if not Path(html_file).parent.exists():
    Path(html_file).parent.mkdir(parents=True)

text = template.render(data={
    
})

with open(html_file, 'w+') as f:
    f.write(text)

    
    
    
    
    
    
calendar_template = e.get_template('calendar.html')

html_file = os.path.join(OUTPUT_DIR, 'calendar', 'index.html')
if not Path(html_file).parent.exists():
    Path(html_file).parent.mkdir(parents=True)

text = calendar_template.render(data={
    
})

with open(html_file, 'w+') as f:
    f.write(text)


In [11]:
sources = list(set(sorted(list(set([x for x in df['Source']])))))
for source in sources:
    s = re.split(', (January|February|March|April|May|June|July|August|September|October|November|December)', source)[0]
    if ',' in s or '(' in s:
        if not s.startswith('-') and not s.startswith('Letter from') and not s.startswith('Telegram from'):
            print(s)

Variety, Feb XX, 1933, page unknown
Promotional Postcard, Brooklyn Academy of Music, 1949 (BAM Archives)
Troy NY Times Record, date unclear [very likely end of February 1940], 8
Berwyn IL Life, October 9, 1936, 10 (523669010)
Times Daily News (NC)
Whittier News, 8 (Paul Harrison, "In New York", 627840200)
The Harlequin's Installation Revel, at the Masquers
Albany NY Times-Union, date unknown
Tristan Cabello, "Queer Bronzeville," OutHistory, no date
