In [3]:
# Stretch the notebook container to the full browser width so wide figures fit.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [4]:
# Database connection settings.
# Parameters are read from the standard libpq environment variables (PGDATABASE,
# PGHOST, PGPORT, PGUSER, PGPASSWORD) so that no secret is stored in the notebook.
# The non-secret parameters fall back to the local development defaults.
import getpass
import os

import psycopg2

db_name = os.environ.get("PGDATABASE", "traviato_development")
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_user = os.environ.get("PGUSER", "lievgarcia")
# The password deliberately has no in-file default: PGPASSWORD is used when set,
# otherwise the user is prompted. Set PGPASSWORD for non-interactive runs.
db_pwd = os.environ.get("PGPASSWORD") or getpass.getpass(
    "Password for {}@{}: ".format(db_user, db_host)
)

conn = psycopg2.connect(database=db_name, user=db_user, password=db_pwd, host=db_host, port=db_port)

In [5]:
######################## libraries ########################

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib.ticker as ticker
import numpy as np

import sys
sys.path.append('../../')

from utils.utility_functions import label_top
from utils.utility_functions import get_last_name
from utils.utility_functions import make_dash_zero
# from utils.utility_functions import shorten_opera_name

##################### CHARTING OPTIONS #####################

# Figure text is set in a serif face (Palatino) and rendered through LaTeX.
# Sans-serif alternative, kept for reference:
#   rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
rc('font', family='serif', serif=['Palatino'])
rc('text', usetex=True)

# Default colour cycle for plotted series: matplotlib's single-letter colours.
from cycler import cycler
mpl.rcParams['axes.prop_cycle'] = cycler(color='bgrcmyk')

# Palette indexed by position in the per-source charts.
# Note that the last entry repeats the first.
colors = [
    '#8a170f', '#8a540f', '#828a0f', '#458a0f',
    '#0f8a17', '#0f8a54', '#0f828a', '#0f458a',
    '#170f8a', '#540f8a', '#8a0f82', '#8a0f45',
    '#8a170f',
]


##################### SOURCES OPTIONS #####################

# Source names grouped by kind; charts select one of these lists to filter listings.
archival_sources = [
    'Metropolitan Opera',
    'Opera Scotland',
    'Royal Opera House',
    'Teatro alla Scala',
    'Wiener Staatsoper',
    'Chicago Lyric Opera',
    'San Francisco Opera',
    'Czech National Theatre',
    'Glyndebourne',
    'Salzburg Festival',
    'Sweden Opera House',
    'National Opera de Paris',
    'Opera Comique',
]
commercial_sources_world = [
    'bachtrack.com',
    'operabase.com',
    'operacritic.com',
]
commercial_sources_uk = [
    'Concert-Diary',
    'U.K. Theatre Web',
]
government_sources = [
    'National Central Opera Service Reports',
    'Opera America',
]

##############################################################################

In [None]:
import pandas as pd

# Column labels for the result set, in the same order as the SELECT list below.
columns = [
    'source_id', 'source_name', 'composer', 'work', 'country', 'state', 'city',
    'theatre', 'date_start', 'date_end', 'premiere_date', 'performance_count',
]

# One row per listing, joined to its source. Where a mapping to a canonical work
# exists, the work and composer names come from it; otherwise the listing's own
# composer/work text is used (COALESCE).
query = (
    "SELECT s.id as source_id, "
    "       s.name as source_name, "
    "       coalesce(c.name, l.composer) as composer, "
    "       coalesce(w.name, l.work) as work, "
    "       l.country, "
    "       l.state, "
    "       l.city, "
    "       l.theatre, "
    "       l.date_start, "
    "       l.date_end, "
    "       w.premiere_date, "
    "       l.performance_count "
    "from sources s "
    "INNER JOIN listings l on s.id = l.source_id "
    "LEFT JOIN listing_mappers lm ON l.map_key = lm.map_key "
    "LEFT JOIN works w ON lm.work_uri = w.uri "
    "LEFT JOIN composers c ON w.composer_id = c.id "
    "ORDER BY s.id, l.country, l.state, l.city"
)

# The context manager closes the cursor as soon as the rows have been fetched.
with conn.cursor() as cursor:
    cursor.execute(query)
    rows = cursor.fetchall()

mdf = pd.DataFrame(rows, columns=columns)

# Derived period columns, computed column-wise instead of with row-wise apply():
# this is faster and also works when the query returns no rows.
mdf['year'] = mdf['date_start'].map(lambda d: d.year)
mdf['decade'] = mdf['year'] // 10 * 10

# Listings without a mapped work have no premiere year; they are given 0, so they
# end up in work_decade 0.
# NOTE(review): premiere_date is treated as a non-negative numeric year, as the
# original arithmetic did - confirm against the works table.
mdf['premiere_date'] = mdf['premiere_date'].fillna(0)
mdf['work_decade'] = (mdf['premiere_date'] // 10 * 10).astype(int)

In [None]:
#################### top composers share of repertoire ####################
# For every source in `sources`, draw one bar chart per source showing, year by
# year, the share of performances whose composer label is 'Top'.

sources = archival_sources
num_sources = len(sources)

top_n = 5           # number of most-performed composers selected per source
year_cutoff = 2019  # listings starting in this year or later are excluded

# Filter first and copy afterwards, so the relabelling below writes to an
# independent frame rather than to a slice of `mdf`.
keep = (
    mdf['source_name'].isin(sources)
    & (mdf['work'] != 'Unknown')
    & (mdf['composer'] != '')
    & (mdf['year'] < year_cutoff)
)
o_df = mdf[keep].copy()

for source in sources:
    # Rank this source's composers by total performance count.
    dat = o_df[o_df['source_name'] == source]
    c_df = dat.groupby(['composer']).agg({'performance_count': 'sum'})
    top_composers = list(
        c_df.sort_values(by=['performance_count'], ascending=False).index.values[0:top_n]
    )
    # label_top comes from utils.utility_functions and is not visible here.
    # NOTE(review): presumably it relabels this source's top composers as 'Top'
    # (the chart below selects composer == 'Top') - verify against the helper.
    o_df['composer'] = o_df.apply(
        lambda row: label_top(row['source_name'], source, row['composer'], top_composers),
        axis=1,
    )

# Performance counts per (source, year, composer label), converted into each
# label's share of that source-year's total.
c_df = o_df.groupby(['source_name', 'year', 'composer']).agg({'performance_count': 'sum'})
c_df = c_df / c_df.groupby(level=['source_name', 'year']).transform('sum')
c_df = c_df.reset_index(level=['year', 'composer'])

fig = plt.figure(figsize=(15, 25))

for num, source in enumerate(sources):

    # Boolean selection (rather than c_df.loc[source]) always yields a DataFrame,
    # even when the source has a single row or no rows at all after filtering.
    dat = c_df[(c_df.index == source) & (c_df['composer'] == 'Top')]
    dates = dat['year'].values

    ax = fig.add_subplot(num_sources, 1, num + 1)
    # Wrap the palette index so a source list longer than `colors` still plots.
    ax.bar(dates, dat['performance_count'], label=source,
           color=colors[num % len(colors)], linewidth=3)
    ax.set_title(source)

    # Shares lie in [0, 1]; the dashed line marks the 50% level.
    ax.set_ylim([0, 1])
    ax.axhline(y=.5, dashes=[1, 1], color='#000000')
    ax.yaxis.set_major_locator(ticker.MultipleLocator(0.5))

# fig.tight_layout()  # optional: enable if the subplot titles overlap

###############################################################