In [8]:
# Inject a CSS rule that stretches the notebook's `.container` element to the
# full page width.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import HTML, display

display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
# Database connection settings.
# Each value is read from the matching libpq environment variable when it is
# set; the non-secret ones fall back to the local development defaults.
import getpass
import os

import psycopg2

db_name = os.environ.get("PGDATABASE", "traviato_development")
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_user = os.environ.get("PGUSER", "lievgarcia")
# The password is deliberately not stored in the notebook: use PGPASSWORD when
# it is set, otherwise prompt for it (the prompt is not echoed or saved).
db_pwd = os.environ.get("PGPASSWORD") or getpass.getpass("Password for database user %s: " % db_user)

# Connection shared by the query cells below.
conn = psycopg2.connect(database=db_name, user=db_user, password=db_pwd, host=db_host, port=db_port)

In [4]:
######################## libraries ########################

# Render matplotlib figures inline in the cell output.
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib.ticker as ticker
import numpy as np

import sys
# Extend the module search path so the `utils` package imported below can be
# resolved; presumably it lives two directories above the notebook's working
# directory -- TODO confirm.
sys.path.append('../../')

# Project-local helpers (get_season and make_dash_zero are used by later cells).
from utils.utility_functions import label_top
from utils.utility_functions import get_last_name
from utils.utility_functions import make_dash_zero
from utils.utility_functions import make_dash_zero_float
from utils.utility_functions import get_season

# Disabled import, kept for reference:
# from utils.utility_functions import shorten_opera_name

##################### CHARTING OPTIONS #####################

# Figure text: serif family, Palatino face.
# (Sans-serif alternative: family='sans-serif' with 'sans-serif': ['Helvetica'].)
rc('font', family='serif', serif=['Palatino'])

# Text is rendered by matplotlib itself (usetex disabled).
rc('text', usetex=False)

# Default line colours cycle through the single-letter matplotlib colours.
from cycler import cycler
mpl.rcParams['axes.prop_cycle'] = cycler('color', 'bgrcmyk')

# Hex palette available to the plotting cells; the first and last entries are
# the same colour.
colors = [
    '#8a170f',
    '#8a540f',
    '#828a0f',
    '#458a0f',
    '#0f8a17',
    '#0f8a54',
    '#0f828a',
    '#0f458a',
    '#170f8a',
    '#540f8a',
    '#8a0f82',
    '#8a0f45',
    '#8a170f',
]


##################### SOURCES OPTIONS #####################

# Values of `source_name`, grouped by the kind of source they come from.
archival_sources = [
    'Metropolitan Opera',
    'Opera Scotland',
    'Royal Opera House',
    'Teatro alla Scala',
    'Wiener Staatsoper',
    'Chicago Lyric Opera',
    'San Francisco Opera',
    'Czech National Theatre',
    'Glyndebourne',
    'Salzburg Festival',
    'Sweden Opera House',
    'National Opera de Paris',
    'Opera Comique',
]
commercial_sources_world = [
    'bachtrack.com',
    'operabase.com',
    'operacritic.com',
]
commercial_sources_uk = [
    'Concert-Diary',
    'U.K. Theatre Web',
]
government_sources = [
    'National Central Opera Service Reports',
    'Opera America',
]

##############################################################################

In [68]:
import pandas as pd

# Column labels for the result set, in the same order as the SELECT list below.
columns=['source_id', 'source_name', 'composer', 'work', 'country', 'state', 'city', 'theatre', 'date_start', 'date_end', 'premiere_date', 'performance_count']

# One row per listing of every source. Composer and work names come from the
# mapped `works`/`composers` rows when a mapping exists and otherwise fall back
# to the raw text stored on the listing.
# NOTE(review): if `listing_mappers.map_key` is not unique, the LEFT JOIN will
# duplicate listings -- confirm against the schema.
query = (
    "SELECT s.id as source_id, "
    "       s.name as source_name, "
    "       coalesce(c.name, l.composer) as composer, "
    "       coalesce(w.name, l.work) as work, "
    "       l.country, "
    "       l.state, "
    "       l.city, "
    "       l.theatre, "
    "       l.date_start, "
    "       l.date_end, "
    "       w.premiere_date, "
    "       l.performance_count "
    "from sources s "
    "INNER JOIN listings l on s.id = l.source_id "
    "LEFT JOIN listing_mappers lm ON l.map_key = lm.map_key "
    "LEFT JOIN works w ON lm.work_uri = w.uri "
    "LEFT JOIN composers c ON w.composer_id = c.id "
    "ORDER BY s.id, l.country, l.state, l.city"
)

# The cursor is used as a context manager so it is closed as soon as the rows
# have been fetched, even if the query raises.
with conn.cursor() as cursor:
    cursor.execute(query)
    listing_rows = cursor.fetchall()

mdf = pd.DataFrame(listing_rows, columns=columns)

# `get_season` is a project helper applied to each row; its result is stored as
# the listing's `year` (presumably the season year -- confirm in utils).
# Calendar-year alternative: row['date_start'].year
mdf['year'] = mdf.apply(get_season, axis=1)

# Decade = year truncated to a multiple of ten. Casting to integer truncates
# toward zero, exactly like the int() call it replaces, but runs column-wise.
mdf['decade'] = (mdf['year'] / 10).astype('int64') * 10

# Works without a known premiere date get 0, which puts them in work_decade 0.
mdf['premiere_date'] = mdf['premiere_date'].fillna(0)
mdf['work_decade'] = (mdf['premiere_date'] / 10).astype('int64') * 10

In [69]:
#################### Works HHI ####################
# Repertoire concentration per archival source and decade, printed as the
# header and body rows of a LaTeX table (one row per decade, one column per
# source).

sources = archival_sources
num_sources = len(sources)

# Keep listings of the selected sources that have an identified work and a
# non-empty composer; 'Opera Comique' is left out and only years before 2019
# are retained.
keep = (
    mdf['source_name'].isin(sources)
    & (mdf['source_name'] != 'Opera Comique')
    & (mdf['work'] != 'Unknown')
    & (mdf['composer'] != '')
    & (mdf['year'] < 2019)
)
o_df = mdf[keep].copy()

# Per (source, decade):
#   work       = number of distinct (work, year) combinations
#   year       = number of distinct years with listings
#   works_year = work / year
hhi_df = (
    o_df[['source_name', 'work', 'year', 'decade']]
    .drop_duplicates()
    .groupby(['source_name', 'decade'])
    .agg({'work': 'count', 'year': 'nunique'})
)
hhi_df['works_year'] = hhi_df['work'] / hhi_df['year']
# Reference bounds on the same 0-10000 scale as the index below:
#   hhi_min = 10000 / work        (equal shares over `work` items)
#   hhi_max = 10000 / works_year  (equal shares over `works_year` items)
hhi_df['hhi_min'] = hhi_df['work'] * (100 / hhi_df['work']) ** 2
hhi_df['hhi_max'] = hhi_df['works_year'] * (100 / hhi_df['works_year']) ** 2

# HHI per (source, decade): sum over works of the squared percentage share of
# that work's performance_count within the group.
c_df = o_df.groupby(['source_name', 'work', 'decade']).agg({'performance_count': 'sum'})
decade_totals = c_df.groupby(level=['source_name', 'decade'])['performance_count'].transform('sum')
c_df['perc'] = (c_df['performance_count'] / decade_totals) * 100
c_df['hhi'] = c_df['perc'] ** 2
c_df = c_df.groupby(level=['source_name', 'decade']).agg({'hhi': 'sum'})

df = c_df.join(hhi_df, how='inner').reset_index()

# Position of the HHI between the two bounds, as a percentage. The denominator
# is floored at 1 to avoid dividing by a vanishing spread. The recorded output
# contains values above 100, so hhi_max is not a strict upper bound.
spread = df['hhi_max'] - df['hhi_min']
spread = spread.where(spread > 1, 1)
df['hhi_range'] = 100 * (df['hhi'] - df['hhi_min']) / spread

# The 1940s at Teatro alla Scala are excluded from the table.
df = df[~((df["source_name"]=='Teatro alla Scala')&(df["decade"]==1940))]

# Rows = decades, columns = sources; combinations without data become 0 so that
# make_dash_zero receives a number for every cell.
df = df.pivot(index='decade', columns='source_name', values=['hhi_range'])
df = df.fillna(value=0)

# Level 1 of the pivoted columns holds the source names (level 0 is 'hhi_range').
source_labels = [str(name) for name in df.columns.get_level_values(1).tolist()]

# One 'r' per source column, for the tabular column specification.
print('r'*len(source_labels))

# Header row: every \textbf{...} is closed so the emitted LaTeX compiles.
header_cells = ['\\textbf{Decade}'] + ['\\textbf{' + label + '}' for label in source_labels]
print(' & '.join(header_cells) + ' \\\\ \\midrule ')

# Body rows: the decade followed by one formatted cell per source.
for decade, row in df.iterrows():
    cells = [make_dash_zero(value) for value in row.values.tolist()]
    print(str(decade) + ' & ' + ' & '.join(cells) + " \\\\")

# ###############################################################

# ###############################################################

rrrrrrrrrrrr
\textbf{Decade} & \textbf{Chicago Lyric Opera & \textbf{Czech National Theatre & \textbf{Glyndebourne & \textbf{Metropolitan Opera & \textbf{National Opera de Paris & \textbf{Opera Scotland & \textbf{Royal Opera House & \textbf{Salzburg Festival & \textbf{San Francisco Opera & \textbf{Sweden Opera House & \textbf{Teatro alla Scala & \textbf{Wiener Staatsoper \\ \midrule 
1770 & - & - & - & - & - & - & - & - & - & 49 & - & - \\
1780 & - & - & - & - & - & - & - & - & - & 47 & - & - \\
1790 & - & - & - & - & - & - & - & - & - & 41 & - & - \\
1800 & - & - & - & - & - & - & - & - & - & 67 & - & - \\
1810 & - & - & - & - & - & - & - & - & - & 86 & - & - \\
1820 & - & - & - & - & - & - & - & - & - & 76 & - & - \\
1830 & - & - & - & - & - & - & - & - & - & 62 & - & - \\
1840 & - & - & - & - & - & - & - & - & - & 53 & - & - \\
1850 & - & - & - & - & - & - & - & - & - & 51 & - & - \\
1860 & - & - & - & - & - & - & - & - & - & 64 & - & 118 \\
1870 & - & - & - & - & - & - & - & - & - 

In [71]:
#################### Works HHI ####################
# Same HHI computation as the LaTeX-table cell, but the pivoted frame keeps NaN
# for source/decade combinations without data (no fillna), so statistics
# computed from `df` ignore those decades.

sources = archival_sources
num_sources = len(sources)

# Keep listings of the selected sources that have an identified work and a
# non-empty composer; 'Opera Comique' is left out and only years before 2019
# are retained.
keep = (
    mdf['source_name'].isin(sources)
    & (mdf['source_name'] != 'Opera Comique')
    & (mdf['work'] != 'Unknown')
    & (mdf['composer'] != '')
    & (mdf['year'] < 2019)
)
o_df = mdf[keep].copy()

# Per (source, decade):
#   work       = number of distinct (work, year) combinations
#   year       = number of distinct years with listings
#   works_year = work / year
hhi_df = (
    o_df[['source_name', 'work', 'year', 'decade']]
    .drop_duplicates()
    .groupby(['source_name', 'decade'])
    .agg({'work': 'count', 'year': 'nunique'})
)
hhi_df['works_year'] = hhi_df['work'] / hhi_df['year']
# Reference bounds on the same 0-10000 scale as the index below:
#   hhi_min = 10000 / work        (equal shares over `work` items)
#   hhi_max = 10000 / works_year  (equal shares over `works_year` items)
hhi_df['hhi_min'] = hhi_df['work'] * (100 / hhi_df['work']) ** 2
hhi_df['hhi_max'] = hhi_df['works_year'] * (100 / hhi_df['works_year']) ** 2

# HHI per (source, decade): sum over works of the squared percentage share of
# that work's performance_count within the group.
c_df = o_df.groupby(['source_name', 'work', 'decade']).agg({'performance_count': 'sum'})
decade_totals = c_df.groupby(level=['source_name', 'decade'])['performance_count'].transform('sum')
c_df['perc'] = (c_df['performance_count'] / decade_totals) * 100
c_df['hhi'] = c_df['perc'] ** 2
c_df = c_df.groupby(level=['source_name', 'decade']).agg({'hhi': 'sum'})

df = c_df.join(hhi_df, how='inner').reset_index()

# Position of the HHI between the two bounds, as a percentage; the denominator
# is floored at 1 to avoid dividing by a vanishing spread.
spread = df['hhi_max'] - df['hhi_min']
spread = spread.where(spread > 1, 1)
df['hhi_range'] = 100 * (df['hhi'] - df['hhi_min']) / spread

# The 1940s at Teatro alla Scala are excluded.
df = df[~((df["source_name"]=='Teatro alla Scala')&(df["decade"]==1940))]


# Rows = decades, columns = sources; missing combinations are left as NaN.
df = df.pivot(index='decade', columns='source_name', values=['hhi_range'])
# df = df.fillna(value=0)

In [73]:
# Summary statistics of hhi_range for each source across decades; NaN cells
# (the pivot above is not filled) are skipped, so `count` differs per source.
df.describe()

Unnamed: 0_level_0,hhi_range,hhi_range,hhi_range,hhi_range,hhi_range,hhi_range,hhi_range,hhi_range,hhi_range,hhi_range,hhi_range,hhi_range
source_name,Chicago Lyric Opera,Czech National Theatre,Glyndebourne,Metropolitan Opera,National Opera de Paris,Opera Scotland,Royal Opera House,Salzburg Festival,San Francisco Opera,Sweden Opera House,Teatro alla Scala,Wiener Staatsoper
count,7.0,14.0,9.0,14.0,5.0,7.0,8.0,10.0,10.0,25.0,7.0,16.0
mean,9.71999,115.598502,42.6361,68.781912,21.258802,43.481339,40.954051,30.526279,21.883892,66.441609,14.980848,101.425794
std,5.163074,27.37949,29.705982,26.1401,9.585389,14.615089,15.620167,12.127783,12.595308,22.730709,5.053392,25.385564
min,3.550946,61.837114,20.177351,35.885867,8.011974,27.825374,19.266494,21.141975,8.25329,30.961652,6.239016,62.949204
25%,5.747298,97.644082,24.165281,50.288013,17.996432,34.236589,30.400697,23.894278,11.082701,49.71743,12.634653,88.484525
50%,9.546667,114.056661,26.620314,65.717235,20.588389,40.702909,39.472627,26.892605,19.850868,67.621827,15.298067,94.497678
75%,13.081257,131.165302,43.024672,82.651188,25.763372,47.771802,49.81461,32.016681,29.657022,86.687478,18.566476,110.279552
max,17.285207,165.104331,96.401888,133.512872,33.933843,71.824307,65.462567,62.974604,43.340914,97.902373,20.926596,153.48102
