### Creates a CSV file indicating, for each work, whether its composer was alive at the time of the performance.

In [1]:
import math
import numpy as np
import pandas as pd

# Directory prefix shared by the input files read here and the output file
# written at the end of the notebook.
path_to_data = 'data/'

# Build both input paths first, then load the two tables.
works_csv = path_to_data + 'works_clean.csv'
dates_csv = path_to_data + 'comp_dates.csv'
w_df = pd.read_csv(works_csv)
c_df = pd.read_csv(dates_csv)

#### Pre-processing

In [2]:
# Drop three columns from the works table that the later cells do not read.
w_df = w_df.drop(columns=["Unnamed: 0", "workTitle", "programID"])

# Name the unnamed first column of the composer table "composerName" and
# use it as the index, so each row is keyed by composer.
c_df = (
    c_df
    .rename(columns={"Unnamed: 0": "composerName"})
    .set_index("composerName")
)

#### Create a dictionary of {"composerName" : deathYear} for faster lookup

In [3]:
# Map each composer (the index label of c_df) to the value in the second
# column of that composer's row, which the later cells use as the death year.
# .iloc makes the positional lookup explicit: a bare integer key on a
# label-indexed Series is deprecated as a positional accessor.
comp = {composer: row.iloc[1] for composer, row in c_df.iterrows()}

#### Insert "living" column in df

In [4]:
# Add an empty 'living' column as the third column (position 2); it is
# filled in row by row further down.
w_df.insert(loc=2, column='living', value='')

#### Determining whether a composer is living in given season:

<em>A composer is considered living if they were alive on the performance date or had died within the 5 years preceding it.</em>

In [5]:
def y_since_death(year, composer, dict_composers):
    '''
    Computes how many years before `year` the composer died.

    Parameters:
        year: year of the performance (a number).
        composer: key used to look the composer up in `dict_composers`.
        dict_composers: mapping of composer -> death year (a number,
            possibly NaN).

    Returns:
        NaN if the composer is not in `dict_composers` or the stored death
        year is NaN; 0 if the composer counts as living in `year` (death
        year is 9999, or lies after `year`); otherwise the number of years
        since death, `year - death_year`.
    '''
    # A missing composer and a missing (NaN) death year are handled the
    # same way, so fold both into a single NaN check.
    death_year = dict_composers.get(composer, float('nan'))
    if math.isnan(death_year):
        return float('nan')

    # 9999 is treated as "still living" (presumably a sentinel for no
    # recorded death); a death year after `year` means the composer had
    # not yet died at that time.
    if death_year == 9999 or year < death_year:
        return 0

    return year - death_year

In [6]:
# Label every work "Y" (composer living, or dead for at most 5 years) or
# "N" (dead for more than 5 years). Labels are collected in a list and
# assigned in one step: the original chained assignment
# `w_df.living[index] = ...` may write to a temporary copy, and the frame
# should not be modified while it is being iterated.
living_labels = []

for _, row in w_df.iterrows():
    # Positional access: column 0 is passed on as the composer, and the
    # first four characters of column 1 are parsed as the performance year.
    performance_year = int(row.iloc[1][:4])
    years_death = y_since_death(performance_year, row.iloc[0], comp)

    if years_death > 5:
        living_labels.append("N")
    elif years_death <= 5:
        living_labels.append("Y")
    else:
        # NaN fails both comparisons (unknown composer or death year):
        # keep whatever value the row already has.
        living_labels.append(row['living'])

w_df['living'] = living_labels

#### Write to file

In [7]:
# Write the labelled works table next to the input files.
output_csv = path_to_data + "works_composer_living.csv"
w_df.to_csv(output_csv)