In [3]:
import pandas as pd
import gspread
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials

# Authenticate with a service-account key file (path is relative to the
# working directory) and build the gspread client used by later cells.
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]
creds = ServiceAccountCredentials.from_json_keyfile_name(
    '../gspread_creds.json',
    scope,
)
client = gspread.authorize(creds)

In [4]:
# Locate the "reviewed-expanded" tab inside the "Moodys 1934" spreadsheet.
spreadsheet = client.open("Moodys 1934")
worksheet = spreadsheet.worksheet("reviewed-expanded")

# Fetch every record from the tab (a list of dicts) and load it into a DataFrame.
records = worksheet.get_all_records()
moodys34 = pd.DataFrame(records)

# Flag columns: a cell counts as True only when it holds the exact string "TRUE";
# every other value becomes False.
bool_cols = [
    'mentions a subsidiary',
    'mentions stock ownership',
    'affiliated company is AG',
    'affiliated company is GmbH',
    'mentions an affiliated company',
    'mentions a plant/office/branch',
    'is a subsidiary of a German firm',
    'other types of agreement',
]
moodys34[bool_cols] = moodys34[bool_cols].eq("TRUE")

# Replace the literal 'NA' placeholder in the affiliated-firm name with an empty string.
moodys34['affiliated German firm name'] = (
    moodys34['affiliated German firm name'].replace(to_replace='NA', value='')
)

In [5]:
def assign_link_type(row):
    """Return the link-type code for one record.

    The flags are checked in a fixed order and the first truthy one decides
    the result; flags after the first match are not read.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row[column_name]`` for the flag columns below
        (a DataFrame row when used with ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    str
        One of 'USO', 'USOP', 'USC', 'DEO', or 'Others' when no flag is set.
    """
    # Ordered by precedence: earlier entries win over later ones.
    precedence = (
        ('mentions a subsidiary', 'USO'),
        ('mentions stock ownership', 'USOP'),
        ('mentions an affiliated company', 'USC'),
        ('other types of agreement', 'USC'),
        ('is a subsidiary of a German firm', 'DEO'),
    )
    for flag, code in precedence:
        if row[flag]:
            return code

    return 'Others'

def assign_org_type(row):
    """Return the organisation-type label for one record.

    The AG flag is checked before the GmbH flag, so a row with both set is
    labelled 'AG'.

    Parameters
    ----------
    row : mapping-like
        Anything supporting ``row[column_name]`` for the two
        'affiliated company is ...' flag columns.

    Returns
    -------
    str
        'AG', 'GmbH', or 'Others' when neither flag is set.
    """
    if row['affiliated company is AG']:
        return 'AG'
    if row['affiliated company is GmbH']:
        return 'GmbH'
    return 'Others'

# Derive the two categorical columns by running each classifier once per row.
moodys34['link_type'] = moodys34.apply(assign_link_type, axis='columns')
moodys34['org_type'] = moodys34.apply(assign_org_type, axis='columns')

In [6]:
# Sheet column header -> column name used in the exported table.
column_renames = {
    'Master US firm name': 'US Company',
    'Master German firm name': 'German subsidiary',
    'affiliated German firm name': 'Affiliated German firm (as in book)',
    'US company name': 'US firm (as in book)',
    'notes': 'Description',
}

# Columns kept in the cleaned table, in output order.
output_columns = [
    'German subsidiary',
    'US Company',
    'link_type',
    'org_type',
    'US firm (as in book)',
    'Affiliated German firm (as in book)',
    'Description',
]

moodys34_cleaned = moodys34.rename(columns=column_renames)
moodys34_cleaned = moodys34_cleaned[output_columns]

In [7]:
# Write the cleaned table to disk without the DataFrame index column.
moodys34_cleaned.to_csv(
    '../output/moodys34_cleaned.csv',
    index=False,
)