In [7]:
from pathlib import Path

import gspread
import pandas as pd
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials

# Authorize a gspread client from the service-account key file stored one
# directory above the notebook.
# NOTE(review): oauth2client is deprecated upstream; newer gspread releases
# offer gspread.service_account() -- confirm the installed version before
# switching.
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]
creds = ServiceAccountCredentials.from_json_keyfile_name(
    '../gspread_creds.json',
    scope,
)
client = gspread.authorize(creds)

In [8]:
# Flag columns of the sheet. After the conversion below a cell is True only
# when it held the exact string "TRUE"; anything else becomes False.
bool_cols = [
    'mentions a subsidiary',
    'mentions stock ownership',
    'affiliated company is AG',
    'affiliated company is GmbH',
    'mentions an affiliated company',
    'mentions a plant/office/branch',
    'is a subsidiary of a German firm',
    'other types of agreement',
]

# Read every record of the "Sheet1 expanded" tab in the "Tenenbaum" spreadsheet
# and load the records into a DataFrame.
worksheet = client.open("Tenenbaum").worksheet("Sheet1 expanded")
records = worksheet.get_all_records()
tenenbaum = pd.DataFrame(records)

# Element-wise comparison turns the flag columns into real booleans.
tenenbaum[bool_cols] = tenenbaum[bool_cols].eq("TRUE")

In [9]:
# Display the loaded frame as a sanity check; the rendered output below elides
# the middle rows.
tenenbaum

Unnamed: 0,US firm name,Master US firm name,affiliated German firm name,Master German firm name,notes,mentions a subsidiary,mentions stock ownership,affiliated company is AG,affiliated company is GmbH,mentions an affiliated company,mentions a plant/office/branch,is a subsidiary of a German firm,other types of agreement
0,Ford Motor Company,Ford Motor Company,Ford Motor Co. A. G.,Ford-Werke AG,,True,False,True,False,False,True,False,False
1,Ford Motor Company,Ford Motor Company,Credit A. G. für Fordfahrzeuge,Credit Aktiengesellschaft für Ford-Fahrzeuge.,,True,False,True,False,False,True,False,False
2,General Motors Corporation,General Motors Corporation (Overseas Operations),Adam Opel A.G.,Adam Opel Aktiengesellschaft,,True,True,True,False,False,False,False,False
3,General Motors Corporation,General Motors Corporation (Overseas Operations),Opel Automobilversicherungs A.G.,Opel Automobil Versicherungs AG,,True,True,True,False,False,False,False,False
4,General Motors Corporation,General Motors Corporation (Overseas Operations),Continentale Gummiwerke A.G.,Continentale Gummiwerke A.G.,,True,True,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,Libbey-Owens-Ford Glass Co.,Libbey-Owens-Ford Glass Company,Deutsche Libbey -Owens Gesellschaft für machin...,Deutsche Libbey-Owens Gesellschaft für machine...,The LIBBEY-OWENS-FORD GLASS Co. owns the Cie. ...,True,True,True,False,False,False,False,False
140,Libbey-Owens-Ford Glass Co.,Libbey-Owens-Ford Glass Company,Deutsche Tafelglas A. G.,Deutsche Tafelglas Aktiengesellschaft (Detag).,The LIBBEY-OWENS-FORD GLASS Co. owns the Cie. ...,True,True,True,False,False,False,False,False
141,The Norton Company,Norton Company,Deutsche Norton G.m.b.H.,"Deutsche Norton-Gesellschaft, m.b.H.",The NORTON COMPANY (abrasives ) owns the Deuts...,True,False,False,True,False,False,False,False
142,Amber Mines Inc.,Amber Mines Inc.,Preussische Bergwerks- und Hutten Gesellschaft,Preussische Bergwerks- und Hutten Gesellschaft,The PREUSSISCHE Bergwerks- UND HUTTEN GESELLSC...,False,False,False,True,False,False,True,False


In [10]:
def assign_link_type(row):
    """Return the link-type code for one row of the table.

    The flag columns are tested in a fixed priority order and the first
    truthy one decides the result; later flags are not looked at.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by the
            flag column names listed below.

    Returns:
        'USO', 'USOP', 'USC' or 'DEO' according to the first flag that is
        set, or 'Others' when none of them is.
    """
    # (flag column, resulting code), highest priority first.
    priority = (
        ('mentions a subsidiary', 'USO'),
        ('mentions stock ownership', 'USOP'),
        ('mentions an affiliated company', 'USC'),
        ('other types of agreement', 'USC'),
        ('is a subsidiary of a German firm', 'DEO'),
    )
    for flag, code in priority:
        if row[flag]:
            return code
    return 'Others'

def assign_org_type(row):
    """Return the organisation-type label for one row of the table.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by
            'affiliated company is AG' and 'affiliated company is GmbH'.

    Returns:
        'AG' when the AG flag is set (it takes precedence), otherwise
        'GmbH' when the GmbH flag is set, otherwise 'Others'.
    """
    if row['affiliated company is AG']:
        return 'AG'
    if row['affiliated company is GmbH']:
        return 'GmbH'
    return 'Others'

# Derive the two categorical columns row by row from the boolean flags,
# link_type first and org_type second.
for new_column, classifier in (('link_type', assign_link_type),
                               ('org_type', assign_org_type)):
    tenenbaum[new_column] = tenenbaum.apply(classifier, axis=1)

In [11]:
# Sheet header -> header used in the exported file.
export_names = {
    'Master US firm name': 'US Company',
    'Master German firm name': 'German subsidiary',
    'US firm name': 'US firm (as in book)',
    'affiliated German firm name': 'Affiliated German firm (as in book)',
    'notes': 'Description',
}

# Columns kept in the export, in output order; every other column
# (including the raw flag columns) is dropped.
export_order = [
    'German subsidiary',
    'US Company',
    'link_type',
    'org_type',
    'US firm (as in book)',
    'Affiliated German firm (as in book)',
    'Description',
]

tenenbaum_to_csv = tenenbaum.rename(columns=export_names)[export_order]

In [12]:

# Write the cleaned table to CSV without the index column. The output
# directory is created first so the export also works on a fresh checkout,
# where to_csv would otherwise fail on the missing folder.
output_path = Path('../output/tenenbaum_cleaned.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
tenenbaum_to_csv.to_csv(output_path, index=False)