In [79]:
import pandas as pd
import gspread
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials

# Auth
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('../gspread_creds.json', scope)
client = gspread.authorize(creds)

### TFR-500

In [80]:
# Open the sheet
worksheet = client.open("tfr500_summarized").worksheet("classified")

# Get all records as list of dicts
records = worksheet.get_all_records()

# Convert to DataFrame
tfr500 = pd.DataFrame(records)

# one-hot encoding investment types # for later aggregating purpose
type_expanded = pd.get_dummies(tfr500["Type of investment"], prefix="type")
tfr500 = pd.concat([tfr500, type_expanded], axis=1)

In [81]:
tfr500_parent_list = tfr500.groupby('American Owner - Name').agg({
    'Name of Foreign Business': list,
    **{col: 'any' for col in type_expanded}
}).reset_index()
tfr500_parent_list = tfr500_parent_list[tfr500_parent_list['American Owner - Name']!='']
tfr500_parent_list = tfr500_parent_list.rename(columns={'American Owner - Name': 'Master US firm name',
                                                        'Name of Foreign Business': 'affiliated German firms'})
tfr500_parent_list = tfr500_parent_list[['Master US firm name']]

In [82]:
tfr500_parent_list.to_excel('output/trf500_parent_list.xlsx', index=False)

### Moody's 1932

In [83]:
# Open the sheet
worksheet = client.open("Moodys 1932").worksheet("reviewed-expanded")

# Get all records as list of dicts
records = worksheet.get_all_records()

# Convert to DataFrame
moodys32 = pd.DataFrame(records)

bool_cols = ['mentions a subsidiary', 
             'mentions stock ownership',
             'subsidiary is AG',
             'subsidiary is GmbH',
             'mentions an affilated company',
             'mentions a plant/office/branch', 
             'is a subsidiary of a German firm',
             'other types of agreement']
moodys32[bool_cols] = moodys32[bool_cols].apply(lambda x: x == "TRUE")
moodys32['affiliated German firm name'] = moodys32['affiliated German firm name'].replace('NA', '')

In [84]:
moodys32_parent_list = moodys32.groupby("Master US firm name").agg(
    {
        "affiliated German firm name": list,
        "Master German firm name": list,
        **{col: "any" for col in bool_cols}
    }
).reset_index()

moodys32_parent_list = moodys32_parent_list[['Master US firm name']]

In [85]:
tfr500_moodys32 = tfr500_parent_list.merge(moodys32_parent_list, on='Master US firm name', how='outer', indicator=True)

tfr500_moodys32['TFR-500'] = tfr500_moodys32['_merge'].isin(['left_only', 'both'])
tfr500_moodys32['Moodys 32'] = tfr500_moodys32['_merge'].isin(['right_only', 'both'])

tfr500_moodys32 = tfr500_moodys32.drop(columns='_merge')

In [86]:
tfr500_moodys32

Unnamed: 0,Master US firm name,TFR-500,Moodys 32
0,Addressograph-Multigraph Corporation,True,True
1,American -Austrian Magnesite Corporation,True,False
2,American Bemberg Corp.,False,True
3,American Can Company,False,True
4,American Cyanamid Company,True,False
...,...,...,...
201,Western Electric Export Corporation,True,False
202,Westhold Corporation,True,False
203,"William R. Warner & Co., Inc.",True,False
204,Witroth Corporation,True,False


### Moody's 1934

In [87]:
# Open the sheet
worksheet = client.open("Moodys 1934").worksheet("reviewed-expanded")

# Get all records as list of dicts
records = worksheet.get_all_records()

# Convert to DataFrame
moodys34 = pd.DataFrame(records)

bool_cols = ['mentions a subsidiary', 
             'mentions stock ownership',
             'subsidiary is AG',
             'subsidiary is GmbH',
             'mentions an affilated company',
             'mentions a plant/office/branch', 
             'is a subsidiary of a German firm',
             'other types of agreement']
moodys34[bool_cols] = moodys34[bool_cols].apply(lambda x: x == "TRUE")
moodys34['affiliated German firm name'] = moodys34['affiliated German firm name'].replace('NA', '')

In [88]:
moodys34_parent_list = moodys34.groupby("Master US firm name").agg(
    {
        "affiliated German firm name": list,
        "Master German firm name": list,
        **{col: "any" for col in bool_cols}
    }
).reset_index()

moodys34_parent_list = moodys34_parent_list[['Master US firm name']]

In [89]:
tfr500_moodys32_moodys34 = tfr500_moodys32.merge(moodys34_parent_list, on='Master US firm name', how='outer', indicator=True,
                                                 suffixes=['_moodys32', '_moodys34'])

tfr500_moodys32_moodys34['TFR-500'] = tfr500_moodys32_moodys34['TFR-500'].fillna(False)
tfr500_moodys32_moodys34['Moodys 32'] = tfr500_moodys32_moodys34['Moodys 32'].fillna(False)
tfr500_moodys32_moodys34['Moodys 34'] = tfr500_moodys32_moodys34['_merge'].isin(['right_only', 'both'])

tfr500_moodys32_moodys34 = tfr500_moodys32_moodys34.drop(columns='_merge')

first_cols = ['Master US firm name', 'TFR-500', 'Moodys 32', 'Moodys 34']
other_cols = [col for col in tfr500_moodys32_moodys34.columns if col not in first_cols]

tfr500_moodys32_moodys34 = tfr500_moodys32_moodys34[first_cols + other_cols]

tfr500_moodys32_moodys34

  tfr500_moodys32_moodys34['TFR-500'] = tfr500_moodys32_moodys34['TFR-500'].fillna(False)
  tfr500_moodys32_moodys34['Moodys 32'] = tfr500_moodys32_moodys34['Moodys 32'].fillna(False)


Unnamed: 0,Master US firm name,TFR-500,Moodys 32,Moodys 34
0,Addressograph-Multigraph Corporation,True,True,True
1,Alta United Mines Co.,False,False,True
2,Aluminum Company of America,False,False,True
3,American -Austrian Magnesite Corporation,True,False,False
4,American Bemberg Corp.,False,True,True
...,...,...,...,...
229,Westhold Corporation,True,False,False
230,"William R. Warner & Co., Inc.",True,False,False
231,Witroth Corporation,True,False,False
232,Wm. Wrigley Jr. Company,False,False,True


In [90]:
tfr500_moodys32_moodys34.to_csv('output/tfr500_moodys32_moodys34.csv')

### Tenenbaum

In [91]:
# Open the sheet
worksheet = client.open("Tenenbaum").worksheet("Sheet1 expanded")

# Get all records as list of dicts
records = worksheet.get_all_records()

# Convert to DataFrame
tenenbaum = pd.DataFrame(records)

bool_cols = ['mentions a subsidiary', 
             'mentions stock ownership',
             'subsidiary is AG',
             'subsidiary is GmbH',
             'mentions an affilated company',
             'mentions a plant/office/branch', 
             'is a subsidiary of a German firm',
             'other types of agreement']
tenenbaum[bool_cols] = tenenbaum[bool_cols].apply(lambda x: x == "TRUE")

In [92]:
tenenbaum_parent_list = tenenbaum.groupby("Master US firm name").agg(
    {
        "affiliated German firm name": list,
        "Master German firm name": list,
        **{col: "any" for col in bool_cols}
    }
).reset_index()

tenenbaum_parent_list = tenenbaum_parent_list[['Master US firm name']]

In [93]:
tfr500_moodys32_moodys34_tenenbaum = tfr500_moodys32_moodys34.merge(tenenbaum_parent_list, on='Master US firm name', how='outer', indicator=True)

tfr500_moodys32_moodys34_tenenbaum['TFR-500'] = tfr500_moodys32_moodys34_tenenbaum['TFR-500'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum['Moodys 32'] = tfr500_moodys32_moodys34_tenenbaum['Moodys 32'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum['Moodys 34'] = tfr500_moodys32_moodys34_tenenbaum['Moodys 34'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum['Tenenbaum'] = tfr500_moodys32_moodys34_tenenbaum['_merge'].isin(['right_only', 'both'])

tfr500_moodys32_moodys34_tenenbaum = tfr500_moodys32_moodys34_tenenbaum.drop(columns='_merge')

first_cols = ['Master US firm name', 'TFR-500', 'Moodys 32', 'Moodys 34', 'Tenenbaum']
other_cols = [col for col in tfr500_moodys32_moodys34_tenenbaum.columns if col not in first_cols]

tfr500_moodys32_moodys34_tenenbaum = tfr500_moodys32_moodys34_tenenbaum[first_cols + other_cols]

tfr500_moodys32_moodys34_tenenbaum

  tfr500_moodys32_moodys34_tenenbaum['TFR-500'] = tfr500_moodys32_moodys34_tenenbaum['TFR-500'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum['Moodys 32'] = tfr500_moodys32_moodys34_tenenbaum['Moodys 32'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum['Moodys 34'] = tfr500_moodys32_moodys34_tenenbaum['Moodys 34'].fillna(False)


Unnamed: 0,Master US firm name,TFR-500,Moodys 32,Moodys 34,Tenenbaum
0,Addressograph-Multigraph Corporation,True,True,True,True
1,Alta United Mines Co.,False,False,True,False
2,Aluminum Company of America,False,False,True,False
3,Amber Mines Inc.,False,False,False,True
4,American -Austrian Magnesite Corporation,True,False,False,False
...,...,...,...,...,...
261,"William R. Warner & Co., Inc.",True,False,False,False
262,William Rhodes Davis,False,False,False,True
263,Witroth Corporation,True,False,False,False
264,Wm. Wrigley Jr. Company,False,False,True,True


### HB 1932

In [94]:
# Open the sheet
worksheet = client.open("Handbuch 1932").worksheet("validated")

# Get all records as list of dicts
records = worksheet.get_all_records()

# Convert to DataFrame
hb1932 = pd.DataFrame(records)

In [95]:
hb1932_w_us_firm = hb1932[hb1932['US parent']!='']
hb1932_us_firm = hb1932_w_us_firm.groupby('Master US firm name')['corrected firm name'].agg(list).reset_index()

hb1932_us_firm = hb1932_us_firm[['Master US firm name']]

In [96]:
tfr500_moodys32_moodys34_tenenbaum_hb32 = tfr500_moodys32_moodys34_tenenbaum.merge(hb1932_us_firm.rename(columns={'corrected firm name': 'HB 1932 affiliated German firm name'}), 
                                                                                   on='Master US firm name', 
                                                                                   how='outer',
                                                                                   indicator=True)
tfr500_moodys32_moodys34_tenenbaum_hb32['TFR-500'] = tfr500_moodys32_moodys34_tenenbaum_hb32['TFR-500'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb32['Moodys 32'] = tfr500_moodys32_moodys34_tenenbaum_hb32['Moodys 32'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb32['Moodys 34'] = tfr500_moodys32_moodys34_tenenbaum_hb32['Moodys 34'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb32['Tenenbaum'] = tfr500_moodys32_moodys34_tenenbaum_hb32['Tenenbaum'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb32['HB 32'] = tfr500_moodys32_moodys34_tenenbaum_hb32['_merge'].isin(['right_only', 'both'])

tfr500_moodys32_moodys34_tenenbaum_hb32 = tfr500_moodys32_moodys34_tenenbaum_hb32.drop(columns='_merge')

first_cols = ['Master US firm name', 'TFR-500', 'Moodys 32', 'Moodys 34', 'Tenenbaum', 'HB 32']
other_cols = [col for col in tfr500_moodys32_moodys34_tenenbaum_hb32.columns if col not in first_cols]

tfr500_moodys32_moodys34_tenenbaum_hb32 = tfr500_moodys32_moodys34_tenenbaum_hb32[first_cols + other_cols]

tfr500_moodys32_moodys34_tenenbaum_hb32

  tfr500_moodys32_moodys34_tenenbaum_hb32['TFR-500'] = tfr500_moodys32_moodys34_tenenbaum_hb32['TFR-500'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum_hb32['Moodys 32'] = tfr500_moodys32_moodys34_tenenbaum_hb32['Moodys 32'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum_hb32['Moodys 34'] = tfr500_moodys32_moodys34_tenenbaum_hb32['Moodys 34'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum_hb32['Tenenbaum'] = tfr500_moodys32_moodys34_tenenbaum_hb32['Tenenbaum'].fillna(False)


Unnamed: 0,Master US firm name,TFR-500,Moodys 32,Moodys 34,Tenenbaum,HB 32
0,Addressograph-Multigraph Corporation,True,True,True,True,False
1,Alta United Mines Co.,False,False,True,False,False
2,Aluminum Company of America,False,False,True,False,False
3,Amber Mines Inc.,False,False,False,True,False
4,American -Austrian Magnesite Corporation,True,False,False,False,False
...,...,...,...,...,...,...
272,"William R. Warner & Co., Inc.",True,False,False,False,False
273,William Rhodes Davis,False,False,False,True,False
274,Witroth Corporation,True,False,False,False,False
275,Wm. Wrigley Jr. Company,False,False,True,True,True


### HB 1934

In [97]:
# Open the sheet
worksheet = client.open("Handbuch 1934").worksheet("validated")

# Get all records as list of dicts
records = worksheet.get_all_records()

# Convert to DataFrame
hb1934 = pd.DataFrame(records)

In [98]:
hb1934_w_us_firm = hb1934[hb1934['US parent']!='']
hb1934_us_firm = hb1934_w_us_firm.groupby('Master US firm name')['corrected firm name'].agg(list).reset_index()

hb1934_us_firm = hb1934_us_firm[['Master US firm name']]

In [99]:
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34 = tfr500_moodys32_moodys34_tenenbaum_hb32.merge(hb1934_us_firm.rename(columns={'corrected firm name': 'HB 1934 affiliated German firm name'}), 
                                                                                   on='Master US firm name', 
                                                                                   how='outer',
                                                                                   indicator=True)
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['TFR-500'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['TFR-500'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Moodys 32'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Moodys 32'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Moodys 34'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Moodys 34'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Tenenbaum'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Tenenbaum'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['HB 32'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['HB 32'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['HB 34'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['_merge'].isin(['right_only', 'both'])


tfr500_moodys32_moodys34_tenenbaum_hb32_hb34 = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34.drop(columns='_merge')

first_cols = ['Master US firm name', 'TFR-500', 'Moodys 32', 'Moodys 34', 'Tenenbaum', 'HB 32', 'HB 34']
other_cols = [col for col in tfr500_moodys32_moodys34_tenenbaum_hb32_hb34.columns if col not in first_cols]

tfr500_moodys32_moodys34_tenenbaum_hb32_hb34 = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34[first_cols + other_cols]

tfr500_moodys32_moodys34_tenenbaum_hb32_hb34

  tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['TFR-500'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['TFR-500'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Moodys 32'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Moodys 32'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Moodys 34'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Moodys 34'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Tenenbaum'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Tenenbaum'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['HB 32'] = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['HB 32'].fillna(False)


Unnamed: 0,Master US firm name,TFR-500,Moodys 32,Moodys 34,Tenenbaum,HB 32,HB 34
0,Addressograph-Multigraph Corporation,True,True,True,True,False,False
1,Alta United Mines Co.,False,False,True,False,False,False
2,Aluminum Company of America,False,False,True,False,False,False
3,Amber Mines Inc.,False,False,False,True,False,False
4,American -Austrian Magnesite Corporation,True,False,False,False,False,False
...,...,...,...,...,...,...,...
274,"William R. Warner & Co., Inc.",True,False,False,False,False,False
275,William Rhodes Davis,False,False,False,True,False,False
276,Witroth Corporation,True,False,False,False,False,False
277,Wm. Wrigley Jr. Company,False,False,True,True,True,True


### HB GmbH 32

In [100]:
hb_gmbh = pd.read_excel('../../Moodys_name_to_handbuch/output/manual/gmbh_validated.xlsx', sheet_name='validated')
hb_gmbh = hb_gmbh[hb_gmbh['validated'].isin(['USC', 'USO', 'USOP'])]

hb_gmbh["is_duplicate"] = hb_gmbh.duplicated(subset=['Master German firm name', 'Master US firm name'])
# dropping duplicates
hb_gmbh = hb_gmbh[hb_gmbh["is_duplicate"] == False]

hb_gmbh = hb_gmbh[['german_name', 'Master German firm name', 'US_name', 'Master US firm name', 'validated']]
hb_gmbh

Unnamed: 0,german_name,Master German firm name,US_name,Master US firm name,validated
1,American News Company,American News Company,"American News Company, Inc.","American News Company, Inc.",USC
2,Columbia Film,Columbia Film,Columbia Pictures Corporation,Columbia Pictures Corp.,USC
3,O-Cedar,O-Cedar,O-Cedar Corporation,O-Cedar Corporation,USC
4,Fairbanks,The Fairbanks G.m.b.H. Company,Fairbanks Company,The Fairbanks Company,USO
5,Gillette Safety Razor Co.,Gillette Safety Razor Co.,Gillette Safety Razor Co.,Gillette Safety Rasor Company,USO
6,Addressograph,Addressograph-Multigraph G.m.b.H.,Addressograph Company,Addressograph-Multigraph Corporation,USO
7,Aeolian Company,Aeolian Company,Aeolian Company,Aeolian Company,USO
8,American Oil Company,American Oil Company,American Oil Company,American Oil Company,USO
9,Bakelite,Bakelite Gesellschaft,Bakelite Corporation,Bakelite Corp.,USO
10,Bausch & Lomb Optical Co.,Bausch & Lomb Optical Co.,Bausch & Lomb Optical Company,Bausch & Lomb Optical Co.,USO


In [101]:
hb_gmbh_parent = hb_gmbh.groupby('Master US firm name')['Master German firm name'].agg(list).reset_index()
hb_gmbh_parent = hb_gmbh_parent[['Master US firm name']]

In [102]:
parent_large_table = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34.merge(hb_gmbh_parent,
                                                                         on='Master US firm name', 
                                                                         how='outer',
                                                                         indicator=True)
parent_large_table['TFR-500'] = parent_large_table['TFR-500'].fillna(False)
parent_large_table['Moodys 32'] = parent_large_table['Moodys 32'].fillna(False)
parent_large_table['Moodys 34'] = parent_large_table['Moodys 34'].fillna(False)
parent_large_table['Tenenbaum'] = parent_large_table['Tenenbaum'].fillna(False)
parent_large_table['HB 32'] = parent_large_table['HB 32'].fillna(False)
parent_large_table['HB 34'] = parent_large_table['HB 34'].fillna(False)
parent_large_table['HB GmbH'] = parent_large_table['_merge'].isin(['right_only', 'both'])


parent_large_table = parent_large_table.drop(columns='_merge')

first_cols = ['Master US firm name', 'TFR-500', 'Moodys 32', 'Moodys 34', 'Tenenbaum', 'HB 32', 'HB 34', 'HB GmbH']
other_cols = [col for col in parent_large_table.columns if col not in first_cols]

parent_large_table = parent_large_table[first_cols + other_cols]

parent_large_table

  parent_large_table['TFR-500'] = parent_large_table['TFR-500'].fillna(False)
  parent_large_table['Moodys 32'] = parent_large_table['Moodys 32'].fillna(False)
  parent_large_table['Moodys 34'] = parent_large_table['Moodys 34'].fillna(False)
  parent_large_table['Tenenbaum'] = parent_large_table['Tenenbaum'].fillna(False)
  parent_large_table['HB 32'] = parent_large_table['HB 32'].fillna(False)
  parent_large_table['HB 34'] = parent_large_table['HB 34'].fillna(False)


Unnamed: 0,Master US firm name,TFR-500,Moodys 32,Moodys 34,Tenenbaum,HB 32,HB 34,HB GmbH
0,Addressograph-Multigraph Corporation,True,True,True,True,False,False,True
1,Aeolian Company,False,False,False,False,False,False,True
2,Alta United Mines Co.,False,False,True,False,False,False,False
3,Aluminum Company of America,False,False,True,False,False,False,False
4,Amber Mines Inc.,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...
286,"William R. Warner & Co., Inc.",True,False,False,False,False,False,False
287,William Rhodes Davis,False,False,False,True,False,False,False
288,Witroth Corporation,True,False,False,False,False,False,False
289,Wm. Wrigley Jr. Company,False,False,True,True,True,True,False


In [103]:
parent_large_table_to_csv = parent_large_table[['Master US firm name', 'TFR-500', 'Tenenbaum', 'Moodys 32', 'Moodys 34', 'HB 32', 'HB 34', 'HB GmbH']]
parent_large_table_to_csv = parent_large_table_to_csv.rename(columns={'Master US firm name': 'US Company'})
parent_large_table_to_csv.to_csv('output/parent_union.csv', index=False)

## Analysis

In [200]:
kw = ''
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34[tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['Master US firm name'].str.contains(kw, case=False)]

Unnamed: 0,Master US firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,in HB 32,in HB 34,affiliated German firms,type_Partnership,type_branch,...,mentions a subsidiary,mentions stock ownership,subsidiary is AG,subsidiary is GmbH,mentions an affilated company,mentions a plant/office/branch,is a subsidiary of a German firm,other types of agreement,HB 1932 affiliated German firm name,HB 1934 affiliated German firm name
0,Addressograph-Multigraph Corporation,True,True,True,True,False,False,[Addressograph-Multigraph G.m.b.H.],False,False,...,True,False,False,True,False,False,False,False,,
1,Alta United Mines Co.,False,False,True,False,False,False,,,,...,,,,,,,,,,
2,Aluminum Company of America,False,False,True,False,False,False,,,,...,,,,,,,,,,
3,Amber Mines Inc.,False,False,False,True,False,False,,,,...,False,False,False,False,False,False,True,False,,
4,American -Austrian Magnesite Corporation,True,False,False,False,False,False,"[Deutsche Heraklith A.G., Deutsche Heraklith A...",False,False,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274,"William R. Warner & Co., Inc.",True,False,False,False,False,False,"[Godecke & Co. Chemische Fabrik A.G., Godecke ...",False,False,...,,,,,,,,,,
275,William Rhodes Davis,False,False,False,True,False,False,,,,...,True,False,True,False,False,False,False,False,,
276,Witroth Corporation,True,False,False,False,False,False,"[Oppelener Lagerhaus Gesellschaft, Ostrhederei]",False,False,...,,,,,,,,,,
277,Wm. Wrigley Jr. Company,False,False,True,True,True,True,,,,...,False,False,True,False,False,False,False,False,[Wrigley Aktiengesellschaft],[Wrigley Aktiengesellschaft.]


In [201]:
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34[(tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in Moodys 32']) |
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in Moodys 34']) | 
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in HB 32']) |
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in HB 34'])]

Unnamed: 0,Master US firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,in HB 32,in HB 34,affiliated German firms,type_Partnership,type_branch,...,mentions a subsidiary,mentions stock ownership,subsidiary is AG,subsidiary is GmbH,mentions an affilated company,mentions a plant/office/branch,is a subsidiary of a German firm,other types of agreement,HB 1932 affiliated German firm name,HB 1934 affiliated German firm name
0,Addressograph-Multigraph Corporation,True,True,True,True,False,False,[Addressograph-Multigraph G.m.b.H.],False,False,...,True,False,False,True,False,False,False,False,,
1,Alta United Mines Co.,False,False,True,False,False,False,,,,...,,,,,,,,,,
2,Aluminum Company of America,False,False,True,False,False,False,,,,...,,,,,,,,,,
6,American Bemberg Corp.,False,True,True,False,False,False,,,,...,,,,,,,,,,
7,American Can Company,False,True,True,False,False,False,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,Valvoline Oil Co.,False,False,True,False,False,False,,,,...,,,,,,,,,,
265,"W. A. Harriman & Co., Inc., New York.",False,False,False,False,True,True,,,,...,,,,,,,,,[Deutsch-Atlantische Telegraphengesellschaft],"[Vereinigte Königs- und Laurahütte, Actien-Ges..."
268,Warner Bros,False,False,False,False,True,False,,,,...,,,,,,,,,[Tobis Tonbild-Syndikat Aktiengesellschaft],
277,Wm. Wrigley Jr. Company,False,False,True,True,True,True,,,,...,False,False,True,False,False,False,False,False,[Wrigley Aktiengesellschaft],[Wrigley Aktiengesellschaft.]


In [202]:
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34[((tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in Moodys 32']) |
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in Moodys 34']) | 
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in HB 32']) |
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in HB 34'])) & 
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in TFR-500'])]

Unnamed: 0,Master US firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,in HB 32,in HB 34,affiliated German firms,type_Partnership,type_branch,...,mentions a subsidiary,mentions stock ownership,subsidiary is AG,subsidiary is GmbH,mentions an affilated company,mentions a plant/office/branch,is a subsidiary of a German firm,other types of agreement,HB 1932 affiliated German firm name,HB 1934 affiliated German firm name
0,Addressograph-Multigraph Corporation,True,True,True,True,False,False,[Addressograph-Multigraph G.m.b.H.],False,False,...,True,False,False,True,False,False,False,False,,
14,American Radiator & Standard Sanitary Corporation,True,True,True,True,False,False,[Nationale Radiator Gesellschaft m.b.H.],False,False,...,True,False,False,True,False,False,False,False,,
25,Bavarian oil and Gas Corporation,True,False,False,False,True,True,[Bayerische Mineral-Industrie A.G.],False,False,...,,,,,,,,,[Bayerische Mineral-Industrie Aktiengesellschaft],[Bayerische Mineral-Industrie Aktiengesellscha...
31,Burroughs Adding Machine Company,True,True,True,True,False,False,[Deutsche Burroughs Rechenmaschinen A.G.],False,False,...,True,False,True,False,False,False,False,False,,
41,Chicago Pneumatic Tool Co.,True,True,True,True,False,False,[Internationale Pressluft und Elektricitats Ge...,False,False,...,True,False,True,True,False,False,False,True,,
42,Colgate-Palmolive-Peet Co.,True,True,True,True,False,False,[Palmolive-Binder & Ketals G.m.b.H.],False,False,...,True,False,False,True,False,False,False,False,,
44,Combustion Engineering Co. Inc.,True,True,False,False,False,False,"[Kohlenscheidungs-Gesellschaft, m.b.H.]",False,False,...,,,,,,,,,,
50,Corn Products Refining company,True,False,False,True,False,True,"[Aktien-Malsfabrik Landsburg, A. G., Convertga...",False,False,...,True,False,True,True,False,False,False,False,,[W. A. Schölten Stärke- und Syrup-Fabriken Akt...
60,E.I. du Pont de Nemours and Company,True,True,True,True,True,True,[Duco Aktiengesellschaft],False,False,...,True,True,True,False,False,False,False,True,[Dynamit-Act.-Ges. vormals Alfred Nobel & Co.],[Dynamit-Actien-Gesellschaft vormals Alfred No...
61,Eastman Kodak Company,True,True,True,True,True,False,[Kodak Aktiengesellschaft],False,False,...,True,False,True,False,False,False,False,False,[Kodak Aktiengesellschaft],


In [203]:
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34[((~tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in Moodys 32']) &
                                        (~tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in Moodys 34']) & 
                                        (~tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in HB 32']) &
                                        (~tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in HB 34'])) & 
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in TFR-500'])]

Unnamed: 0,Master US firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,in HB 32,in HB 34,affiliated German firms,type_Partnership,type_branch,...,mentions a subsidiary,mentions stock ownership,subsidiary is AG,subsidiary is GmbH,mentions an affilated company,mentions a plant/office/branch,is a subsidiary of a German firm,other types of agreement,HB 1932 affiliated German firm name,HB 1934 affiliated German firm name
4,American -Austrian Magnesite Corporation,True,False,False,False,False,False,"[Deutsche Heraklith A.G., Deutsche Heraklith A...",False,False,...,,,,,,,,,,
8,American Cyanamid Company,True,False,False,False,False,False,[Farsals-Gesellschaft m.b.H.],False,False,...,,,,,,,,,,
9,American Foreign Insurance Association,True,False,False,False,False,False,[Great American Insurance Co. - Home Insurance...,False,True,...,,,,,,,,,,
13,American Nickel Alloy Mfg. Corp.,True,False,False,False,False,False,[Cosmo Montangesellschaft fuer metallurgische ...,False,False,...,,,,,,,,,,
16,American-Austrian Magnesite Corporation,True,False,False,False,False,False,[Alpenminen A. G.],False,False,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
270,Waterman Steamship,True,False,False,False,False,False,[Waterman Linien (Vertretung Bremen) G.m.b.H.],False,False,...,,,,,,,,,,
271,Western Electric Export Corporation,True,False,False,False,False,False,[Western Electric G.m.b.H.],False,False,...,,,,,,,,,,
272,Westhold Corporation,True,False,False,False,False,False,[Ota Schlesische Schurwerke Ottmuth A.G.],False,False,...,,,,,,,,,,
274,"William R. Warner & Co., Inc.",True,False,False,False,False,False,"[Godecke & Co. Chemische Fabrik A.G., Godecke ...",False,False,...,,,,,,,,,,


In [204]:
tfr500_moodys32_moodys34_tenenbaum_hb32_hb34[((tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in Moodys 32']) |
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in Moodys 34']) | 
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in HB 32']) |
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in HB 34'])) & 
                                        (tfr500_moodys32_moodys34_tenenbaum_hb32_hb34['in Tenenbaum'])]

Unnamed: 0,Master US firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,in HB 32,in HB 34,affiliated German firms,type_Partnership,type_branch,...,mentions a subsidiary,mentions stock ownership,subsidiary is AG,subsidiary is GmbH,mentions an affilated company,mentions a plant/office/branch,is a subsidiary of a German firm,other types of agreement,HB 1932 affiliated German firm name,HB 1934 affiliated German firm name
0,Addressograph-Multigraph Corporation,True,True,True,True,False,False,[Addressograph-Multigraph G.m.b.H.],False,False,...,True,False,False,True,False,False,False,False,,
10,American I. G. Chemical Corp.,False,False,True,True,False,False,,,,...,False,False,False,False,False,False,True,False,,
14,American Radiator & Standard Sanitary Corporation,True,True,True,True,False,False,[Nationale Radiator Gesellschaft m.b.H.],False,False,...,True,False,False,True,False,False,False,False,,
15,American Ship & Commerce Corp.,False,False,True,True,False,False,,,,...,False,True,False,False,False,False,False,False,,
31,Burroughs Adding Machine Company,True,True,True,True,False,False,[Deutsche Burroughs Rechenmaschinen A.G.],False,False,...,True,False,True,False,False,False,False,False,,
41,Chicago Pneumatic Tool Co.,True,True,True,True,False,False,[Internationale Pressluft und Elektricitats Ge...,False,False,...,True,False,True,True,False,False,False,True,,
42,Colgate-Palmolive-Peet Co.,True,True,True,True,False,False,[Palmolive-Binder & Ketals G.m.b.H.],False,False,...,True,False,False,True,False,False,False,False,,
50,Corn Products Refining company,True,False,False,True,False,True,"[Aktien-Malsfabrik Landsburg, A. G., Convertga...",False,False,...,True,False,True,True,False,False,False,False,,[W. A. Schölten Stärke- und Syrup-Fabriken Akt...
60,E.I. du Pont de Nemours and Company,True,True,True,True,True,True,[Duco Aktiengesellschaft],False,False,...,True,True,True,False,False,False,False,True,[Dynamit-Act.-Ges. vormals Alfred Nobel & Co.],[Dynamit-Actien-Gesellschaft vormals Alfred No...
61,Eastman Kodak Company,True,True,True,True,True,False,[Kodak Aktiengesellschaft],False,False,...,True,False,True,False,False,False,False,False,[Kodak Aktiengesellschaft],


In [205]:
firms_unique = tfr500_moodys32_moodys34_tenenbaum_hb32_hb34.groupby('Master US firm name').first()

firms_unique[['in TFR-500', 'in Moodys 32', 'in Moodys 34', 'in Tenenbaum', 'in HB 32', 'in HB 34']].sum()

in TFR-500      171
in Moodys 32     79
in Moodys 34    100
in Tenenbaum     89
in HB 32         34
in HB 34         25
dtype: int64

### Moody's

In [206]:
bool_cols = ['mentions a subsidiary', 
             'mentions stock ownership',
             'mentions an affilated company',
             'mentions a plant/office/branch', 
             'is a subsidiary of a German firm',
             'other types of agreement']

In [207]:
moodys32_counts = moodys32_parent_list[bool_cols].sum().to_frame(name="Moody's 32")
moodys34_counts = moodys34_parent_list[bool_cols].sum().to_frame(name="Moody's 34")

In [208]:
moodys_merged = moodys32_parent_list.merge(moodys34_parent_list, on='Master US firm name', how='outer', suffixes=['_moodys32', '_moodys34'])

for col in bool_cols:
    moodys_merged[col] = (
        moodys_merged[f"{col}_moodys32"]
        .fillna(False) | moodys_merged[f"{col}_moodys34"].fillna(False)
    )
    # # If both are NaN, result should stay NaN
    # both_na = moodys_merged[f"{col}_moodys32"].isna() & moodys_merged[f"{col}_moodys34"].isna()
    # moodys_merged.loc[both_na, col] = pd.NA

# Optional: drop the suffixed columns
moodys_merged.drop(columns=[f"{col}_moodys32" for col in bool_cols] + [f"{col}_moodys34" for col in bool_cols], inplace=True)

  .fillna(False) | moodys_merged[f"{col}_moodys34"].fillna(False)
  .fillna(False) | moodys_merged[f"{col}_moodys34"].fillna(False)
  .fillna(False) | moodys_merged[f"{col}_moodys34"].fillna(False)
  .fillna(False) | moodys_merged[f"{col}_moodys34"].fillna(False)
  .fillna(False) | moodys_merged[f"{col}_moodys34"].fillna(False)
  .fillna(False) | moodys_merged[f"{col}_moodys34"].fillna(False)


In [209]:
moodys_merged_counts = moodys_merged[bool_cols].sum().to_frame(name="merged")

In [210]:
moodys_counts = pd.concat([moodys32_counts, moodys34_counts, moodys_merged_counts], axis=1)
moodys_counts.loc['Total'] = [
    moodys32_parent_list.shape[0],
    moodys34_parent_list.shape[0],
    moodys_merged.shape[0]
]
moodys_counts.to_latex('output/table/moodys_count.tex')

### print table

In [211]:
print('in TFR: ' + str(parent_large_table[parent_large_table['in TFR-500']].shape[0]))
print('in Tenenbaum: ' + str(parent_large_table[parent_large_table['in Tenenbaum']].shape[0]))
print('in TFR or Tenenbaum: ' + str(parent_large_table[(parent_large_table['in TFR-500']) | (parent_large_table['in Tenenbaum'])].shape[0]))

in TFR: 171
in Tenenbaum: 89
in TFR or Tenenbaum: 213


In [212]:
print('in HB 32: ' + str(parent_large_table[(parent_large_table['in HB 32'])].shape[0]))
print('in HB 32 and TFR: ' + str(parent_large_table[(parent_large_table['in HB 32']) & (parent_large_table['in TFR-500'])].shape[0]))
print('in HB 32 and Tenenbaum: ' + str(parent_large_table[(parent_large_table['in HB 32']) & (parent_large_table['in Tenenbaum'])].shape[0]))
print('in HB 32 and in TFR or Tenenbaum: ' + str(parent_large_table[(parent_large_table['in HB 32']) & ((parent_large_table['in TFR-500']) | (parent_large_table['in Tenenbaum']))].shape[0]))
print('-----------------')
print('in HB 34: ' + str(parent_large_table[(parent_large_table['in HB 34'])].shape[0]))
print('in HB 34 and TFR: ' + str(parent_large_table[(parent_large_table['in HB 34']) & (parent_large_table['in TFR-500'])].shape[0]))
print('in HB 34 and Tenenbaum: ' + str(parent_large_table[(parent_large_table['in HB 34']) & (parent_large_table['in Tenenbaum'])].shape[0]))
print('in HB 34 and in TFR or Tenenbaum: ' + str(parent_large_table[(parent_large_table['in HB 34']) & ((parent_large_table['in TFR-500']) | (parent_large_table['in Tenenbaum']))].shape[0]))
print('-----------------')
print('in HB 34 or 32: ' + str(parent_large_table[((parent_large_table['in HB 32']) | (parent_large_table['in HB 34']))].shape[0]))
print('in HB 34 or 32 and TFR: ' + str(parent_large_table[((parent_large_table['in HB 32']) | (parent_large_table['in HB 34'])) & (parent_large_table['in TFR-500'])].shape[0]))
print('in HB 34 or 32 and Tenenbaum: ' + str(parent_large_table[((parent_large_table['in HB 32']) | (parent_large_table['in HB 34'])) & (parent_large_table['in Tenenbaum'])].shape[0]))
print('in HB 34 or 32 and in TFR or Tenenbaum: ' + str(parent_large_table[((parent_large_table['in HB 32']) | (parent_large_table['in HB 34'])) & ((parent_large_table['in TFR-500']) | (parent_large_table['in Tenenbaum']))].shape[0]))

in HB 32: 34
in HB 32 and TFR: 20
in HB 32 and Tenenbaum: 15
in HB 32 and in TFR or Tenenbaum: 22
-----------------
in HB 34: 25
in HB 34 and TFR: 14
in HB 34 and Tenenbaum: 11
in HB 34 and in TFR or Tenenbaum: 16
-----------------
in HB 34 or 32: 38
in HB 34 or 32 and TFR: 22
in HB 34 or 32 and Tenenbaum: 16
in HB 34 or 32 and in TFR or Tenenbaum: 24


In [213]:
print('in HB GmbH: ' + str(parent_large_table[(parent_large_table['in HB GmbH'])].shape[0]))
print('in HB GmbH and TFR: ' + str(parent_large_table[(parent_large_table['in HB GmbH']) & (parent_large_table['in TFR-500'])].shape[0]))
print('in HB GmbH and Tenenbaum: ' + str(parent_large_table[(parent_large_table['in HB GmbH']) & (parent_large_table['in Tenenbaum'])].shape[0]))
print('in HB GmbH and in TFR or Tenenbaum: ' + str(parent_large_table[(parent_large_table['in HB GmbH']) & ((parent_large_table['in TFR-500']) | (parent_large_table['in Tenenbaum']))].shape[0]))

in HB GmbH: 35
in HB GmbH and TFR: 15
in HB GmbH and Tenenbaum: 8
in HB GmbH and in TFR or Tenenbaum: 16


In [214]:
print('in Moodys 32: ' + str(parent_large_table[(parent_large_table['in Moodys 32'])].shape[0]))
print('in Moodys 32 and TFR: ' + str(parent_large_table[(parent_large_table['in Moodys 32']) & (parent_large_table['in TFR-500'])].shape[0]))
print('in Moodys 32 and Tenenbaum: ' + str(parent_large_table[(parent_large_table['in Moodys 32']) & (parent_large_table['in Tenenbaum'])].shape[0]))
print('in Moodys 32 and in TFR or Tenenbaum: ' + str(parent_large_table[(parent_large_table['in Moodys 32']) & ((parent_large_table['in TFR-500']) | (parent_large_table['in Tenenbaum']))].shape[0]))
print('-----------------')
print('in Moodys 34: ' + str(parent_large_table[(parent_large_table['in Moodys 34'])].shape[0]))
print('in Moodys 34 and TFR: ' + str(parent_large_table[(parent_large_table['in Moodys 34']) & (parent_large_table['in TFR-500'])].shape[0]))
print('in Moodys 34 and Tenenbaum: ' + str(parent_large_table[(parent_large_table['in Moodys 34']) & (parent_large_table['in Tenenbaum'])].shape[0]))
print('in Moodys 34 and in TFR or Tenenbaum: ' + str(parent_large_table[(parent_large_table['in Moodys 34']) & ((parent_large_table['in TFR-500']) | (parent_large_table['in Tenenbaum']))].shape[0]))
print('-----------------')
print('in Moodys 34 or 32: ' + str(parent_large_table[((parent_large_table['in Moodys 32']) | (parent_large_table['in Moodys 34']))].shape[0]))
print('in Moodys 34 or 32 and TFR: ' + str(parent_large_table[((parent_large_table['in Moodys 32']) | (parent_large_table['in Moodys 34'])) & (parent_large_table['in TFR-500'])].shape[0]))
print('in Moodys 34 or 32 and Tenenbaum: ' + str(parent_large_table[((parent_large_table['in Moodys 32']) | (parent_large_table['in Moodys 34'])) & (parent_large_table['in Tenenbaum'])].shape[0]))
print('in Moodys 34 or 32 and in TFR or Tenenbaum: ' + str(parent_large_table[((parent_large_table['in Moodys 32']) | (parent_large_table['in Moodys 34'])) & ((parent_large_table['in TFR-500']) | (parent_large_table['in Tenenbaum']))].shape[0]))

in Moodys 32: 79
in Moodys 32 and TFR: 44
in Moodys 32 and Tenenbaum: 32
in Moodys 32 and in TFR or Tenenbaum: 49
-----------------
in Moodys 34: 100
in Moodys 34 and TFR: 48
in Moodys 34 and Tenenbaum: 42
in Moodys 34 and in TFR or Tenenbaum: 58
-----------------
in Moodys 34 or 32: 116
in Moodys 34 or 32 and TFR: 53
in Moodys 34 or 32 and Tenenbaum: 43
in Moodys 34 or 32 and in TFR or Tenenbaum: 63


In [219]:
all_identified = parent_large_table[(parent_large_table['in HB 32']) | (parent_large_table['in HB 34']) | 
                                    (parent_large_table['in Moodys 32']) | (parent_large_table['in Moodys 34']) | 
                                    (parent_large_table['in HB GmbH'])]

print('all identified: ' + str(all_identified.shape[0]))
print('all identified and in TFR-500: ' + str(all_identified[all_identified['in TFR-500']].shape[0]))
print('all identified and in Tenenbaum: ' + str(all_identified[all_identified['in Tenenbaum']].shape[0]))
print('all identified and in Tenenbaum: ' + str(all_identified[(all_identified['in TFR-500']) | (all_identified['in Tenenbaum'])].shape[0]))

all identified: 157
all identified and in TFR-500: 68
all identified and in Tenenbaum: 50
all identified and in Tenenbaum: 79
