In [2]:
import pandas as pd
import gspread
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials

# Auth
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
creds = ServiceAccountCredentials.from_json_keyfile_name('../gspread_creds.json', scope)
client = gspread.authorize(creds)

### TFR-500

In [3]:
# Open the sheet
worksheet = client.open("tfr500_summarized").worksheet("classified")

# Get all records as list of dicts
records = worksheet.get_all_records()

# Convert to DataFrame
tfr500 = pd.DataFrame(records)

In [4]:
tfr500_gmbh = tfr500[tfr500['Type of Organization']=='GmbH']
tfr500_gmbh = tfr500_gmbh.groupby(['Name of Foreign Business', 'American Owner - Name']).first().reset_index()

In [5]:
tfr500_gmbh = tfr500_gmbh.rename(columns={
    'American Owner - Name': 'Master US firm name',
    'Name of Foreign Business': 'Master German firm name'
})

tfr500_gmbh = tfr500_gmbh[['Master US firm name', 'Master German firm name']]

In [6]:
tfr500_gmbh[tfr500_gmbh['Master German firm name'].str.contains('West')]

Unnamed: 0,Master US firm name,Master German firm name
79,"Hugo Stinnes Industries, Inc.",H. Westphal & Co.
201,"Socony-Vacuum Oil Company, Inc.",West-Oel Gesellschaft m.b.H.
202,Western Electric Export Corporation,Western Electric G.m.b.H.
203,The Westinghouse Air Brake Company,Westinghouse Bremsen Gesellschaft m.b.H.


### Moody's 1932

In [7]:
# Open the sheet
worksheet = client.open("Moodys 1932").worksheet("reviewed-expanded")

# Get all records as list of dicts
records = worksheet.get_all_records()

# Convert to DataFrame
moodys32 = pd.DataFrame(records)

In [8]:
moodys32_gmbh = moodys32[moodys32['subsidiary is GmbH']=='TRUE']
moodys32_gmbh = moodys32_gmbh[['Master US firm name', 'affiliated German firm name', 'Master German firm name']]

In [9]:
tfr500_moodys32 = tfr500_gmbh.merge(moodys32_gmbh, on='Master German firm name', how='outer', indicator=True, 
                                    suffixes=['', '_moodys32'])

tfr500_moodys32['in TFR-500'] = tfr500_moodys32['_merge'].isin(['left_only', 'both'])
tfr500_moodys32['in Moodys 32'] = tfr500_moodys32['_merge'].isin(['right_only', 'both'])

tfr500_moodys32['Master US firm name'] = tfr500_moodys32['Master US firm name'].fillna(tfr500_moodys32['Master US firm name_moodys32'])

tfr500_moodys32 = tfr500_moodys32.drop(columns=['_merge', 'Master US firm name_moodys32', 'affiliated German firm name'])

In [10]:
tfr500_moodys32

Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32
0,"Swiss ""Borvisk"" Company","""Borvisk"" Kunstseiden-Vertriebsgesellschaft, m...",True,False
1,Standard Oil Company (New Jersey),"""Gluckauf"" Deutsche Oel Gesellschaft Hersfeld ...",True,False
2,International Telephone and Telegraph Corp.,"""Volta"" Telephon & Signalbau G.m.b.H.",True,False
3,Edward G. Budd Manufacturing Company,"A. B. P. Gefolgschoftsheime, G.m.b.H.",True,False
4,The ARMCO International Corporation,ARMCO Eisen G.m.b.H.,True,False
...,...,...,...,...
223,Kupfer Bros. Co. Inc.,Wilheim Stern & Co. G.m.b.H.,True,False
224,The Dentists' Supply Co. of New York,Zahnfabrik Wienand Soehne & Co.,True,False
225,National Lead Company,Zinnwerke Wilheimsburg G.m.b.H.,True,False
226,Gillette Safety Rasor Company,Zoellner Werke G.m.b.H.,True,False


### Moody's 1934

In [11]:
# Open the sheet
worksheet = client.open("Moodys 1934").worksheet("reviewed-expanded")

# Get all records as list of dicts
records = worksheet.get_all_records()

# Convert to DataFrame
moodys34 = pd.DataFrame(records)

In [12]:
moodys34_gmbh = moodys34[moodys34['subsidiary is GmbH']=='TRUE']
moodys34_gmbh = moodys34_gmbh[['Master US firm name', 'affiliated German firm name', 'Master German firm name']]

moodys34_gmbh

Unnamed: 0,Master US firm name,affiliated German firm name,Master German firm name
8,Bakelite Corp.,Gorman Bakelite Corp. (Bakelite Gesellschaft),Bakelite Gesellschaft
15,The Atlantic Refining Company,Atlantic Refining Co. of Germany,The Atlantic Refining Company Germany GmbH
16,The Atlantic Refining Company,Allgemeine Oel Handels Ges. (Germany),"Allgemeine Oel-Handels, GmbH. (Oelhag)"
22,The Hobart Manufacturing Company,"Hobart Gesellschaft, Hamburg, Germany","Hobart Maschinen Gesellschaft, m.b.H."
24,"Hugo Stinnes Industries, Inc.",Hugo Stinnes G. m. b. H.,Hugo Stinnes G.m.b.H.
25,"Hugo Stinnes Industries, Inc.",Gewerkschaft Mathias Stinnes,Mathias Stinnes G.m.b.H.
26,American Radiator & Standard Sanitary Corporation,"Nationale Radiator Gesellschaft, m. b. H. (Ger...",Nationale Radiator Gesellschaft m.b.H.
27,American Radiator & Standard Sanitary Corporation,Deutsche Standard Werke G. m. b. H.,Deutsche Standard Werke G. m. b. H.
32,I. B. Kleinert Rubbert Company,I. B. Kleinert Rubber Co. G. m. b. H. (formerl...,"Kleinerts Hanseatische, Gummiwerke Gesellschaf..."
35,Norton Company,"Deutsche Norton Gesellschaft m.b.H., Wesseling...","Deutsche Norton-Gesellschaft, m.b.H."


In [13]:
tfr500_moodys32_moodys34 = tfr500_moodys32.merge(moodys34_gmbh, on='Master German firm name', how='outer', indicator=True,
                                                 suffixes=['', '_moodys34'])

tfr500_moodys32_moodys34['in TFR-500'] = tfr500_moodys32_moodys34['in TFR-500'].fillna(False)
tfr500_moodys32_moodys34['in Moodys 32'] = tfr500_moodys32_moodys34['in Moodys 32'].fillna(False)
tfr500_moodys32_moodys34['in Moodys 34'] = tfr500_moodys32_moodys34['_merge'].isin(['right_only', 'both'])

tfr500_moodys32_moodys34['Master US firm name'] = tfr500_moodys32_moodys34['Master US firm name'].fillna(tfr500_moodys32_moodys34['Master US firm name_moodys34'])

tfr500_moodys32_moodys34 = tfr500_moodys32_moodys34.drop(columns=['_merge', 'Master US firm name_moodys34', 'affiliated German firm name'])

  tfr500_moodys32_moodys34['in TFR-500'] = tfr500_moodys32_moodys34['in TFR-500'].fillna(False)
  tfr500_moodys32_moodys34['in Moodys 32'] = tfr500_moodys32_moodys34['in Moodys 32'].fillna(False)


In [14]:
tfr500_moodys32_moodys34

Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34
0,"Swiss ""Borvisk"" Company","""Borvisk"" Kunstseiden-Vertriebsgesellschaft, m...",True,False,False
1,Standard Oil Company (New Jersey),"""Gluckauf"" Deutsche Oel Gesellschaft Hersfeld ...",True,False,False
2,International Telephone and Telegraph Corp.,"""Volta"" Telephon & Signalbau G.m.b.H.",True,False,False
3,Edward G. Budd Manufacturing Company,"A. B. P. Gefolgschoftsheime, G.m.b.H.",True,False,False
4,The ARMCO International Corporation,ARMCO Eisen G.m.b.H.,True,False,False
...,...,...,...,...,...
236,Kupfer Bros. Co. Inc.,Wilheim Stern & Co. G.m.b.H.,True,False,False
237,The Dentists' Supply Co. of New York,Zahnfabrik Wienand Soehne & Co.,True,False,False
238,National Lead Company,Zinnwerke Wilheimsburg G.m.b.H.,True,False,False
239,Gillette Safety Rasor Company,Zoellner Werke G.m.b.H.,True,False,False


### Tenenbaum

In [15]:
# Open the sheet
worksheet = client.open("Tenenbaum").worksheet("Sheet1 expanded")

# Get all records as list of dicts
records = worksheet.get_all_records()

# Convert to DataFrame
tenenbaum = pd.DataFrame(records)

In [16]:
tenenbaum_gmbh = tenenbaum[tenenbaum['subsidiary is GmbH']=='TRUE']
tenenbaum_gmbh = tenenbaum_gmbh[['Master US firm name', 'affiliated German firm name', 'Master German firm name']]

In [17]:
tfr500_moodys32_moodys34_tenenbaum = tfr500_moodys32_moodys34.merge(tenenbaum_gmbh, on='Master German firm name', how='outer', indicator=True,
                                                 suffixes=['', '_Tenenbaum'])

tfr500_moodys32_moodys34_tenenbaum['in TFR-500'] = tfr500_moodys32_moodys34_tenenbaum['in TFR-500'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum['in Moodys 32'] = tfr500_moodys32_moodys34_tenenbaum['in Moodys 32'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum['in Moodys 34'] = tfr500_moodys32_moodys34_tenenbaum['in Moodys 34'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum['in Tenenbaum'] = tfr500_moodys32_moodys34_tenenbaum['_merge'].isin(['right_only', 'both'])

tfr500_moodys32_moodys34_tenenbaum['Master US firm name'] = tfr500_moodys32_moodys34_tenenbaum['Master US firm name'].fillna(tfr500_moodys32_moodys34_tenenbaum['Master US firm name_Tenenbaum'])

tfr500_moodys32_moodys34_tenenbaum = tfr500_moodys32_moodys34_tenenbaum.drop(columns=['_merge', 'Master US firm name_Tenenbaum', 'affiliated German firm name'])


  tfr500_moodys32_moodys34_tenenbaum['in TFR-500'] = tfr500_moodys32_moodys34_tenenbaum['in TFR-500'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum['in Moodys 32'] = tfr500_moodys32_moodys34_tenenbaum['in Moodys 32'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum['in Moodys 34'] = tfr500_moodys32_moodys34_tenenbaum['in Moodys 34'].fillna(False)


In [18]:
tfr500_moodys32_moodys34_tenenbaum

Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum
0,"Swiss ""Borvisk"" Company","""Borvisk"" Kunstseiden-Vertriebsgesellschaft, m...",True,False,False,False
1,Standard Oil Company (New Jersey),"""Gluckauf"" Deutsche Oel Gesellschaft Hersfeld ...",True,False,False,False
2,International Telephone and Telegraph Corp.,"""Volta"" Telephon & Signalbau G.m.b.H.",True,False,False,True
3,Edward G. Budd Manufacturing Company,"A. B. P. Gefolgschoftsheime, G.m.b.H.",True,False,False,False
4,The ARMCO International Corporation,ARMCO Eisen G.m.b.H.,True,False,False,True
...,...,...,...,...,...,...
245,Kupfer Bros. Co. Inc.,Wilheim Stern & Co. G.m.b.H.,True,False,False,False
246,The Dentists' Supply Co. of New York,Zahnfabrik Wienand Soehne & Co.,True,False,False,False
247,National Lead Company,Zinnwerke Wilheimsburg G.m.b.H.,True,False,False,False
248,Gillette Safety Rasor Company,Zoellner Werke G.m.b.H.,True,False,False,False


In [19]:
tfr500_moodys32_moodys34_tenenbaum.to_csv('output/gmbh_tfr500_moodys32_moodys34_tenenbaum.csv')

In [20]:
tfr500_moodys32_moodys34_tenenbaum[(tfr500_moodys32_moodys34_tenenbaum['in TFR-500']==True) &
                                   (tfr500_moodys32_moodys34_tenenbaum['in Moodys 32']==False) & 
                                   (tfr500_moodys32_moodys34_tenenbaum['in Moodys 34']==False) & 
                                   (tfr500_moodys32_moodys34_tenenbaum['in Tenenbaum']==False)]

Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum
0,"Swiss ""Borvisk"" Company","""Borvisk"" Kunstseiden-Vertriebsgesellschaft, m...",True,False,False,False
1,Standard Oil Company (New Jersey),"""Gluckauf"" Deutsche Oel Gesellschaft Hersfeld ...",True,False,False,False
3,Edward G. Budd Manufacturing Company,"A. B. P. Gefolgschoftsheime, G.m.b.H.",True,False,False,False
6,Underwood Elliott Fisher Company,Adrema Maschinenebau G.m.b.H.,True,False,False,False
7,Jadev Corporation,"Allgemeine Erdol Gesellschaft, m.b.H.",True,False,False,False
...,...,...,...,...,...,...
245,Kupfer Bros. Co. Inc.,Wilheim Stern & Co. G.m.b.H.,True,False,False,False
246,The Dentists' Supply Co. of New York,Zahnfabrik Wienand Soehne & Co.,True,False,False,False
247,National Lead Company,Zinnwerke Wilheimsburg G.m.b.H.,True,False,False,False
248,Gillette Safety Rasor Company,Zoellner Werke G.m.b.H.,True,False,False,False


### Handbuch GmbH

In [21]:
hb_gmbh = pd.read_excel('../../Moodys_name_to_handbuch/output/manual/gmbh_validated.xlsx', sheet_name='validated')
hb_gmbh = hb_gmbh[hb_gmbh['validated'].isin(['USC', 'USO', 'USOP'])]

hb_gmbh["is_duplicate"] = hb_gmbh.duplicated(subset=['Master German firm name', 'Master US firm name'])
# dropping duplicates
hb_gmbh = hb_gmbh[hb_gmbh["is_duplicate"] == False]

hb_gmbh = hb_gmbh[['german_name', 'Master German firm name', 'US_name', 'Master US firm name', 'validated']]

In [51]:
hb_gmbh_full = pd.read_csv('../../Handbuch_GmbH_1932/output/firms_structured.csv')

In [72]:
# merging back to HB GmbH

# for those matched to more than on GmbH in the Handbuch (Handbuch has a few firms with identical names), we keep only the first match
hb_gmbh_to_match = hb_gmbh_full[hb_gmbh_full['firm_name'].notnull()]
hb_gmbh_to_match = hb_gmbh_to_match.groupby('firm_name').first()

# merging back to the HB GmbH to access the rest of the information on the GmbH
hb_gmbh_merged_back = hb_gmbh.rename(columns={'german_name': 'firm_name'}).merge(hb_gmbh_to_match, on='firm_name', how='left')

In [22]:
hb_gmbh_counts = hb_gmbh['validated'].value_counts().reset_index()
hb_gmbh_counts.to_latex('output/table/HB_GmbH_count.tex', index=False)

In [23]:
hb_gmbh= hb_gmbh.rename(columns={'validated': 'HB GmbH class'})

In [24]:
tfr500_moodys32_moodys34_tenenbaum_hb_gmbh = tfr500_moodys32_moodys34_tenenbaum.merge(hb_gmbh, on=['Master German firm name', 'Master US firm name'], how='outer', indicator=True,
                                                 suffixes=['', '_hb_gmbh'], validate='one_to_one')

tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in TFR-500'] = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in TFR-500'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Moodys 32'] = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Moodys 32'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Moodys 34'] = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Moodys 34'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Tenenbaum'] = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Tenenbaum'].fillna(False)
tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['IDed in HB GmbH'] = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['_merge'].isin(['right_only', 'both'])

tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['Master US firm name'] = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['Master US firm name'].fillna(tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['US_name'])

tfr500_moodys32_moodys34_tenenbaum_hb_gmbh = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh.drop(columns=['_merge', 'german_name', 'US_name'])
tfr500_moodys32_moodys34_tenenbaum_hb_gmbh

  tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in TFR-500'] = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in TFR-500'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Moodys 32'] = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Moodys 32'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Moodys 34'] = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Moodys 34'].fillna(False)
  tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Tenenbaum'] = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['in Tenenbaum'].fillna(False)


Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,HB GmbH class,IDed in HB GmbH
0,"Swiss ""Borvisk"" Company","""Borvisk"" Kunstseiden-Vertriebsgesellschaft, m...",True,False,False,False,,False
1,Standard Oil Company (New Jersey),"""Gluckauf"" Deutsche Oel Gesellschaft Hersfeld ...",True,False,False,False,,False
2,International Telephone and Telegraph Corp.,"""Volta"" Telephon & Signalbau G.m.b.H.",True,False,False,True,,False
3,Edward G. Budd Manufacturing Company,"A. B. P. Gefolgschoftsheime, G.m.b.H.",True,False,False,False,,False
4,The ARMCO International Corporation,ARMCO Eisen G.m.b.H.,True,False,False,True,,False
...,...,...,...,...,...,...,...,...
265,Kupfer Bros. Co. Inc.,Wilheim Stern & Co. G.m.b.H.,True,False,False,False,,False
266,The Dentists' Supply Co. of New York,Zahnfabrik Wienand Soehne & Co.,True,False,False,False,,False
267,National Lead Company,Zinnwerke Wilheimsburg G.m.b.H.,True,False,False,False,,False
268,Gillette Safety Rasor Company,Zoellner Werke G.m.b.H.,True,False,False,False,,False


In [25]:
tfr500_moodys32_moodys34_tenenbaum_hb_gmbh.to_csv('output/gmbh_tfr500_moodys32_moodys34_tenenbaum_hb_gmbh.csv')

### Search for all firms in the HB GmbH 1932

In [26]:
not_ided_in_HB = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh[tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['IDed in HB GmbH']==False]

In [27]:
hb_gmbh_full = pd.read_csv('../../Handbuch_GmbH_1932/output/firms_structured.csv')

#### fuzzy matching

In [28]:
from rapidfuzz import process, fuzz

# Function to get top 3 matches from df_b for each row in df_a
def get_best_match(query, choices, scorer=fuzz.token_sort_ratio):
    """Return the single best fuzzy match."""
    match, score, idx = process.extractOne(query, choices, scorer=scorer)
    return match, score, idx

results = []

# Loop through df_a
for i, row in not_ided_in_HB.iterrows():
    query = row["Master German firm name"]
    match, score, idx = get_best_match(query, hb_gmbh_full["firm_name"].tolist())
    
    # merge row from A with its best match from B
    match_row = hb_gmbh_full.iloc[idx].to_dict()
    combined = {**row.to_dict(), **match_row}
    combined["fuzzy_score"] = score
    results.append(combined)

# Final result with all columns from A + B + score
matches_df = pd.DataFrame(results)

In [29]:
# loads the previous manual checking results
matches_df_checked = pd.read_excel('manual/hb_gmbh_32_fuzzy_matched_checked.xlsx')
matches_df_checked = matches_df_checked[['Master German firm name', 'firm_name', 'match_is_good']].groupby(['Master German firm name', 'firm_name', 'match_is_good']).first().reset_index()
matches_df_auto_manual_merged = matches_df.merge(matches_df_checked[['Master German firm name', 'firm_name', 'match_is_good']], 
                 on=['Master German firm name', 'firm_name'],
                 how='left')
matches_df_auto_manual_merged

Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,HB GmbH class,IDed in HB GmbH,index,page,column,firm_name,location,date,capital,business,persons,type,fuzzy_score,match_is_good
0,"Swiss ""Borvisk"" Company","""Borvisk"" Kunstseiden-Vertriebsgesellschaft, m...",True,False,False,False,,False,299,166,right,Borvisk-Kunstseiden Vertriebsgesellschaft,"Berlin W 62, Maaßenstr. 36",26.5.15,RM 40 000,Herstellung von Kunstseide,Dr. jur. E. Wohrizek; Alfr. Demant,2 Gf. od. 1 Gf. u. 1 Pk. od. 2 Pk.,84.782609,1.0
1,Standard Oil Company (New Jersey),"""Gluckauf"" Deutsche Oel Gesellschaft Hersfeld ...",True,False,False,False,,False,521,277,right,Deutsche Racine Gesellschaft Rudolf Bauer & Co.,"Berlin NW 6, Luisenstr. 21",6.7.27,RM 20000,Vertrieb der elektrischen „Racine“ Parkettbode...,R. Bauer; F. Bauer,Jed. Gf.,59.259259,0.0
2,International Telephone and Telegraph Corp.,"""Volta"" Telephon & Signalbau G.m.b.H.",True,False,False,True,,False,2573,1303,right,Volta Telephon- und Signalbau,"Berlin W 35, Potsdamei-Straße 38",1.7.05,RM 268 000,"Installation, Verkauf von Telefon-, Telegrafen...",A. Hoffmann; L. Hoepfner,2 Gf. gern.,54.545455,1.0
3,Edward G. Budd Manufacturing Company,"A. B. P. Gefolgschoftsheime, G.m.b.H.",True,False,False,False,,False,901,467,right,R.K. Ad. Gochsheimer,"Frankfurt a. M., Kaiserhofstr. 12",28.1.16,RM 16000,"Handel mit Weinen, Spirituosen und sonstigen e...",Jak. Gochsheimer,Der Gf.,52.631579,0.0
4,The ARMCO International Corporation,ARMCO Eisen G.m.b.H.,True,False,False,True,,False,414,224,left,A. F. H. Eilers,"Hamburg, Farmsener Str. 28",29. 12. 19,RM 1000,Kohlen- und Koks-Geschäft,Richard Mildner,Jed. Gf.,51.428571,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,Kupfer Bros. Co. Inc.,Wilheim Stern & Co. G.m.b.H.,True,False,False,False,,False,2320,1177,left,Wilhelm Stern & Co.,Fürth,2.9.11,"RM 500,000","Herstellung von Metall-, Bunt- und Luxuspapier...",B. Kaufmann; stellv. Dir. Erhard Ottenberger,"vertr. jeder allein, sonst 2 Gf. od. 1 Gf. u. ...",76.595745,1.0
231,The Dentists' Supply Co. of New York,Zahnfabrik Wienand Soehne & Co.,True,False,False,False,,False,2733,1383,right,Zahnfabrik Wienand Söhne & Co.,Sprendlingen in Hessen,7.6.21,RM 800 000,Fabrikation von künstlichen Zähnen,Dr. Aug. und Dr. Fritz Wienand,Jed. Gf.,95.081967,1.0
232,National Lead Company,Zinnwerke Wilheimsburg G.m.b.H.,True,False,False,False,,False,2760,1397,left,Zinnwerke Wilhelmsburg,Wilhelmsburg bei Harburg,12.11.03,RM 1 500 000,"Umwandlung von Erzen, Rohmetallen u. Hüttenpro...",Henryk Goldmann; Dr. phil. W. Baer,2 Gf. od. 1 Gf. u. 1 Pk. od. 2 Pk.,79.245283,1.0
233,Gillette Safety Rasor Company,Zoellner Werke G.m.b.H.,True,False,False,False,,False,2626,1330,left,Im. Weller,Berlin,7. 11. 20,RM 50 000,Tuchhandlung,Siegfr. Lichtenstein,1 od. 2 Gf.,54.545455,0.0


In [58]:
matches_df_auto_manual_merged.to_excel('output/hb_gmbh_32_fuzzy_matched_auto.xlsx', index=False)

In [59]:
# after the manual updates on matches_df_auto_manual_merged
matches_df_checked = pd.read_excel('manual/hb_gmbh_32_fuzzy_matched_checked.xlsx')

In [60]:
matches_df_checked

Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,HB GmbH class,IDed in HB GmbH,index,page,column,firm_name,location,date,capital,business,persons,type,fuzzy_score,match_is_good
0,"Swiss ""Borvisk"" Company","""Borvisk"" Kunstseiden-Vertriebsgesellschaft, m...",True,False,False,False,,False,299,166,right,Borvisk-Kunstseiden Vertriebsgesellschaft,"Berlin W 62, Maaßenstr. 36",26.5.15,RM 40 000,Herstellung von Kunstseide,Dr. jur. E. Wohrizek; Alfr. Demant,2 Gf. od. 1 Gf. u. 1 Pk. od. 2 Pk.,84.782609,1
1,Standard Oil Company (New Jersey),"""Gluckauf"" Deutsche Oel Gesellschaft Hersfeld ...",True,False,False,False,,False,521,277,right,Deutsche Racine Gesellschaft Rudolf Bauer & Co.,"Berlin NW 6, Luisenstr. 21",6.7.27,RM 20000,Vertrieb der elektrischen „Racine“ Parkettbode...,R. Bauer; F. Bauer,Jed. Gf.,59.259259,0
2,International Telephone and Telegraph Corp.,"""Volta"" Telephon & Signalbau G.m.b.H.",True,False,False,True,,False,2573,1303,right,Volta Telephon- und Signalbau,"Berlin W 35, Potsdamei-Straße 38",1.7.05,RM 268 000,"Installation, Verkauf von Telefon-, Telegrafen...",A. Hoffmann; L. Hoepfner,2 Gf. gern.,54.545455,1
3,Edward G. Budd Manufacturing Company,"A. B. P. Gefolgschoftsheime, G.m.b.H.",True,False,False,False,,False,901,467,right,R.K. Ad. Gochsheimer,"Frankfurt a. M., Kaiserhofstr. 12",28.1.16,RM 16000,"Handel mit Weinen, Spirituosen und sonstigen e...",Jak. Gochsheimer,Der Gf.,52.631579,0
4,The ARMCO International Corporation,ARMCO Eisen G.m.b.H.,True,False,False,True,,False,414,224,left,A. F. H. Eilers,"Hamburg, Farmsener Str. 28",29. 12. 19,RM 1000,Kohlen- und Koks-Geschäft,Richard Mildner,Jed. Gf.,51.428571,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,Kupfer Bros. Co. Inc.,Wilheim Stern & Co. G.m.b.H.,True,False,False,False,,False,2320,1177,left,Wilhelm Stern & Co.,Fürth,2.9.11,"RM 500,000","Herstellung von Metall-, Bunt- und Luxuspapier...",B. Kaufmann; stellv. Dir. Erhard Ottenberger,"vertr. jeder allein, sonst 2 Gf. od. 1 Gf. u. ...",76.595745,1
231,The Dentists' Supply Co. of New York,Zahnfabrik Wienand Soehne & Co.,True,False,False,False,,False,2733,1383,right,Zahnfabrik Wienand Söhne & Co.,Sprendlingen in Hessen,7.6.21,RM 800 000,Fabrikation von künstlichen Zähnen,Dr. Aug. und Dr. Fritz Wienand,Jed. Gf.,95.081967,1
232,National Lead Company,Zinnwerke Wilheimsburg G.m.b.H.,True,False,False,False,,False,2760,1397,left,Zinnwerke Wilhelmsburg,Wilhelmsburg bei Harburg,12.11.03,RM 1 500 000,"Umwandlung von Erzen, Rohmetallen u. Hüttenpro...",Henryk Goldmann; Dr. phil. W. Baer,2 Gf. od. 1 Gf. u. 1 Pk. od. 2 Pk.,79.245283,1
233,Gillette Safety Rasor Company,Zoellner Werke G.m.b.H.,True,False,False,False,,False,2626,1330,left,Im. Weller,Berlin,7. 11. 20,RM 50 000,Tuchhandlung,Siegfr. Lichtenstein,1 od. 2 Gf.,54.545455,0


#### manually checking the rest

In [61]:
needs_manual_search = matches_df_checked[matches_df_checked['match_is_good']==0]
needs_manual_search = needs_manual_search[['Master US firm name', 'Master German firm name', 'in TFR-500', 'in Moodys 32', 'in Moodys 34', 'in Tenenbaum', 'IDed in HB GmbH']]

In [62]:
manual_searched = pd.read_excel('manual/hb_gmbh_32_search_manual.xlsx')
manual_searched = manual_searched[['Master German firm name', 'firm_name', 'searched']].groupby('Master German firm name').first().reset_index()
needs_manual_search_auto_manual_merged = needs_manual_search.merge(manual_searched, 
                                                                   on='Master German firm name', 
                                                                   how='left')

In [63]:
needs_manual_search_auto_manual_merged.to_excel('output/hb_gmbh_32_to_search_auto.xlsx', index=False)

In [64]:
# after the manual updates on matches_df_auto_manual_merged
searched = pd.read_excel('manual/hb_gmbh_32_search_manual.xlsx')

In [65]:
# for those matched to more than on GmbH in the Handbuch (Handbuch has a few firms with identical names), we keep only the first match
hb_gmbh_to_match = hb_gmbh_full[hb_gmbh_full['firm_name'].notnull()]
hb_gmbh_to_match = hb_gmbh_to_match.groupby('firm_name').first()

# merging back to the HB GmbH to access the rest of the information on the GmbH
searched_merged_hb_gmbh = searched.merge(hb_gmbh_to_match, on='firm_name', how='left')


# concating the firms fuzzy matched and manually searched
fuzzy_searched_merged_hb_gmbh = pd.concat([searched_merged_hb_gmbh, matches_df_checked[matches_df_checked['match_is_good']==1]]).reset_index()
fuzzy_searched_merged_hb_gmbh

Unnamed: 0,level_0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,IDed in HB GmbH,firm_name,searched,...,column,location,date,capital,business,persons,type,HB GmbH class,fuzzy_score,match_is_good
0,0,Standard Oil Company (New Jersey),"""Gluckauf"" Deutsche Oel Gesellschaft Hersfeld ...",True,False,False,False,False,,1.0,...,,,,,,,,,,
1,1,Edward G. Budd Manufacturing Company,"A. B. P. Gefolgschoftsheime, G.m.b.H.",True,False,False,False,False,,1.0,...,,,,,,,,,,
2,2,The ARMCO International Corporation,ARMCO Eisen G.m.b.H.,True,False,False,True,False,,1.0,...,,,,,,,,,,
3,3,Underwood Elliott Fisher Company,Adrema Maschinenebau G.m.b.H.,True,False,False,False,False,„Adrema“ Maschinenbau-Gesellschaft,1.0,...,left,"Berlin-Charlottenburg, Alt-Moabit 62/63",2. 4. 13,RM 1000 000,Maschinenfabrikation,Julius Goldschmidt,1 Gf. od. 2 Pk,,,
4,4,The Atlantic Refining Company,"Allgemeine Oel-Handels, GmbH. (Oelhag)",True,False,True,True,False,Allgemeine Oel-Handels-Gesellschaft,1.0,...,left,"Hamburg, Mittelweg 38",8. 12. 20,RM 9 000 000,Handel mit Gelen und verwandten Waren,Emanuel Rein,2 Gf. od. 1 Gf. od. 2 Pk.,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,228,Western Electric Export Corporation,Western Electric G.m.b.H.,True,False,False,False,False,Deutsche Western Electric,,...,left,"Berlin W 15, Emser Straße Nr 40-41",20. 6. 29,RM 200 000,"Manufacture, sale, and distribution of film an...",,,,64.000000,1.0
231,230,Kupfer Bros. Co. Inc.,Wilheim Stern & Co. G.m.b.H.,True,False,False,False,False,Wilhelm Stern & Co.,,...,left,Fürth,2.9.11,"RM 500,000","Herstellung von Metall-, Bunt- und Luxuspapier...",B. Kaufmann; stellv. Dir. Erhard Ottenberger,"vertr. jeder allein, sonst 2 Gf. od. 1 Gf. u. ...",,76.595745,1.0
232,231,The Dentists' Supply Co. of New York,Zahnfabrik Wienand Soehne & Co.,True,False,False,False,False,Zahnfabrik Wienand Söhne & Co.,,...,right,Sprendlingen in Hessen,7.6.21,RM 800 000,Fabrikation von künstlichen Zähnen,Dr. Aug. und Dr. Fritz Wienand,Jed. Gf.,,95.081967,1.0
233,232,National Lead Company,Zinnwerke Wilheimsburg G.m.b.H.,True,False,False,False,False,Zinnwerke Wilhelmsburg,,...,left,Wilhelmsburg bei Harburg,12.11.03,RM 1 500 000,"Umwandlung von Erzen, Rohmetallen u. Hüttenpro...",Henryk Goldmann; Dr. phil. W. Baer,2 Gf. od. 1 Gf. u. 1 Pk. od. 2 Pk.,,79.245283,1.0


In [84]:
hb_gmbh_merged_back_and_dummies = hb_gmbh_merged_back.merge(tfr500_moodys32_moodys34_tenenbaum_hb_gmbh.drop(columns=['HB GmbH class']), 
                                                            on=['Master US firm name', 'Master German firm name'], 
                                                            how='left', 
                                                            indicator=True)
hb_gmbh_merged_back_and_dummies

Unnamed: 0,firm_name,Master German firm name,US_name,Master US firm name,HB GmbH class,index,page,column,location,date,capital,business,persons,type,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,IDed in HB GmbH,_merge
0,American News Company,American News Company,"American News Company, Inc.","American News Company, Inc.",USC,53,43,right,"Hamburg, Kl. Reichenstr. 21",9.8.22,RM 600,Herausgabe einer Zeitung in englischer Sprache,Eugene Deutsch; A. H. Bergmann geb. Thomaschek,2 Gf.,False,False,False,False,True,both
1,Columbia Film,Columbia Film,Columbia Pictures Corporation,Columbia Pictures Corp.,USC,428,231,left,"Berlin, Charlottenstr. 96",14. 5. 20,RM 600,"Filmschauspiel, Kostüme",P. Groß; K. Rellstab,Einl.,False,False,False,False,True,both
2,O-Cedar,O-Cedar,O-Cedar Corporation,O-Cedar Corporation,USC,1810,922,left,"Berlin-Borsigwalde, Charlottenburger Str. 15-26",,RM 800 000,Fabrikation und Vertrieb von O-Cedar-Fabrikate...,Fr. Howe Packord,Jed. Gf.,False,False,False,False,True,both
3,Fairbanks,The Fairbanks G.m.b.H. Company,Fairbanks Company,The Fairbanks Company,USO,710,372,left,"Hamburg, Brook 1",30.12.19,RM 5000,Vertretung der The Fairbanks Company (Europe) ...,H. S. Fainaru,1 Gf.,True,False,False,False,True,both
4,Gillette Safety Razor Co.,Gillette Safety Razor Co.,Gillette Safety Razor Co.,Gillette Safety Rasor Company,USO,887,460,right,"Hamburg, Spitalstr. 10",26. 3. 08,RM 62 500,Gewerbliche und fabrikatorische Ausbeutung u. ...,C. Ferd. Adderhalden,Jed. Gf.,False,False,False,False,True,both
5,Addressograph,Addressograph-Multigraph G.m.b.H.,Addressograph Company,Addressograph-Multigraph Corporation,USO,9,21,right,"Berlin S 24, Ritterstr. 99",28.3.28,RM 700000,Vertrieb von Organisationsmaschinen u. Büroart...,R. I. Grog; CI. E. Thone; A. Porter,Jed. Gf. od. 1 Gf. u. 1 Pk. Sacheinl. RM 391000,True,False,False,True,True,both
6,Aeolian Company,Aeolian Company,Aeolian Company,Aeolian Company,USO,14,24,left,"Berlin W 10, Lützowufer 17",5. 6. 07,RM 500 000,Herstellung und Vertrieb von Musikinstrumenten,John Albert Findley; Johannes Hauff,1 Gf. od. 2 Pk,False,False,False,False,True,both
7,American Oil Company,American Oil Company,American Oil Company,American Oil Company,USO,53,43,right,"Hamburg, Billwärder Steindamm 14/19",6.3.20,RM 500,Friedrich Bachmann,Friedrich Bachmann,1 Gf.,False,False,False,False,True,both
8,Bakelite,Bakelite Gesellschaft,Bakelite Corporation,Bakelite Corp.,USO,130,82,left,"Berlin W 35, Lützowstr. 32",25. 5. 10,RM 1100 000,Verwertung von Patenten von Kondensationsprodu...,David Sachs; Dr. phil. Max Weger,2 Gf. oder 1 Gf. u. 1 Pk.,False,False,True,False,True,both
9,Bausch & Lomb Optical Co.,Bausch & Lomb Optical Co.,Bausch & Lomb Optical Company,Bausch & Lomb Optical Co.,USO,171,102,right,"Frankfurt a. M., Schillerstraße 30",30.7.02,"RM 100,000",Fabrikation und Vertrieb von optischen Instrum...,August Heinrich Lomb,1 Gf or 1 Pk,False,False,False,False,True,both


In [85]:
gmbh_in_hb = pd.concat([fuzzy_searched_merged_hb_gmbh, hb_gmbh_merged_back_and_dummies]).drop(columns=['level_0', 'fuzzy_score', 'match_is_good'])
gmbh_in_hb.to_csv('output/all_searched_in_hb_gmbh.csv')

In [86]:
gmbh_in_hb

Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,IDed in HB GmbH,firm_name,searched,index,...,column,location,date,capital,business,persons,type,HB GmbH class,US_name,_merge
0,Standard Oil Company (New Jersey),"""Gluckauf"" Deutsche Oel Gesellschaft Hersfeld ...",True,False,False,False,False,,1.0,,...,,,,,,,,,,
1,Edward G. Budd Manufacturing Company,"A. B. P. Gefolgschoftsheime, G.m.b.H.",True,False,False,False,False,,1.0,,...,,,,,,,,,,
2,The ARMCO International Corporation,ARMCO Eisen G.m.b.H.,True,False,False,True,False,,1.0,,...,,,,,,,,,,
3,Underwood Elliott Fisher Company,Adrema Maschinenebau G.m.b.H.,True,False,False,False,False,„Adrema“ Maschinenbau-Gesellschaft,1.0,12.0,...,left,"Berlin-Charlottenburg, Alt-Moabit 62/63",2. 4. 13,RM 1000 000,Maschinenfabrikation,Julius Goldschmidt,1 Gf. od. 2 Pk,,,
4,The Atlantic Refining Company,"Allgemeine Oel-Handels, GmbH. (Oelhag)",True,False,True,True,False,Allgemeine Oel-Handels-Gesellschaft,1.0,38.0,...,left,"Hamburg, Mittelweg 38",8. 12. 20,RM 9 000 000,Handel mit Gelen und verwandten Waren,Emanuel Rein,2 Gf. od. 1 Gf. od. 2 Pk.,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30,Oppenheimer (S.) & Company,S. Oppenheimer & Co.,False,False,False,False,True,S. Oppenheimer & Co.,,1827.0,...,right,"Hamburg, Catharinenkirchhof 2",27. 8. 20,RM 10 000,"Handelsgeschäfte mit Därmen, u. anderen animal...",A. W. Kempner; L. C. Stör,Jed. Gf.,USO,Oppenheimer (S.) & Company,both
31,German-American Tobacco Co.,German-American Tobacco Co.,False,False,False,False,True,German American Tobacco Company,,844.0,...,left,"Berlin W 10, Lützowufer 17",27. 5. 31,RM 20 000,Handel mit Tabak und Tabakserzeugnissen aller ...,F. Wirth jun.,1 od. 2 Gf.,USO,German-American Tobacco Co.,both
32,"Holophane Company, Inc.",Holophane-Gesellschaft,False,False,False,False,True,Holophane-Gesellschaft,,1160.0,...,left,"Berlin SW 68, Alte Jakobstraße 20/22",16.5.11,"RM 20,000",Vertrieb der Produkte der Holophane-Gesellscha...,J. Franck,Der Gf.,USOP,"Holophane Company, Inc.",both
33,Hupp Motor Car Corporation,Hupp Motors,False,False,False,False,True,Hupp Motors,,1190.0,...,left,Hamburg,10. 6. 29,RM 20 000,"Verkauf, Lagerung u. Spedition von Beförderung...",Th. Onken,,USOP,Hupp Motor Car Corporation,both


#### combining

In [37]:
all_not_ided_searched = fuzzy_searched_merged_hb_gmbh.copy()
all_not_ided_searched['appears in HB GmbH'] = all_not_ided_searched['firm_name'].notnull()
all_not_ided_searched = all_not_ided_searched[['Master US firm name', 'Master German firm name', 'appears in HB GmbH']]

In [38]:
gmbh_large_table = tfr500_moodys32_moodys34_tenenbaum_hb_gmbh.merge(all_not_ided_searched, 
                                                 on=['Master US firm name', 'Master German firm name'],
                                                 how='outer')

gmbh_large_table['appears in HB GmbH'] = gmbh_large_table['appears in HB GmbH'].fillna(False)

  gmbh_large_table['appears in HB GmbH'] = gmbh_large_table['appears in HB GmbH'].fillna(False)


In [39]:
gmbh_large_table

Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,HB GmbH class,IDed in HB GmbH,appears in HB GmbH
0,Addressograph-Multigraph Corporation,Addressograph-Multigraph G.m.b.H.,True,False,False,True,USO,True,False
1,Aeolian Company,Aeolian Company,False,False,False,False,USO,True,False
2,American Cyanamid Company,Farsals-Gesellschaft m.b.H.,True,False,False,False,,False,False
3,"American News Company, Inc.",American News Company,False,False,False,False,USC,True,False
4,American Nickel Alloy Mfg. Corp.,Cosmo Montangesellschaft fuer metallurgische I...,True,False,False,False,,False,False
...,...,...,...,...,...,...,...,...,...
265,Western Electric Export Corporation,Western Electric G.m.b.H.,True,False,False,False,,False,True
266,"William R. Warner & Co., Inc.","Warner, W.R. and Co. G.m.b.H.",True,False,False,False,,False,True
267,Witroth Corporation,Oppelener Lagerhaus Gesellschaft,True,False,False,False,,False,True
268,Witroth Corporation,Ostrhederei,True,False,False,False,,False,True


In [40]:
gmbh_in_hb = gmbh_large_table[gmbh_large_table['IDed in HB GmbH']==True]
gmbh_in_hb_counts = gmbh_in_hb.groupby('HB GmbH class').agg(
    **{
    "count": ("in TFR-500", "size"),       # count of all rows
    "in TFR": ("in TFR-500", "sum")          # sum of True values (sin
}).reindex(['USO', 'USC', 'USOP'])

gmbh_in_hb_counts.to_latex('output/table/hb_gbmh_count_TFR.tex')

## Search for firms in the large list

In [41]:
kw = 'Western Electric'
tfr500_moodys32_moodys34_tenenbaum_hb_gmbh[tfr500_moodys32_moodys34_tenenbaum_hb_gmbh['Master German firm name'].str.contains(kw, case=False)]

Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,HB GmbH class,IDed in HB GmbH
263,Western Electric Export Corporation,Western Electric G.m.b.H.,True,False,False,False,,False


In [42]:
gmbh_large_table

Unnamed: 0,Master US firm name,Master German firm name,in TFR-500,in Moodys 32,in Moodys 34,in Tenenbaum,HB GmbH class,IDed in HB GmbH,appears in HB GmbH
0,Addressograph-Multigraph Corporation,Addressograph-Multigraph G.m.b.H.,True,False,False,True,USO,True,False
1,Aeolian Company,Aeolian Company,False,False,False,False,USO,True,False
2,American Cyanamid Company,Farsals-Gesellschaft m.b.H.,True,False,False,False,,False,False
3,"American News Company, Inc.",American News Company,False,False,False,False,USC,True,False
4,American Nickel Alloy Mfg. Corp.,Cosmo Montangesellschaft fuer metallurgische I...,True,False,False,False,,False,False
...,...,...,...,...,...,...,...,...,...
265,Western Electric Export Corporation,Western Electric G.m.b.H.,True,False,False,False,,False,True
266,"William R. Warner & Co., Inc.","Warner, W.R. and Co. G.m.b.H.",True,False,False,False,,False,True
267,Witroth Corporation,Oppelener Lagerhaus Gesellschaft,True,False,False,False,,False,True
268,Witroth Corporation,Ostrhederei,True,False,False,False,,False,True


In [43]:
print('in TFR: ' + str(gmbh_large_table[(gmbh_large_table['in TFR-500'])].shape[0]))
print('in Tenenbaum: ' + str(gmbh_large_table[(gmbh_large_table['in Tenenbaum'])].shape[0]))
print('in TFR or Tenenbaum: ' + str(gmbh_large_table[((gmbh_large_table['in TFR-500']) |
                    (gmbh_large_table['in Tenenbaum']))].shape[0]))
print('---------')
print('in TFR, and present in HB GmbH: ' + str(gmbh_large_table[(gmbh_large_table['in TFR-500']) &
                    (gmbh_large_table['appears in HB GmbH'])].shape[0]))
print('in Tenenbaum, and present in HB GmbH: ' + str(gmbh_large_table[(gmbh_large_table['in Tenenbaum']) &
                    (gmbh_large_table['appears in HB GmbH'])].shape[0]))
print('in TFR or Tenenbaum, and present in HB GmbH: ' + str(gmbh_large_table[((gmbh_large_table['in TFR-500']) |
                    (gmbh_large_table['in Tenenbaum'])) & 
                    (gmbh_large_table['appears in HB GmbH'])].shape[0]))

in TFR: 209
in Tenenbaum: 59
in TFR or Tenenbaum: 221
---------
in TFR, and present in HB GmbH: 149
in Tenenbaum, and present in HB GmbH: 45
in TFR or Tenenbaum, and present in HB GmbH: 156


In [44]:
print('IDed in HB GmbH: ' + str(gmbh_large_table[(gmbh_large_table['IDed in HB GmbH'])].shape[0]))
print('IDed in HB GmbH and in TFR: ' + str(gmbh_large_table[(gmbh_large_table['IDed in HB GmbH']) & (gmbh_large_table['in TFR-500'])].shape[0]))
print('IDed in HB GmbH and in Tenenbaum: ' + str(gmbh_large_table[(gmbh_large_table['IDed in HB GmbH']) & (gmbh_large_table['in Tenenbaum'])].shape[0]))
print('IDed HB GmbH and in TFR or Tenenbaum: ' + str(gmbh_large_table[(gmbh_large_table['IDed in HB GmbH']) & ((gmbh_large_table['in TFR-500']) |
                    (gmbh_large_table['in Tenenbaum']))].shape[0]))

IDed in HB GmbH: 35
IDed in HB GmbH and in TFR: 13
IDed in HB GmbH and in Tenenbaum: 4
IDed HB GmbH and in TFR or Tenenbaum: 13


In [45]:
print('in Moodys 32: ' + str(gmbh_large_table[(gmbh_large_table['in Moodys 32'])].shape[0]))
print('in Moodys 32 and in TFR: ' + str(gmbh_large_table[(gmbh_large_table['in Moodys 32']) & (gmbh_large_table['in TFR-500'])].shape[0]))
print('in Moodys 32 and in Tenenbaum: ' + str(gmbh_large_table[(gmbh_large_table['in Moodys 32']) & (gmbh_large_table['in Tenenbaum'])].shape[0]))
print('in Moodys 32 and in TFR or Tenenbaum: ' + str(gmbh_large_table[(gmbh_large_table['in Moodys 32']) & ((gmbh_large_table['in TFR-500']) |
                    (gmbh_large_table['in Tenenbaum']))].shape[0]))
print('----------------')
print('in Moodys 34: ' + str(gmbh_large_table[(gmbh_large_table['in Moodys 34'])].shape[0]))
print('in Moodys 34 and in TFR: ' + str(gmbh_large_table[(gmbh_large_table['in Moodys 34']) & (gmbh_large_table['in TFR-500'])].shape[0]))
print('in Moodys 34 and in Tenenbaum: ' + str(gmbh_large_table[(gmbh_large_table['in Moodys 34']) & (gmbh_large_table['in Tenenbaum'])].shape[0]))
print('in Moodys 34 and in TFR or Tenenbaum: ' + str(gmbh_large_table[(gmbh_large_table['in Moodys 34']) & ((gmbh_large_table['in TFR-500']) |
                    (gmbh_large_table['in Tenenbaum']))].shape[0]))
print('----------------')
print('in Moodys 32 or 34: ' + str(gmbh_large_table[((gmbh_large_table['in Moodys 32']) | (gmbh_large_table['in Moodys 34']))].shape[0]))
print('in Moodys 32 or 34 and in TFR: ' + str(gmbh_large_table[((gmbh_large_table['in Moodys 32']) | (gmbh_large_table['in Moodys 34']))
                                                                & (gmbh_large_table['in TFR-500'])].shape[0]))
print('in Moodys 32 or 34 and in Tenenbaum: ' + str(gmbh_large_table[((gmbh_large_table['in Moodys 32']) | (gmbh_large_table['in Moodys 34']))
                                                                      & (gmbh_large_table['in Tenenbaum'])].shape[0]))
print('in Moodys 32 or 34 and in TFR or Tenenbaum: ' + str(gmbh_large_table[((gmbh_large_table['in Moodys 32']) | (gmbh_large_table['in Moodys 34']))
                                                                             & ((gmbh_large_table['in TFR-500']) | (gmbh_large_table['in Tenenbaum']))].shape[0]))

in Moodys 32: 43
in Moodys 32 and in TFR: 24
in Moodys 32 and in Tenenbaum: 15
in Moodys 32 and in TFR or Tenenbaum: 27
----------------
in Moodys 34: 46
in Moodys 34 and in TFR: 26
in Moodys 34 and in Tenenbaum: 19
in Moodys 34 and in TFR or Tenenbaum: 27
----------------
in Moodys 32 or 34: 64
in Moodys 32 or 34 and in TFR: 32
in Moodys 32 or 34 and in Tenenbaum: 23
in Moodys 32 or 34 and in TFR or Tenenbaum: 35


In [46]:
all_identified = gmbh_large_table[((gmbh_large_table['in Moodys 32']) | (gmbh_large_table['in Moodys 34'])) | (gmbh_large_table['IDed in HB GmbH'])]

In [47]:
print('all identified: ' + str(all_identified.shape[0]))
print('all identified and in TFR: ' + str(all_identified[all_identified['in TFR-500']].shape[0]))
print('all identified and in Tenenbaum: ' + str(all_identified[all_identified['in Tenenbaum']].shape[0]))
print('all identified and in TFR or Tenenbaum: ' + str(all_identified[(all_identified['in TFR-500']) | (all_identified['in Tenenbaum'])].shape[0]))

all identified: 95
all identified and in TFR: 43
all identified and in Tenenbaum: 26
all identified and in TFR or Tenenbaum: 46
