In [1]:
from pathlib import Path

import gspread
import pandas as pd
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials

# Authorize a gspread client from a service-account key file.
# The key file path is relative to the notebook's working directory.
# NOTE(review): oauth2client is deprecated upstream; consider moving to
# gspread's own service-account helper once the installed version is confirmed.
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]
creds = ServiceAccountCredentials.from_json_keyfile_name(
    filename='../gspread_creds.json',
    scopes=scope,
)
client = gspread.authorize(creds)

In [2]:
# Fetch the "reviewed-expanded" tab of the "Moodys 1934" spreadsheet.
worksheet = client.open("Moodys 1934").worksheet("reviewed-expanded")

# Each sheet row comes back as a dict; stack them into a DataFrame.
records = worksheet.get_all_records()
moodys34 = pd.DataFrame(records)

# Flag columns: a cell becomes True only when it holds exactly the string
# "TRUE"; every other value becomes False.
# The names below must match the sheet headers verbatim (including spelling).
bool_cols = [
    'mentions a subsidiary',
    'mentions stock ownership',
    'subsidiary is AG',
    'subsidiary is GmbH',
    'mentions an affilated company',
    'mentions a plant/office/branch',
    'is a subsidiary of a German firm',
    'other types of agreement',
]
moodys34[bool_cols] = moodys34[bool_cols].eq("TRUE")

# Replace the 'NA' placeholder in the affiliated-firm column with an empty string.
moodys34['affiliated German firm name'] = (
    moodys34['affiliated German firm name'].replace({'NA': ''})
)

In [3]:
# Bare expression as the cell's last line: renders the loaded frame as the cell output.
moodys34

Unnamed: 0,page,block,line,x0,y0,x1,y1,text,keyword,page_right,...,Master German firm name,reviewed_notes,mentions a subsidiary,mentions stock ownership,subsidiary is AG,subsidiary is GmbH,mentions an affilated company,mentions a plant/office/branch,is a subsidiary of a German firm,other types of agreement
0,176,25,5,74.65,410.171,524.00,421.870,"Together with Th. Goldschmidt of Essen, German...",German,176,...,,"with Th. Goldschmidt of Essen, Germany, organi...",False,False,False,False,False,False,False,False
1,176,25,5,74.65,410.171,524.00,421.870,"Together with Th. Goldschmidt of Essen, German...",German,176,...,,"with Th. Goldschmidt of Essen, Germany, organi...",False,False,False,False,False,False,False,False
2,305,74,1,305.30,75.673,524.20,86.974,German subsidiary which are subject to exchang...,German,305,...,,,False,False,False,False,False,False,False,False
3,351,5,1,77.50,98.241,165.40,105.170,German factory (not ourr.),German,351,...,,German factory appears in the balance sheet,False,False,False,False,False,True,False,False
4,375,34,4,303.10,117.600,525.20,131.500,acquired all capital stock of Berlin A Co. and...,Berlin,375,...,,No subsidiary in German,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,3377,27,73,225.85,446.732,370.85,457.033,"of oil fields in Germany and at Mar. 10,",German,3377,...,"Erdoelwerke Wathlingen, Wuerttembergische Mine...",In description: Owns entire capital stock of f...,True,False,False,True,False,False,False,False
128,3377,27,73,225.85,446.732,370.85,457.033,"of oil fields in Germany and at Mar. 10,",German,3377,...,Nordeuropaeische Oelgesellschaft m.b.H.,In description: Owns entire capital stock of f...,True,False,False,True,False,False,False,False
129,3377,27,73,225.85,446.732,370.85,457.033,"of oil fields in Germany and at Mar. 10,",German,3377,...,Gewerkschaft Regina zu Caan,In description: Owns entire capital stock of f...,True,False,False,True,False,False,False,False
130,3377,27,73,225.85,446.732,370.85,457.033,"of oil fields in Germany and at Mar. 10,",German,3377,...,Oldenburgische Oelausbeutungesellschaft m.b.H.,In description: Owns entire capital stock of f...,True,False,False,True,False,False,False,False


In [4]:
# Build the export copy: the 'Master US firm name' column is written out as
# 'US Company'. rename() returns a new frame, so `moodys34` itself is unchanged.
# (With rename's default settings a missing source column is silently ignored.)
moodys34_to_csv = moodys34.rename(columns={'Master US firm name': 'US Company'})

# to_csv does not create missing parent directories, so ensure the target
# folder exists first; otherwise a fresh checkout fails on this cell.
output_path = Path('output/moodys34.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
moodys34_to_csv.to_csv(output_path, index=False)