In [None]:
import pandas as pd
import geopandas as gpd

def fix_borders(df):
    """Re-map IDs through the ID-change table and aggregate rows sharing an ID.

    The lookup table is read from ``id-changes/bw-id-changes.xlsx``: its
    ``id1`` column holds the IDs to be replaced and ``id2`` the IDs that
    replace them. IDs that do not occur in ``id1`` are kept unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``ID`` column. The frame passed in is left untouched;
        the re-mapping is done on a copy.

    Returns
    -------
    pandas.DataFrame
        One row per (re-mapped) ID with all remaining columns aggregated by
        ``groupby('ID').sum()``. ``ID`` is returned as a regular column.
    """
    conversion = pd.read_excel("id-changes/bw-id-changes.xlsx")
    conversion_dict = dict(zip(conversion['id1'], conversion['id2']))

    # ``map`` yields NaN for IDs without an entry in the table, so those fall
    # back to their original value. ``assign`` returns a new frame, which keeps
    # the caller's ``df`` from being modified as a side effect.
    remapped = df.assign(ID=df['ID'].map(conversion_dict).fillna(df['ID']))

    # NOTE(review): ``sum`` is applied to every non-key column; confirm that
    # callers only pass columns for which summing is meaningful.
    return remapped.groupby('ID', as_index=False).sum()

# Load data and keep relevant columns
df = pd.read_stata('braun-franke-2022.dta')


df = df[['ID', 'name', 'year',
    'ind_popshare_occ', 'agr_popshare_occ', 'trade_popshare_occ',
    'tax_income_base_priv_pc', 'd_steamengine', 'pop_log']]

# Drop rows without an ID. ``dropna(..., inplace=True)`` returns None, so its
# result must not be assigned back; use the non-inplace form instead. The
# explicit copy makes the column assignments below operate on an independent
# frame rather than on a slice of the raw data.
df = df.dropna(subset=['ID']).copy()

# A missing steam-engine dummy is treated as 0; IDs become plain integers.
df['d_steamengine'] = df['d_steamengine'].fillna(0).astype(int)
df['ID'] = df['ID'].astype(int)

# Set d_steamengine to 1 in 1905 if it was 1 in 1867
# Only the first 1867 row of each ID is consulted. IDs that have no 1867 row
# are simply skipped instead of raising an IndexError.
first_1867 = df.loc[df['year'] == 1867].drop_duplicates(subset='ID')
steam_ids_1867 = first_1867.loc[first_1867['d_steamengine'] == 1, 'ID']
df.loc[(df['year'] == 1905) & df['ID'].isin(steam_ids_1867), 'd_steamengine'] = 1

In [18]:
# Add the locations and their IDs to a new dataframe. We get this information from the maps.
# Each record is (ID, Name, reichsbank). All three values are kept as strings,
# and the table is built in a single constructor call rather than being grown
# one row at a time.
location_records = [
    ('10701', 'Heilbronn', '3'),
    ('41307', 'Friedrichshafen', '3'),
    ('41001', 'Ravensburg', '3'),
    ('40101', 'Biberach', '3'),
    ('21601', 'Tuttlingen', '3'),
    ('21201', 'Rottweil', '3'),
    ('21227', 'Schwenningen', '3'),
    ('30801', 'Heidenheim', '3'),
    ('40401', 'Geislingen', '3'),  # Not sure, many Geislingens.
    ('40501', 'Göppingen', '3'),
    ('10601', 'Eßlingen', '3'),
    ('30601', 'Schwäbisch Gmünd', '3'),
    ('30101', 'Aalen', '3'),
    ('41401', 'Ulm', '2'),
    ('21001', 'Reutlingen', '2'),
    ('11301', 'Stuttgart', '1'),
    ('41001', 'Weingarten', '4'),  # Small location, could be counted in Ravensburg as well.
]
locations = pd.DataFrame(location_records, columns=['ID', 'Name', 'reichsbank'])
locations.to_csv('reichsbank-locations/reichsbank-locations.csv', index=False)

In [19]:
# Load data and compute centroids
shp = gpd.read_file('raw-shapefile/BW-1939.shp')
loc = pd.read_csv('reichsbank-locations/reichsbank-locations.csv')
# NOTE(review): centroids and the distances derived from them are expressed in
# the units of the shapefile's coordinate reference system -- confirm that the
# shapefile uses a projected CRS so that the distances are meaningful.
shp['centroid'] = shp.geometry.centroid

# Merge reichsbank info by ID and name, filling missing values with 0
shp = shp.merge(loc[['ID', 'Name', 'reichsbank']], on=['ID', 'Name'], how='left')
shp['reichsbank'] = shp['reichsbank'].fillna(0)

# For each geometry, compute minimum distance to any reichsbank centroid
rb_centroids = shp.loc[shp['reichsbank'] != 0, 'centroid']
shp['distance'] = shp['centroid'].apply(lambda c: rb_centroids.distance(c).min())

# Keep only required columns and update reichsbank from locations by ID
result = shp[['ID', 'reichsbank', 'distance']].drop_duplicates('ID')
# The locations table can list more than one place under the same ID. Merging
# it as-is would add a second row for such an ID and undo the de-duplication
# above, so the lookup is reduced to one row per ID first (the first listed
# entry wins).
# NOTE(review): confirm that the first listed entry is the one to keep.
reichsbank_by_id = loc[['ID', 'reichsbank']].drop_duplicates('ID')
result = result.drop(columns='reichsbank').merge(reichsbank_by_id, on='ID', how='left')

result.to_csv('reichsbank-distances.csv', index=False)