In [1]:
import pandas as pd

# War Data 1910-1920 (see wars_notes.pdf for notes)
wars = pd.read_csv("wars.csv")
wars = wars[wars.strtyr < 1920]
wars = wars[wars.endyear > 1910]
wars.drop(['mid3hiact', 'mid3hia', 'mid3hib', 'ongo2010', 'new', 'change', 'changetype_1', 'changetype_2'], axis=1)

# Alliances Data 1910-1920 (see alliances_notes.pdf for notes)
alliances = pd.read_csv("alliances.csv")
alliances = alliances[alliances.year > 1910]
alliances = alliances[alliances.year < 1920]
alliances = alliances.drop(['version4id', 'left_censor', 'right_censor', 'version'], axis=1)

# Trade Data 1910-1920 (see trade_notes.pdf for notes)
trade = pd.read_csv("trade.csv")
trade = trade[trade.year > 1910]
trade = trade[trade.year < 1920]
trade = trade.drop(['bel_lux_alt_flow1', 'bel_lux_alt_flow2', 'china_alt_flow1', 'china_alt_flow2', 'source1', 'source2', 'version'], axis=1)

# Capabilities Data 1910-1920 (see capabilities_notes.pdf and capabilities_columns.png for notes)
capabilities = pd.read_csv("capabilities.csv")
capabilities = capabilities[capabilities.year > 1910]
capabilities = capabilities[capabilities.year < 1920]
capabilities = capabilities.drop(['version'], axis=1)

In [2]:
wars.head()

Unnamed: 0,disno,dyindex,statea,namea,stateb,nameb,strtday,strtmnth,strtyr,year,...,duration,disno4,mid3hiact,mid3hia,mid3hib,ongo2010,new,change,changetype_1,changetype_2
4,3,3.001,300,AUH,345,YUG,2,5.0,1913,1913,...,177,-9,8,8,0,0,0,0,0,
5,3,3.001,345,YUG,300,AUH,2,5.0,1913,1913,...,177,-9,8,0,8,0,0,0,0,
66,21,21.001,300,AUH,345,YUG,21,11.0,1912,1912,...,13,-9,10,10,0,0,0,0,0,
67,21,21.001,345,YUG,300,AUH,21,11.0,1912,1912,...,13,-9,10,0,10,0,0,0,0,
68,21,21.002,300,AUH,365,RUS,21,11.0,1912,1912,...,13,-9,10,10,0,0,0,0,0,


In [3]:
alliances.head()

Unnamed: 0,ccode1,state_name1,ccode2,state_name2,dyad_st_day,dyad_st_month,dyad_st_year,dyad_end_day,dyad_end_month,dyad_end_year,defense,neutrality,nonaggression,entente,year
95,200,United Kingdom,235,Portugal,1,1,1816,,,2012.0,1,0,1.0,0.0,1911
96,200,United Kingdom,235,Portugal,1,1,1816,,,2012.0,1,0,1.0,0.0,1912
97,200,United Kingdom,235,Portugal,1,1,1816,,,2012.0,1,0,1.0,0.0,1913
98,200,United Kingdom,235,Portugal,1,1,1816,,,2012.0,1,0,1.0,0.0,1914
99,200,United Kingdom,235,Portugal,1,1,1816,,,2012.0,1,0,1.0,0.0,1915


In [4]:
trade.head()

Unnamed: 0,ccode1,ccode2,year,importer1,importer2,flow1,flow2,smoothflow1,smoothflow2,smoothtotrade,spike1,spike2,dip1,dip2,trdspike,tradedip
144,2,40,1911,United States of America,Cuba,61.0,60.009998,134.64,60.009998,121.00999,0,0,1,0,0,0
145,2,40,1912,United States of America,Cuba,137.89,65.220001,137.89,65.220001,203.11,0,0,0,0,0,0
146,2,40,1913,United States of America,Cuba,125.09,73.230003,125.09,73.230003,198.32001,0,0,0,0,0,0
147,2,40,1914,United States of America,Cuba,-9.0,-9.0,-9.0,-9.0,-9.0,0,0,0,0,0,0
148,2,40,1915,United States of America,Cuba,-9.0,-9.0,-9.0,-9.0,-9.0,0,0,0,0,0,0


In [5]:
capabilities.head()

Unnamed: 0,stateabb,ccode,year,milex,milper,irst,pec,tpop,upop,cinc
95,USA,2,1911,57157,145,24056,521004,93863.0,21039.0,0.216205
96,USA,2,1912,58992,153,31753,557076,95335.0,21804.0,0.224816
97,USA,2,1913,62825,155,31803,600107,97225.0,22596.0,0.219911
98,USA,2,1914,253205,166,23890,556778,99111.0,23340.0,0.206364
99,USA,2,1915,257648,174,32667,580731,100546.0,24008.0,0.222083


In [6]:
countries = ['AUH', 'UKG', 'FRN', 'GMY', 'ITA', 'RUS', 'TUR']

In [7]:
cincs = capabilities[capabilities.stateabb.isin(countries)]
cincs = cincs[cincs.year.isin([1912, 1913, 1914])][['stateabb', 'cinc']]
cincs = cincs.groupby('stateabb').mean()

# CINC is power of country over total power.
# Take CINC range of participants, then pad a bit so no one starts out extremely low or high.
def norm(cincs):
    cmin, cmax = min(cincs['cinc']) - .01, max(cincs['cinc']) + .01
    rng = cmax - cmin
    cincs['cinc'] = (cincs['cinc'] - cmin) / rng
    return cincs
    
list(norm(cincs)['cinc'].round(2))

[0.31, 0.44, 0.93, 0.18, 0.74, 0.07, 0.78]

In [8]:
codes = {e[1][0]: e[1][1] for e in capabilities.iterrows()}
codes

{'USA': 2,
 'CUB': 40,
 'HAI': 41,
 'DOM': 42,
 'MEX': 70,
 'GUA': 90,
 'HON': 91,
 'SAL': 92,
 'NIC': 93,
 'PAN': 95,
 'COL': 100,
 'VEN': 101,
 'ECU': 130,
 'PER': 135,
 'BRA': 140,
 'BOL': 145,
 'PAR': 150,
 'CHL': 155,
 'ARG': 160,
 'URU': 165,
 'UKG': 200,
 'NTH': 210,
 'BEL': 211,
 'FRN': 220,
 'SWZ': 225,
 'SPN': 230,
 'POR': 235,
 'GMY': 255,
 'POL': 290,
 'AUH': 300,
 'AUS': 305,
 'HUN': 310,
 'CZE': 315,
 'ITA': 325,
 'ALB': 339,
 'YUG': 345,
 'GRC': 350,
 'BUL': 355,
 'ROM': 360,
 'RUS': 365,
 'EST': 366,
 'LAT': 367,
 'LIT': 368,
 'FIN': 375,
 'SWD': 380,
 'NOR': 385,
 'DEN': 390,
 'ETH': 530,
 'MOR': 600,
 'IRN': 630,
 'TUR': 640,
 'AFG': 700,
 'CHN': 710,
 'JPN': 740,
 'THI': 800}

In [9]:
c_codes = [codes[c] for c in countries]
c_codes

[300, 200, 220, 255, 325, 365, 640]

In [10]:
defense_pacts = alliances[alliances.ccode1.isin(c_codes) & alliances.ccode2.isin(c_codes) &
          (alliances.dyad_st_year < 1914) & (alliances.defense == 1)][
    ['ccode1', 'state_name1', 'ccode2', 'state_name2', 'defense'] #, 'neutrality', 'entente']
].drop_duplicates()
# defense_pacts['total'] = defense_pacts['defense'] + defense_pacts['entente']
defense_pacts

Unnamed: 0,ccode1,state_name1,ccode2,state_name2,defense
5796,255,Germany,300,Austria-Hungary,1
5836,300,Austria-Hungary,255,Germany,1
5982,255,Germany,325,Italy,1
6053,300,Austria-Hungary,325,Italy,1
6087,325,Italy,255,Germany,1
6121,325,Italy,300,Austria-Hungary,1
6774,220,France,365,Russia,1
6799,365,Russia,220,France,1


In [11]:
index = {c : i for i, c in enumerate(c_codes)}
index

{300: 0, 200: 1, 220: 2, 255: 3, 325: 4, 365: 5, 640: 6}

In [12]:
ally_init = np.zeros((7, 7))

for r in defense_pacts.iterrows():
    ally_init[index[r[1][0]]][index[r[1][2]]] = ally_init[index[r[1][2]]][index[r[1][0]]] = r[1][-1]
    
ally_init

array([[0., 0., 0., 1., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 1., 0., 0.],
       [1., 0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.]])