In [1]:
import numpy as np
import pandas as pd
import random

In [2]:
# Land-border adjacency list for South America: each key maps to the
# territories it shares a border with. The relation is symmetric (if A lists
# B, then B lists A). The order of the keys fixes the row/column order of
# everything derived from this mapping.
countries = {
    'Argentina': [
        'Bolivia', 'Brazil', 'Chile', 'Paraguay', 'Uruguay',
    ],
    'Bolivia': [
        'Argentina', 'Brazil', 'Chile', 'Paraguay', 'Peru',
    ],
    'Brazil': [
        'Argentina', 'Bolivia', 'Colombia', 'Guyana', 'Paraguay',
        'Peru', 'Suriname', 'Uruguay', 'Venezuela', 'French Guiana',
    ],
    'Chile': [
        'Argentina', 'Bolivia', 'Peru',
    ],
    'Colombia': [
        'Brazil', 'Ecuador', 'Peru', 'Venezuela',
    ],
    'Ecuador': [
        'Colombia', 'Peru',
    ],
    'French Guiana': [
        'Brazil', 'Suriname',
    ],
    'Guyana': [
        'Brazil', 'Suriname', 'Venezuela',
    ],
    'Paraguay': [
        'Argentina', 'Bolivia', 'Brazil',
    ],
    'Peru': [
        'Bolivia', 'Brazil', 'Chile', 'Colombia', 'Ecuador',
    ],
    'Suriname': [
        'Brazil', 'Guyana', 'French Guiana',
    ],
    'Uruguay': [
        'Argentina', 'Brazil',
    ],
    'Venezuela': [
        'Brazil', 'Colombia', 'Guyana',
    ],
}


In [3]:
# Freeze an ordering of the countries (dict insertion order) and allocate a
# square, all-zero matrix with one row and one column per country.
country_list = [name for name in countries]
country_matrix = np.zeros((len(country_list),) * 2)

In [4]:
# Fill the adjacency matrix one row at a time: for each country, build a
# boolean mask over `country_list` marking its neighbours and set those
# columns to 1. Cells that are not neighbours are left untouched.
for row, country in enumerate(country_list):
    borders = countries[country]
    is_neighbor = np.array([name in borders for name in country_list], dtype=bool)
    country_matrix[row, is_neighbor] = 1

country_matrix

array([[0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0.],
       [1., 0., 1., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0.],
       [1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1.],
       [1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0.],
       [1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0.]])

In [5]:
# Sanity check: borders are mutual, so the adjacency matrix must equal its
# own transpose (e.g. Argentina's column reads the same as Argentina's row).
# Evaluates to True when every cell matches.
(country_matrix == country_matrix.T).all()

True

In [6]:
# Turn each row into a probability distribution: divide every entry by its
# row total, in place, so that each row sums to 1.
country_matrix /= country_matrix.sum(axis=1, keepdims=True)

In [7]:
# Label the transition matrix with country names and render it as a heatmap.
# Spot check: Uruguay borders only Argentina and Brazil, so its row should
# read 0.5 / 0.5 for those two columns and sum to 1.
df = pd.DataFrame(country_matrix, index=country_list, columns=country_list)
(
    df.style
    .format(precision=2)
    .background_gradient(cmap="Purples", axis=None)
)


Unnamed: 0,Argentina,Bolivia,Brazil,Chile,Colombia,Ecuador,French Guiana,Guyana,Paraguay,Peru,Suriname,Uruguay,Venezuela
Argentina,0.0,0.2,0.2,0.2,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.2,0.0
Bolivia,0.2,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0
Brazil,0.1,0.1,0.0,0.0,0.1,0.0,0.1,0.1,0.1,0.1,0.1,0.1,0.1
Chile,0.33,0.33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.33,0.0,0.0,0.0
Colombia,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.25
Ecuador,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0
French Guiana,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0
Guyana,0.0,0.0,0.33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.33,0.0,0.33
Paraguay,0.33,0.33,0.33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Peru,0.0,0.2,0.2,0.2,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Where can the spy be after the first move, given a start in Chile?
# That is Chile's row of the transition table (non-zero only for Chile's
# neighbours).
df.loc['Chile', :]

Argentina        0.333333
Bolivia          0.333333
Brazil           0.000000
Chile            0.000000
Colombia         0.000000
Ecuador          0.000000
French Guiana    0.000000
Guyana           0.000000
Paraguay         0.000000
Peru             0.333333
Suriname         0.000000
Uruguay          0.000000
Venezuela        0.000000
Name: Chile, dtype: float64

In [9]:
def get_probs(days, start_country='Chile', transition=None):
    """Return the location distribution after ``days + 1`` moves.

    The start row of the transition table is already the distribution after
    the first move, so it is multiplied by the transition matrix raised to
    ``days``. ``get_probs(0)`` therefore returns the start row itself.

    Parameters
    ----------
    days : int
        Number of additional moves after the first one. Must be >= 0.
    start_country : str, default 'Chile'
        Row label of the country the walk starts in.
    transition : pandas.DataFrame, optional
        Square row-stochastic table whose index and columns list the same
        countries in the same order. Defaults to the notebook-level ``df``.

    Returns
    -------
    numpy.ndarray
        One probability per country, in the column order of ``transition``.

    Raises
    ------
    ValueError
        If ``days`` is negative.
    KeyError
        If ``start_country`` is not a row label of ``transition``.
    """
    if transition is None:
        transition = df
    if days < 0:
        # matrix_power would otherwise try to invert the matrix, which has
        # no meaning for a forward-in-time walk.
        raise ValueError(f'days must be non-negative, got {days}')

    first_move = transition.loc[start_country].to_numpy()
    step_matrix = transition.to_numpy()
    return np.dot(first_move, np.linalg.matrix_power(step_matrix, days))

In [10]:
# One record per day: the day number (1-based) followed by the probability
# of the spy being in each country, keyed by country name.
biglist = [
    {'day': offset + 1, **dict(zip(country_list, get_probs(offset)))}
    for offset in range(1000)
]



In [11]:
# Per-day probability table, indexed by day number.
newdf = pd.DataFrame(biglist).set_index('day')
newdf.head()


Unnamed: 0_level_0,Argentina,Bolivia,Brazil,Chile,Colombia,Ecuador,French Guiana,Guyana,Paraguay,Peru,Suriname,Uruguay,Venezuela
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0
2,0.066667,0.133333,0.2,0.2,0.066667,0.066667,0.0,0.0,0.133333,0.066667,0.0,0.066667,0.0
3,0.191111,0.157778,0.147778,0.053333,0.066667,0.03,0.02,0.02,0.06,0.163333,0.02,0.033333,0.036667
4,0.100778,0.123444,0.191333,0.102444,0.074667,0.049333,0.021444,0.033667,0.084556,0.095778,0.031444,0.053,0.038111
5,0.132656,0.120778,0.182481,0.064,0.075659,0.037822,0.029615,0.042319,0.063978,0.121304,0.041078,0.039289,0.049022


In [12]:
# Sanity check on the table:
# on day 1 the spy can only be in Argentina, Bolivia or Peru, so every
# country with non-zero probability on day 2 must border one of those three.
day2 = newdf.loc[2]
possible_on_day2 = day2[day2 > 0].index
d2countries = [*countries['Argentina'], *countries['Bolivia'], *countries['Peru']]
for name in possible_on_day2:
    print(f'{name} checks out' if name in d2countries else f'{name} error')

Argentina checks out
Bolivia checks out
Brazil checks out
Chile checks out
Colombia checks out
Ecuador checks out
Paraguay checks out
Peru checks out
Uruguay checks out


In [13]:
# Last row of the table, i.e. the distribution on the final tabulated day.
newdf.iloc[-1:]

Unnamed: 0_level_0,Argentina,Bolivia,Brazil,Chile,Colombia,Ecuador,French Guiana,Guyana,Paraguay,Peru,Suriname,Uruguay,Venezuela
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1000,0.1,0.1,0.2,0.06,0.08,0.04,0.04,0.06,0.06,0.1,0.06,0.04,0.06


In [14]:
class Simulator:
    """Random walker that hops to a bordering country on every step.

    Each step looks up the current country's row in a transition table and
    draws the next country from the entries with non-zero probability, using
    NumPy's global random state.

    Parameters
    ----------
    name : hashable
        Identifier for this walker; stored on the instance as ``name``.
    start_country : str, default 'Chile'
        Row label of the country the walk starts in.
    transitions : pandas.DataFrame, optional
        Table whose row ``c`` holds the probabilities of moving from ``c`` to
        each column's country. Defaults to the notebook-level ``df``.
    """

    def __init__(self, name, start_country='Chile', transitions=None):
        self.name = name
        self.start_country = start_country
        self.df = df if transitions is None else transitions
        # Use the requested start country's row; this used to be hard-coded
        # to 'Chile' regardless of `start_country`.
        self.start_probs = self.df.loc[start_country]
        # Current position of the walker and the row it will draw from next.
        self.choice = start_country
        self.probs = self.df.loc[self.choice]
        # Filled in by simulate(); None until a walk has been run.
        self.final_country = None

    def simulate(self, days):
        """Advance the walker and record where it ends up.

        The walker makes ``days - 1`` moves (none when ``days <= 1``), so
        "day 1" here means "still in the current country". The walk continues
        from wherever the instance currently is, so repeated calls chain.
        The result is stored in ``final_country``.

        NOTE(review): a table that counts day 1 as "after the first move"
        is offset by one move from this convention - confirm which is meant.
        """
        for _ in range(1, days):
            self.probs = self.df.loc[self.choice]
            reachable = self.probs[self.probs > 0]
            self.choice = np.random.choice(
                list(reachable.index), p=list(reachable.values)
            )
        self.final_country = self.choice

In [15]:
# Monte Carlo run: many independent walkers, each simulated for the same
# number of days.
N_SIMULATIONS = 10000
N_DAYS = 30
SEED = 42

# The walkers draw from NumPy's global random state; seeding it makes the
# counts below reproducible on a fresh Restart & Run All.
np.random.seed(SEED)

simus = [Simulator(i) for i in range(N_SIMULATIONS)]
for simu in simus:
    simu.simulate(N_DAYS)

In [16]:
# Tally how many walkers finished in each country.
bigdict = {}
for simu in simus:
    landed = simu.final_country
    if landed in bigdict:
        bigdict[landed] += 1
    else:
        bigdict[landed] = 1
bigdict

{'French Guiana': 413,
 'Bolivia': 1010,
 'Brazil': 1955,
 'Peru': 973,
 'Suriname': 630,
 'Venezuela': 626,
 'Argentina': 1039,
 'Paraguay': 647,
 'Guyana': 566,
 'Colombia': 758,
 'Chile': 580,
 'Uruguay': 402,
 'Ecuador': 401}

In [17]:
# Convert the tallies into shares of all runs (fractions in [0, 1], despite
# the name). The grand total is computed once rather than once per key.
total_runs = sum(bigdict.values())
percent_dict = {
    country: count / total_runs
    for country, count in bigdict.items()
}
percent_dict

{'French Guiana': 0.0413,
 'Bolivia': 0.101,
 'Brazil': 0.1955,
 'Peru': 0.0973,
 'Suriname': 0.063,
 'Venezuela': 0.0626,
 'Argentina': 0.1039,
 'Paraguay': 0.0647,
 'Guyana': 0.0566,
 'Colombia': 0.0758,
 'Chile': 0.058,
 'Uruguay': 0.0402,
 'Ecuador': 0.0401}