<a href="https://colab.research.google.com/github/neil-menghani/cricket_wc/blob/master/nlm2138_cricket.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sports Economics Project
### Neil Menghani

In [0]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
import scipy.stats as stat

from datetime import date
import random
import operator

KNN = False
LOG = True
RF = True
PLAYERS_ONLY = False
CHEMISTRY = False
INCLUDE_PLAYERS = True
pre_1996 = True

  from pandas.core import datetools


## Team Data Cleaning and Splitting

In [0]:
# read data files
matchups = pd.read_csv('matchups.csv')
temp = pd.read_csv('temp.csv')
historical = pd.read_csv('historical.csv')
players = pd.read_csv('players.csv')

matchups_copy = matchups.copy()

In [0]:
# print matchups_copy for debugging

X = matchups_copy.drop(['result'], 1)
y = matchups_copy['result']
    
print X.shape
print len(y)

print X[0:10]
print y[0:10]

(439, 16)
439
      nation_a     nation_b  day month  year location  round  temp  home  \
0      england        india    7  june  1975  england      0   NaN   NaN   
1  new_zealand  east_africa    7  june  1975  england      0   NaN   NaN   
2      england  new_zealand   11  june  1975  england      0   NaN   NaN   
3      england  east_africa   14  june  1975  england      0   NaN   NaN   
4    australia     pakistan    7  june  1975  england      0   NaN   NaN   
5    australia    sri_lanka   11  june  1975  england      0   NaN   NaN   
6     pakistan    sri_lanka   14  june  1975  england      0   NaN   NaN   
7    australia  west_indies   21  june  1975  england      2   NaN   NaN   
8     pakistan    australia   14  june  1979  england      0   NaN   NaN   
9      england     pakistan   16  june  1979  england      0   NaN   NaN   

   appear  semi_finalist  runner_up  winner  prev  coach_exp  rest  
0     NaN            NaN        NaN     NaN   NaN        NaN   NaN  
1     NaN  

In [0]:
# lists to assist with operations
metrics = ['appear', 'semi_finalist', 'runner_up', 'winner', 'prev', 'coach_exp']
months = ['january', 'february', 'march', 'april', 'may', 'june', 'july',
          'august', 'september', 'october', 'november', 'december']

# perform operations to fill in data
for i in xrange(len(X)):
    
    # print i # for debugging
    
    nation_a = str(X['nation_a'][i])
    nation_b = str(X['nation_b'][i])
    
    # weather
    temps_host = temp[temp['nation'] == X['location'][i]]
    temps_a = temp[temp['nation'] == nation_a]
    temps_b = temp[temp['nation'] == nation_b]
    temp_host = temps_host[X['month'][i]]
    temp_a = abs(float(temps_a[X['month'][i]]) - float(temp_host))
    temp_b = abs(float(temps_b[X['month'][i]]) - float(temp_host))
    X.at[i, 'temp'] = temp_b - temp_a
    
    # home-field advantage
    if nation_a == X['location'][i]:
        X.at[i, 'home'] = -1
    elif nation_b == X['location'][i]:
        X.at[i, 'home'] = 1
    else:
        X.at[i, 'home'] = 0
        
    # historical success and coaching
    h_a = historical[(historical['year'] == X['year'][i]) & 
                     (historical['nation'] == nation_a)]
    h_b = historical[(historical['year'] == X['year'][i]) & 
                     (historical['nation'] == nation_b)]
    for m in metrics:
        X.at[i, m] = int(h_b[m]) - int(h_a[m])
    
    # rest
    matchday = date(X['year'][i], months.index(X['month'][i]) + 1, X['day'][i])
    try:
        found_a = False
        found_b = False
        j = i-1
        while not (found_a and found_b):
            if X['year'][j] != X['year'][i]:
                raise KeyError
            
            if not found_a and (X['nation_a'][j] == nation_a or X['nation_b'][j] == nation_a):
                prev_a = date(X['year'][j], months.index(X['month'][j]) + 1, X['day'][j])
                found_a = True
            
            if not found_b and (X['nation_a'][j] == nation_b or X['nation_b'][j] == nation_b):
                prev_b = date(X['year'][j], months.index(X['month'][j]) + 1, X['day'][j])
                found_b = True
            
            j -= 1
        X.at[i, 'rest'] = (matchday - prev_b).days - (matchday - prev_a).days
    except KeyError: # if first match for at least one team that year
        X.at[i, 'rest'] = 0
        
X['home'] = X['home'].astype(int)
for m in metrics:
    X[m] = X[m].astype(int)
X['rest'] = X['rest'].astype(int)

print X

         nation_a      nation_b  day    month  year location  round  temp  \
0         england         india    7     june  1975  england      0  31.8   
1     new_zealand   east_africa    7     june  1975  england      0 -10.3   
2         england   new_zealand   11     june  1975  england      0  12.4   
3         england   east_africa   14     june  1975  england      0   2.1   
4       australia      pakistan    7     june  1975  england      0   9.0   
5       australia     sri_lanka   11     june  1975  england      0   1.0   
6        pakistan     sri_lanka   14     june  1975  england      0  -8.0   
7       australia   west_indies   21     june  1975  england      2  -0.2   
8        pakistan     australia   14     june  1979  england      0  -9.0   
9         england      pakistan   16     june  1979  england      0  27.2   
10      sri_lanka         india   18     june  1979  england      0  12.6   
11    west_indies   new_zealand   16     june  1979  england      0  -5.6   

## Player Data Cleaning

In [0]:
add_cols = ['c_odi','wk_odi','squad_odi','c_wc','wk_wc','squad_wc',
            'bat_avg_11','bat_avg_5','centuries',
            'bowl_avg_11','bowl_avg_5','fives']
chemistry = ['chem_2','chem_3','chem_4','chem_5','chem_6','chem_7','chem_8',
             'chem_9','chem_10','chem_11']

final_X = X.copy()
if INCLUDE_PLAYERS:
    for c in add_cols:
        final_X[c] = np.nan
    for c in chemistry:
        final_X[c] = np.nan
    
print list(final_X)

['nation_a', 'nation_b', 'day', 'month', 'year', 'location', 'round', 'temp', 'home', 'appear', 'semi_finalist', 'runner_up', 'winner', 'prev', 'coach_exp', 'rest', 'c_odi', 'wk_odi', 'squad_odi', 'c_wc', 'wk_wc', 'squad_wc', 'bat_avg_11', 'bat_avg_5', 'centuries', 'bowl_avg_11', 'bowl_avg_5', 'fives', 'chem_2', 'chem_3', 'chem_4', 'chem_5', 'chem_6', 'chem_7', 'chem_8', 'chem_9', 'chem_10', 'chem_11']


In [0]:
if INCLUDE_PLAYERS:    
    squad = historical[['year','nation']]
    for c in add_cols:
        squad[c] = np.nan
    for c in chemistry:
        squad[c] = 0


    for i in xrange(len(squad)):
        
        # number of wcs and odis played
        sq_players = players[(players['year'] == squad['year'][i]) &
                         (players['nation'] == squad['nation'][i])]
        sq_players['role'].fillna('', inplace=True)
        sq_players['local_team'].fillna('', inplace=True)
        
        captains = sq_players[sq_players['role'].str.contains('c')]
        wicketkeepers = sq_players[sq_players['role'].str.contains('wk')]
        
        cap_ids = [(row['player_id'], row['year']) for captain,row in captains.iterrows()]
        wk_ids = [(row['player_id'], row['year']) for wicket,row in wicketkeepers.iterrows()]
        sq_ids = [(row['player_id'], row['year']) for splay,row in sq_players.iterrows()]
        
        c_list = []
        wk_list = []
        sq_list = []
        for cid in cap_ids:
            cap_wc = players[(players['player_id'] == cid[0]) &
                             (players['year'] < cid[1])]
            c_list.append(len(cap_wc))
        for wid in wk_ids:
            wk_wc = players[(players['player_id'] == wid[0]) &
                         (players['year'] < wid[1])]
            wk_list.append(len(wk_wc))
        for sid in sq_ids:
            sq_wc = players[(players['player_id'] == sid[0]) &
                         (players['year'] < sid[1])]
            sq_list.append(len(wk_wc))
        
        squad.at[i, 'c_wc'] = np.mean(c_list)
        squad.at[i, 'wk_wc'] = np.mean(wk_list)
        squad.at[i, 'squad_wc'] = np.sum(sq_list) / len(sq_players)
        
        squad.at[i, 'c_odi'] = np.mean(list(captains['odis']))
        squad.at[i, 'wk_odi'] = np.mean(list(wicketkeepers['odis']))
        squad.at[i, 'squad_odi'] = np.sum(list(sq_players['odis'])) / len(sq_players)
        
        # chemistry
        for j,r in sq_players.iterrows():
            player_teams = str(r['local_team']).split(',')
            chem_vals = []
            for loc_team in player_teams:
                if (loc_team != ''):
                    chem_ind_val = len(sq_players[sq_players['local_team'].str.contains(loc_team)])
                    chem_vals.append(chem_ind_val)
            #print player_teams
            if len(chem_vals) > 0:
                chem_result = np.max(chem_vals) - 1
            else:
                chem_result = 0
                
            try:
                for k in xrange(chem_result):
                    squad.at[i, chemistry[k]] += 1
            except IndexError:
                pass
            
        # stats
        bat_11 = sq_players.nlargest(11, 'bat_avg')['bat_avg']
        bat_5 = sq_players.nlargest(5, 'bat_avg')['bat_avg']
        bowl_11 = sq_players.nsmallest(11, 'bowl_avg')['bowl_avg']
        bowl_5 = sq_players.nsmallest(5, 'bowl_avg')['bowl_avg']
        
        squad.at[i, 'bat_avg_11'] = np.mean(bat_11)
        squad.at[i, 'bat_avg_5'] = np.mean(bat_5)
        squad.at[i, 'bowl_avg_11'] = np.mean(bowl_11)
        squad.at[i, 'bowl_avg_5'] = np.mean(bowl_5)
        
        squad.at[i, 'centuries'] = np.sum(sq_players['centuries'])
        squad.at[i, 'fives'] = np.sum(sq_players['fives'])
        
    squad.fillna(0, inplace=True)
        
    print squad [0:10]
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


   year       nation  c_odi  wk_odi  squad_odi  c_wc  wk_wc  squad_wc  \
0  1975    australia    7.0     7.0   0.000000   0.0    0.0       0.0   
1  1975  east_africa    0.0     0.0   0.000000   0.0    0.0       0.0   
2  1975      england    7.0     5.5   6.846154   0.0    0.0       0.0   
3  1975        india    1.0     0.0   0.000000   0.0    0.0       0.0   
4  1975  new_zealand    3.0     5.0   0.000000   0.0    0.0       0.0   
5  1975     pakistan    3.0     3.0   0.000000   0.0    0.0       0.0   
6  1975    sri_lanka    0.0     0.0   0.000000   0.0    0.0       0.0   
7  1975  west_indies    2.0     1.0   0.000000   0.0    0.0       0.0   
8  1979    australia    4.0     0.0   0.000000   0.0    0.0       0.0   
9  1979       canada    0.0     0.0   0.000000   0.0    0.0       0.0   

   bat_avg_11  bat_avg_5   ...     chem_2  chem_3  chem_4  chem_5  chem_6  \
0   25.376667     36.678   ...         11      10       5       5       0   
1    0.000000      0.000   ...          0 

In [0]:
player_columns = add_cols + chemistry
if INCLUDE_PLAYERS:
    print list(final_X)
    for i in xrange(len(final_X)):
        for c in player_columns:
            a_row = squad[(squad['nation'] == final_X['nation_a'][i]) &
                          (squad['year'] == final_X['year'][i])]
            a_value = float(a_row[c])
            b_row = squad[(squad['nation'] == final_X['nation_b'][i]) &
                          (squad['year'] == final_X['year'][i])]
            b_value = float(b_row[c])
            if c == 'bowl_avg_11' or c == 'bowl_avg_5':
                value_to_insert = a_value - b_value
            else:
                value_to_insert = b_value - a_value
            final_X.at[i, c] = value_to_insert
    if not CHEMISTRY:
        chemistry = ['chem_3','chem_4','chem_5','chem_6',
                     'chem_7','chem_8','chem_9','chem_10','chem_11']
        final_X = final_X.drop(chemistry, 1)
final_X.to_csv('final_data')

['nation_a', 'nation_b', 'day', 'month', 'year', 'location', 'round', 'temp', 'home', 'appear', 'semi_finalist', 'runner_up', 'winner', 'prev', 'coach_exp', 'rest', 'c_odi', 'wk_odi', 'squad_odi', 'c_wc', 'wk_wc', 'squad_wc', 'bat_avg_11', 'bat_avg_5', 'centuries', 'bowl_avg_11', 'bowl_avg_5', 'fives', 'chem_2', 'chem_3', 'chem_4', 'chem_5', 'chem_6', 'chem_7', 'chem_8', 'chem_9', 'chem_10', 'chem_11']


In [0]:
#print final_X.to_string()
#print y
if PLAYERS_ONLY:
    final_X = final_X[['nation_a', 'nation_b', 'day', 'month', 'year', 'location', 
                       'c_odi', 'wk_odi', 'squad_odi', 'bat_avg_11', 'bat_avg_5', 
                       'centuries', 'bowl_avg_11', 'bowl_avg_5', 'fives']]

## Train/Validation/Test Split

In [0]:
# separate 1996-2011 rows from 2015 test set and 2019 rows to predict on

# whether to include pre-1996 WCs
if pre_1996:
    matchups_X = final_X[final_X['year'] < 2015]
else:
    matchups_X = final_X[(final_X['year'] > 1992) & (final_X['year'] < 2015)]
matchups_y = [float(i) for i in y[0:len(matchups_X)]]

test_X = final_X[final_X['year'] == 2015]
test_y = [float(i) for i in y[len(matchups_X):len(matchups_X) + len(test_X)]] 

predict_X = final_X[final_X['year'] == 2019] # no y values in 2019

print matchups_X.shape
print len(matchups_y)
print test_X.shape
print len(test_y)
print predict_X.shape

(346, 29)
346
(48, 29)
48
(45, 29)


In [0]:
# shuffle order of rows to alleviate bias between years
random.seed(716)
p = np.random.permutation(len(matchups_X))
data_X = matchups_X.iloc[p].reset_index(drop=True)
data_y = [matchups_y[i] for i in p]

print data_X[['nation_a', 'nation_b', 'day', 'month', 'year']][0:10]
print data_y[0:10]

      nation_a     nation_b  day     month  year
0    sri_lanka      england   30   october  1987
1     pakistan     zimbabwe   27  february  1992
2    sri_lanka      bermuda   15     march  2007
3   bangladesh  new_zealand    2     april  2007
4    australia     zimbabwe    9      june  1999
5      ireland    sri_lanka   18     april  2007
6      england    australia    8     april  2007
7  west_indies  new_zealand   16      june  1979
8    sri_lanka    australia   16     april  2007
9      ireland    australia   13     april  2007
[1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0]


In [0]:
# split train and val (test set is 2015 WC)

val_split = 0.8
val_len = int(0.8 * len(matchups_X))

train_X = data_X[0:val_len].drop(['nation_a', 'nation_b', 'day', 
                                   'month', 'year', 'location'], 1)
train_y = [int(v) for v in data_y[0:val_len]]
val_X = data_X[val_len:].drop(['nation_a', 'nation_b', 'day', 
                                 'month', 'year', 'location'], 1)
val_y = [int(v) for v in data_y[val_len:]]


# prepare test and prediction sets

test_matchups = test_X[['nation_a', 'nation_b', 'day', 
                        'month', 'year', 'location']]
test_X = test_X.drop(['nation_a', 'nation_b', 'day', 
                      'month', 'year', 'location'], 1)
test_y = [int(v) for v in test_y]
predict_matchups = predict_X[['nation_a', 'nation_b', 'day', 
                        'month', 'year', 'location']]
predict_X = predict_X.drop(['nation_a', 'nation_b', 'day', 
                      'month', 'year', 'location'], 1)

print len(train_X), len(train_y), len(val_X), len(val_y)

276 276 70 70


In [0]:
"""
# uncomment to output cleaned data
train_y_df = pd.Series(train_y)
train_X_df = train_X.reset_index(drop=True)
train_X_df['result'] = train_y_df
train_X_df.to_csv('train_X.csv')

val_y_df = pd.Series(val_y)
val_X_df = val_X.reset_index(drop=True)
val_X_df['result'] = val_y_df
val_X_df.to_csv('val_X.csv')

test_y_df = pd.Series(test_y)
test_X_df = test_X.reset_index(drop=True)
test_X_df['result'] = test_y_df
test_X_df.to_csv('test_X.csv')
"""

"\n# uncomment to output cleaned data\ntrain_y_df = pd.Series(train_y)\ntrain_X_df = train_X.reset_index(drop=True)\ntrain_X_df['result'] = train_y_df\ntrain_X_df.to_csv('train_X.csv')\n\nval_y_df = pd.Series(val_y)\nval_X_df = val_X.reset_index(drop=True)\nval_X_df['result'] = val_y_df\nval_X_df.to_csv('val_X.csv')\n\ntest_y_df = pd.Series(test_y)\ntest_X_df = test_X.reset_index(drop=True)\ntest_X_df['result'] = test_y_df\ntest_X_df.to_csv('test_X.csv')\n"

## Building the Model

In [0]:
# function to evaluate accuracy of predictions
def check_acc(a, b):
    correct = 0.0
    for i in xrange(len(a)):
        if a[i] == b[i]:
            correct += 1
    return correct / len(a)

In [0]:
if KNN == True:
    # K Nearest Neighbors

    neigh = KNeighborsClassifier(n_neighbors=3)
    neigh.fit(train_X, train_y)
    train_pred_y = neigh.predict(train_X)
    val_pred_y = neigh.predict(val_X)

    print check_acc(train_pred_y, train_y)
    print check_acc(val_pred_y, val_y)


In [0]:
if LOG == True:
    # Logistic Regression
    
    from sklearn import linear_model
    import numpy as np
    import scipy.stats as stat

    clf = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial')
    clf.fit(train_X, train_y)
    train_pred_y = clf.predict(train_X)
    val_pred_y = clf.predict(val_X)
    val_pred_prob = clf.predict_proba(val_X)

    print check_acc(train_pred_y, train_y)
    print check_acc(val_pred_y, val_y)

    print data_X[['nation_a', 'nation_b', 'day', 'month', 'year']][val_len:][0:10]
    print clf.classes_
    print val_pred_prob[0:10]
    print val_pred_y[0:10]
    print val_y[0:10]


0.804347826087
0.628571428571
         nation_a     nation_b  day     month  year
276  south_africa     pakistan    8     march  1992
277  south_africa  new_zealand   16  february  2003
278     australia     zimbabwe   16      june  1983
279     australia      england    8  november  1987
280      pakistan      england   13      june  1983
281   netherlands      namibia    3     march  2003
282     australia        india    4      june  1999
283      pakistan    australia   23       may  1999
284   new_zealand     pakistan   18     march  1992
285         india     zimbabwe   18      june  1983
[0 1]
[[0.13421616 0.86578384]
 [0.86686656 0.13313344]
 [0.87026583 0.12973417]
 [0.6866038  0.3133962 ]
 [0.57659315 0.42340685]
 [0.44068505 0.55931495]
 [0.25965391 0.74034609]
 [0.8359446  0.1640554 ]
 [0.38650383 0.61349617]
 [0.91410184 0.08589816]]
[1 0 0 0 0 1 1 0 1 0]
[0, 1, 0, 0, 0, 0, 0, 0, 1, 0]




In [0]:
if RF == True:
    # Random Forest

    rf = RandomForestClassifier(n_estimators=20, max_depth=10).fit(train_X, train_y)
    importances = rf.feature_importances_
    train_pred_y = rf.predict(train_X)
    val_pred_y = rf.predict(val_X)
    val_pred_prob = rf.predict_proba(val_X)

    print check_acc(train_pred_y, train_y)
    print check_acc(val_pred_y, val_y)

    print data_X[['nation_a', 'nation_b', 'day', 'month', 'year']][val_len:][0:10]
    print rf.classes_
    print val_pred_prob[0:10]
    print val_pred_y[0:10]
    print val_y[0:10]
    
    print list(train_X)
    print importances

0.996376811594
0.685714285714
         nation_a     nation_b  day     month  year
276  south_africa     pakistan    8     march  1992
277  south_africa  new_zealand   16  february  2003
278     australia     zimbabwe   16      june  1983
279     australia      england    8  november  1987
280      pakistan      england   13      june  1983
281   netherlands      namibia    3     march  2003
282     australia        india    4      june  1999
283      pakistan    australia   23       may  1999
284   new_zealand     pakistan   18     march  1992
285         india     zimbabwe   18      june  1983
[0 1]
[[0.05084746 0.94915254]
 [0.8        0.2       ]
 [0.99166667 0.00833333]
 [0.73129464 0.26870536]
 [0.98441964 0.01558036]
 [1.         0.        ]
 [0.59553571 0.40446429]
 [0.3        0.7       ]
 [0.15       0.85      ]
 [0.95       0.05      ]]
[1 0 0 0 0 0 0 1 1 0]
[0, 1, 0, 0, 0, 0, 0, 0, 1, 0]
['round', 'temp', 'home', 'appear', 'semi_finalist', 'runner_up', 'winner', 'prev', 'coa

## Demo

In [0]:
test_pred_y = rf.predict(test_X)
test_pred_prob = rf.predict_proba(test_X)
predict_pred_prob = rf.predict_proba(predict_X)

print check_acc(test_pred_y, test_y)

print test_matchups[0:3], test_pred_prob[0:3]
print predict_matchups[0:3], predict_pred_prob[0:3]

0.458333333333
        nation_a     nation_b  day     month  year     location
187  new_zealand    sri_lanka   14  february  2015  new_zealand
188    australia      england   14  february  2015    australia
189   bangladesh  afghanistan   18  february  2015    australia [[0.4        0.6       ]
 [0.89357143 0.10642857]
 [0.57258523 0.42741477]]
        nation_a      nation_b  day month  year location
394      england  south_africa   30   may  2019  england
395     pakistan   west_indies   31   may  2019  england
396  new_zealand     sri_lanka    1  june  2019  england [[0.69553571 0.30446429]
 [0.95       0.05      ]
 [0.8102381  0.1897619 ]]


In [0]:
# perform simulations for 2015 and 2019
random.seed(1997)

teams_15 = ['afghanistan', 'australia', 'bangladesh', 'england', 
            'south_africa', 'india', 'ireland', 'new_zealand',
            'pakistan', 'scotland', 'sri_lanka','uae',
            'west_indies','zimbabwe']
standings_15 = {}
records_15 = {}
teams_19 = ['afghanistan', 'australia', 'bangladesh', 'england', 
            'south_africa', 'india', 'new_zealand',
            'pakistan', 'sri_lanka', 'west_indies']
standings_19 = {}
records_19 = {}

for i15 in teams_15:
    standings_15[i15] = 0
    records_15[i15] = [0, 0, 0]
for i19 in teams_19:
    standings_19[i19] = 0
    records_19[i19] = [0, 0, 0]
    
test_matchups = test_matchups.reset_index(drop=True)
predict_matchups = predict_matchups.reset_index(drop=True)

for sim in xrange(10000):
    stand = standings_15.copy()
    for i in xrange(len(test_matchups)):
        gen = random.uniform(0,1)
        if gen > test_pred_prob[i][0]:
            # b won match
            stand[test_matchups['nation_b'][i]] += 3
        else:
            # a won match
            stand[test_matchups['nation_a'][i]] += 3
    sort_stand = sorted(stand.items(), key=operator.itemgetter(1))
    for j in [0, 1, 2]:
        records_15[sort_stand[-1][0]][j] += 1
    for k in (0, 1):
        records_15[sort_stand[-2][0]][k] += 1
    records_15[sort_stand[-3][0]][0] += 1
    records_15[sort_stand[-4][0]][0] += 1
    #print sort_stand
    
for sim in xrange(10000):
    stand = standings_19.copy()
    for i in xrange(len(predict_matchups)):
        gen = random.uniform(0,1)
        if gen > predict_pred_prob[i][0]:
            # b won match
            stand[predict_matchups['nation_b'][i]] += 3
        else:
            # a won match
            stand[predict_matchups['nation_a'][i]] += 3
    sort_stand = sorted(stand.items(), key=operator.itemgetter(1))
    for j in [0, 1, 2]:
        records_19[sort_stand[-1][0]][j] += 1
    for k in (0, 1):
        records_19[sort_stand[-2][0]][k] += 1
    records_19[sort_stand[-3][0]][0] += 1
    records_19[sort_stand[-4][0]][0] += 1
    #print sort_stand
    
print records_15
print records_19

{'afghanistan': [8, 0, 0], 'australia': [6473, 3533, 1831], 'england': [185, 26, 1], 'bangladesh': [467, 59, 16], 'pakistan': [4157, 1526, 557], 'south_africa': [5986, 2559, 1049], 'scotland': [24, 2, 0], 'new_zealand': [6447, 3808, 2212], 'india': [7488, 4733, 2722], 'zimbabwe': [354, 52, 6], 'ireland': [165, 25, 5], 'west_indies': [2223, 707, 242], 'sri_lanka': [5883, 2956, 1356], 'uae': [140, 14, 3]}
{'afghanistan': [191, 19, 4], 'australia': [2978, 606, 148], 'england': [8027, 4470, 1527], 'bangladesh': [1766, 351, 57], 'pakistan': [2855, 559, 74], 'south_africa': [4783, 1196, 187], 'new_zealand': [7587, 3373, 697], 'india': [9873, 9110, 7246], 'west_indies': [577, 70, 12], 'sri_lanka': [1363, 246, 48]}
