# Schulze's Method of Voting
This notebook implements Schulze's method of voting to combine a set of ranked lists into an overall ranked list.

In [1]:
import numpy as np
import pandas as pd

## Test Data
The test data come from a [Google sheet](https://docs.google.com/spreadsheets/d/1NUoOme-YQHA8cfnbPjtHBt13bkZPQkQei3cPAQ611I8/edit?usp=sharing) whose tabs are each published as a CSV file. In some tabs the first row holds the names of the voters; in others it holds the number of identical ballots cast for each ranking. `import_votes` therefore supports two import modes, selected with its `first_row` argument.

In [2]:
# Published-CSV export URLs, one per tab of the test Google Sheet.
# Each URL is meant to be passed to import_votes(); whether a given tab needs
# first_row='voters' or first_row='count' is not recorded here -- check the tab.
test1 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRGPvxWcqyo86JDfGhCn9laHlLHUrMEb8jAqdBFNN_uimR95VxE9JCPpBxr4WWaeLeAPuLHCzGDbQ8y/pub?gid=1591363425&single=true&output=csv'
test2 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRGPvxWcqyo86JDfGhCn9laHlLHUrMEb8jAqdBFNN_uimR95VxE9JCPpBxr4WWaeLeAPuLHCzGDbQ8y/pub?gid=541601874&single=true&output=csv'
test3 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRGPvxWcqyo86JDfGhCn9laHlLHUrMEb8jAqdBFNN_uimR95VxE9JCPpBxr4WWaeLeAPuLHCzGDbQ8y/pub?gid=2108916501&single=true&output=csv'
test4 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRGPvxWcqyo86JDfGhCn9laHlLHUrMEb8jAqdBFNN_uimR95VxE9JCPpBxr4WWaeLeAPuLHCzGDbQ8y/pub?gid=662509174&single=true&output=csv'
example1 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRGPvxWcqyo86JDfGhCn9laHlLHUrMEb8jAqdBFNN_uimR95VxE9JCPpBxr4WWaeLeAPuLHCzGDbQ8y/pub?gid=721130038&single=true&output=csv'
example2 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRGPvxWcqyo86JDfGhCn9laHlLHUrMEb8jAqdBFNN_uimR95VxE9JCPpBxr4WWaeLeAPuLHCzGDbQ8y/pub?gid=1181234950&single=true&output=csv'
example3 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRGPvxWcqyo86JDfGhCn9laHlLHUrMEb8jAqdBFNN_uimR95VxE9JCPpBxr4WWaeLeAPuLHCzGDbQ8y/pub?gid=1865810740&single=true&output=csv'
example4 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRGPvxWcqyo86JDfGhCn9laHlLHUrMEb8jAqdBFNN_uimR95VxE9JCPpBxr4WWaeLeAPuLHCzGDbQ8y/pub?gid=920600198&single=true&output=csv'
example6 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRGPvxWcqyo86JDfGhCn9laHlLHUrMEb8jAqdBFNN_uimR95VxE9JCPpBxr4WWaeLeAPuLHCzGDbQ8y/pub?gid=753260081&single=true&output=csv'
example7 = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vRGPvxWcqyo86JDfGhCn9laHlLHUrMEb8jAqdBFNN_uimR95VxE9JCPpBxr4WWaeLeAPuLHCzGDbQ8y/pub?gid=959087225&single=true&output=csv'

In [3]:
def import_votes(url, first_row='voters'):
    """Load ranked ballots from a CSV source into a one-column-per-ballot frame.

    Parameters
    ----------
    url : str, path or file-like
        Anything ``pandas.read_csv`` accepts.
    first_row : {'voters', 'count'}
        How to interpret the first row of the CSV.

        * ``'voters'`` -- the first row is a header of voter names; every
          column below it is that voter's ranking, best choice first.
        * ``'count'`` -- the first row gives, for each column, how many
          identical ballots were cast with the ranking listed below it.
          Each column is expanded into that many ballot columns.

    Returns
    -------
    pandas.DataFrame
        Rows are rank positions (row 0 is the top choice) and columns are
        individual ballots. In ``'count'`` mode the columns are labelled
        ``0 .. n_ballots - 1``.

    Raises
    ------
    ValueError
        If ``first_row`` is neither ``'voters'`` nor ``'count'``.
    """
    if first_row == 'voters':
        return pd.read_csv(url)

    if first_row == 'count':
        vote_counts = pd.read_csv(url, header=None)
        # Row 0 holds the multiplicity of each distinct ranking.
        multiplicities = [np.int64(c) for c in vote_counts.iloc[0]]
        rankings = vote_counts.iloc[1:].to_numpy()
        # Repeat every ranking column by its multiplicity in one pass instead
        # of growing the array with np.append inside a loop.
        return pd.DataFrame(np.repeat(rankings, multiplicities, axis=1))

    raise ValueError('Pass voters or count as first_row.')

In [4]:
# Load the test4 tab; first_row='voters' makes the CSV's first row the column
# labels, so each column of `votes` is one voter's ranking (top choice in row 0).
votes = import_votes(test4, first_row='voters')
votes

Unnamed: 0,Tim,John,Cindy,Fabian
0,Jenny,Jenny,Jenny,Bill
1,Betty,Betty,Betty,Kim
2,Kim,Kim,Kim,Darlene
3,Darlene,Darlene,Sandy,Sandy


In [5]:
# Flatten every ballot cell into one 1-D array and keep the sorted distinct names.
all_ranked_names = votes.to_numpy().ravel()
candidates = np.unique(all_ranked_names)
candidates

array(['Betty', 'Bill', 'Darlene', 'Jenny', 'Kim', 'Sandy'], dtype=object)

In [6]:
# Number of distinct candidates appearing anywhere on the ballots.
len(candidates)

6

## Preference Matrix
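Entry (A, B) of the preference matrix is the number of ballots that rank candidate A above candidate B. A candidate who appears on a ballot is counted as preferred over every candidate left off that ballot; a ballot that does not list A at all contributes nothing to A's row.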

In [7]:
def get_pref_count(votes, candidate_a, candidate_b):
    """Count the ballots that prefer ``candidate_a`` over ``candidate_b``.

    Parameters
    ----------
    votes : pandas.DataFrame
        One column per ballot; rows are rank positions with the top choice
        in the first row.
    candidate_a, candidate_b
        The two candidates to compare.

    Returns
    -------
    int
        Number of ballots on which ``candidate_a`` is ranked and either
        ``candidate_b`` is not ranked at all or is ranked lower. Ballots that
        do not list ``candidate_a`` are ignored. If a candidate appears more
        than once on a ballot, the first appearance is used.
    """
    count = 0
    for i in range(votes.shape[1]):
        ballot = votes.iloc[:, i]
        # Use row *positions*, not index labels, so the result does not depend
        # on how the frame happens to be indexed.
        a_ranks = np.flatnonzero((ballot == candidate_a).to_numpy())
        if a_ranks.size == 0:
            # candidate_a is not on this ballot: no preference expressed.
            continue
        b_ranks = np.flatnonzero((ballot == candidate_b).to_numpy())
        # An unranked candidate_b sits below every ranked candidate.
        if b_ranks.size == 0 or a_ranks[0] < b_ranks[0]:
            count += 1
    return count

In [8]:
def get_pref_matrix(votes):
    """Build the pairwise preference matrix for a set of ballots.

    Parameters
    ----------
    votes : pandas.DataFrame
        One column per ballot; rows are rank positions with the top choice
        in the first row.

    Returns
    -------
    pandas.DataFrame
        Square int64 frame whose rows and columns are the sorted distinct
        candidates. Entry ``[a, b]`` is ``get_pref_count(votes, a, b)``, the
        number of ballots preferring ``a`` over ``b``; the diagonal is 0.
    """
    ranked = votes.to_numpy().ravel()
    # Ballots of unequal length are padded with missing values when read from
    # CSV; those cells are not candidates and cannot be sorted alongside names.
    candidates = np.unique(ranked[~pd.isna(ranked)])

    pref_vals = np.zeros([len(candidates), len(candidates)], dtype=np.int64)
    for i, candidate_a in enumerate(candidates):
        for j, candidate_b in enumerate(candidates):
            pref_vals[i, j] = get_pref_count(votes, candidate_a, candidate_b)
    return pd.DataFrame(pref_vals, columns=candidates, index=pd.Index(candidates))

In [9]:
# Pairwise preference counts for the loaded ballots (row candidate over column candidate).
pref_matrix = get_pref_matrix(votes)
pref_matrix

Unnamed: 0,Betty,Bill,Darlene,Jenny,Kim,Sandy
Betty,0,3,3,0,3,3
Bill,1,0,1,1,1,1
Darlene,1,2,0,1,0,3
Jenny,3,3,3,0,3,3
Kim,1,3,4,1,0,4
Sandy,1,1,1,1,0,0


## Path Strengths

In [10]:
def get_path_strengths(pref_matrix):
    """Compute Schulze strongest-path strengths from a preference matrix.

    Parameters
    ----------
    pref_matrix : pandas.DataFrame
        Square frame where entry ``[a, b]`` is the number of ballots that
        prefer ``a`` over ``b``. Rows and columns must be in the same order.

    Returns
    -------
    pandas.DataFrame
        Same shape and dtype, labelled by ``pref_matrix.columns`` on both
        axes. Entry ``[a, b]`` is the strength of the strongest path from
        ``a`` to ``b``, where a path's strength is that of its weakest link;
        0 means no path exists. The diagonal is 0.
    """
    pref_vals = pref_matrix.values
    candidates = pref_matrix.columns.values
    n = len(candidates)

    # Direct links: keep a pairwise count only where it strictly beats the
    # reverse count; ties and losses contribute no link.
    p = np.zeros_like(pref_vals)
    wins = pref_vals > pref_vals.T
    p[wins] = pref_vals[wins]

    # Widest-path variant of Floyd-Warshall. `i` is the intermediate
    # candidate and must be the outermost loop.
    for i in range(n):
        for j in range(n):
            if j == i:
                continue
            for k in range(n):
                # The pivot must differ from both endpoints, and the
                # endpoints from each other, so the diagonal stays 0.
                if k != i and k != j:
                    p[j, k] = max(p[j, k], min(p[j, i], p[i, k]))

    return pd.DataFrame(p, columns=candidates, index=pd.Index(candidates))

In [11]:
# Strongest-path strengths derived from the pairwise preference counts.
path_strengths = get_path_strengths(pref_matrix)
path_strengths

Unnamed: 0,Betty,Bill,Darlene,Jenny,Kim,Sandy
Betty,0,3,3,0,3,3
Bill,0,0,0,0,0,0
Darlene,0,2,0,0,0,3
Jenny,3,3,3,0,3,3
Kim,0,3,4,0,0,4
Sandy,0,0,0,0,0,0


## Schulze Winner Matrix
The winner matrix indicates which candidates are preferred over the others, based on the path strengths.

In [12]:
def get_winner_matrix(path_strengths):
    """Turn path strengths into a 0/1 "beats" matrix.

    Parameters
    ----------
    path_strengths : pandas.DataFrame
        Square frame where entry ``[a, b]`` is the strength of the strongest
        path from ``a`` to ``b``. Rows and columns must be in the same order.

    Returns
    -------
    pandas.DataFrame
        Same shape, labels and dtype as ``path_strengths``. Entry ``[a, b]``
        is 1 when the path strength from ``a`` to ``b`` strictly exceeds the
        strength from ``b`` to ``a``, otherwise 0. The diagonal is 0.
    """
    p = path_strengths.values
    # Size and labels come from the argument itself, not from any notebook
    # global, so the function works on a fresh kernel and for any input.
    # A strict comparison with the transpose leaves the diagonal at 0.
    winner = (p > p.T).astype(p.dtype)
    return pd.DataFrame(winner, columns=path_strengths.columns, index=path_strengths.index)

In [13]:
# 1 where the row candidate's path strength over the column candidate exceeds the reverse, else 0.
winner_matrix = get_winner_matrix(path_strengths)
winner_matrix

Unnamed: 0,Betty,Bill,Darlene,Jenny,Kim,Sandy
Betty,0,1,1,0,1,1
Bill,0,0,0,0,0,0
Darlene,0,1,0,0,0,1
Jenny,1,1,1,0,1,1
Kim,0,1,1,0,0,1
Sandy,0,0,0,0,0,0


## Overall Schulze Ranking
In this example, using the Test4 data, Bill and Sandy are tied for last place.

In [14]:
# Score each candidate by the number of rivals they beat (row sum of the winner
# matrix) and sort best-first. Labels are taken from winner_matrix itself so the
# scores can never be paired with a stale `candidates` array.
final_ranking = pd.Series(
    np.sum(winner_matrix.values, axis=1),
    index=winner_matrix.index,
).sort_values(ascending=False)
final_ranking

Jenny      5
Betty      4
Kim        3
Darlene    2
Bill       0
Sandy      0
dtype: int64