In [1]:
import (
    "encoding/csv"
    "fmt"
    "os"
    "math"
    "sort"
    "strconv"
    "strings"
)

import (
    "gonum.org/v1/plot"
    "gonum.org/v1/plot/plotter"
    "gonum.org/v1/plot/plotutil"
    "gonum.org/v1/plot/vg"
    "gonum.org/v1/gonum/stat"
)

In [2]:
// ReadCSV loads the CSV file located at path and returns all of its records.
//
// Every row of the file, including any header row, is returned in file order.
// If the file cannot be opened or parsed, an empty (non-nil) slice is returned
// together with the error reported by os.Open or by the csv reader.
func ReadCSV(path string) ([][]string, error) {
    f, openErr := os.Open(path)
    if openErr != nil {
        return [][]string{}, openErr
    }
    // The handle is only needed while the records are being read.
    defer f.Close()

    reader := csv.NewReader(f)
    records, readErr := reader.ReadAll()
    if readErr != nil {
        return [][]string{}, readErr
    }

    return records, nil
}

// ParseInt converts s to an int using strconv.Atoi.
//
// The value produced by strconv.Atoi is always returned, which is 0 for an
// empty or malformed string. A conversion failure on a non-empty string is
// additionally reported on standard output; an empty string is accepted
// silently.
func ParseInt(s string) int {
    n, err := strconv.Atoi(s)
    if err == nil || len(s) == 0 {
        return n
    }

    fmt.Printf("Failed to convert string %s to an int\n", s)
    return n
}

// ParseFloat converts s to a float64 using strconv.ParseFloat with 64-bit
// precision.
//
// The value produced by strconv.ParseFloat is always returned, which is 0 for
// an empty or malformed string. A conversion failure on a non-empty string is
// additionally reported on standard output; an empty string is accepted
// silently.
func ParseFloat(s string) float64 {
    f, err := strconv.ParseFloat(s, 64)
    if err == nil || len(s) == 0 {
        return f
    }

    fmt.Printf("Failed to convert string %s to a float\n", s)
    return f
}

In [3]:
// Station holds the figures kept for a single voting station.
type Station struct {
    // Ward is the number of the ward the station belongs to.
    Ward int
    // Name is the station's name as it appears in the input data.
    Name string
    // Voters is the number of eligible voters registered at the station.
    Voters int
    // Votes is the number of votes recorded at the station.
    Votes int
    // Turnout is the station's votes divided by its voters. It is not finite
    // when the station has no voters.
    Turnout float64
}

csv, err := ReadCSV("./data/voter_stats_2018.csv")
var data = make([]Station, 0)

for i, line := range csv[1:] {
    if strings.HasSuffix(line[0], "Total") {
        continue
    }
    
    data = append(data, Station{
        Ward: ParseInt(line[0]),
        Name: line[2],
        Voters: ParseInt(line[13]),
        Votes: ParseInt(line[14]),
        Turnout: ParseFloat(line[14]) / ParseFloat(line[13]),
    })
}

var stations = make(map[int] int)
var voters = make(map[int] int)
var votes = make(map[int] int)

var turnouts = make(map[int] float64) 

for _, r := range data {
    stations[r.Ward] += 1
    
    voters[r.Ward] += r.Voters
    votes[r.Ward] += r.Votes
}

for k := range voters {
    turnouts[k] = float64(votes[k]) / float64(voters[k])
}

In [4]:
pts := make(plotter.XYs, len(stations))

for k, v := range turnouts {
    idx := k - 1
    
    pts[idx].X = float64(k)
    pts[idx].Y = v
}

p, err := plot.New()
if err != nil {
    panic(err)
}

// p.Title.Text = title
p.X.Label.Text = "Ward #"
p.Y.Label.Text = "Turnout"

// p.HideX()

err = plotutil.AddLinePoints(p, pts)
if err != nil {
    panic(err)
}

p.Y.Min = 0
p.Y.Max = p.Y.Max * 1.25

if err := p.Save(6*vg.Inch, 6*vg.Inch, "./visualizations/turnout_dist.png"); err != nil {
    panic(err)
}

In [5]:
sum := float64(0)

for i := 1; i <= len(turnouts); i++ {
    value := turnouts[i]
    
    fmt.Print(i)
    fmt.Print(" | ")
    fmt.Println(value)
    
    sum += value
}

fmt.Println()
fmt.Print("Average turnout for the 25 wards: ")
fmt.Println(sum / float64(len(turnouts)))

1 | 0.35234109338216063
2 | 0.4298099180911681
3 | 0.4213419218499177
4 | 0.46534157938011655
5 | 0.3560733881574577
6 | 0.370916280950883
7 | 0.3458363352545629
8 | 0.439675421738202
9 | 0.4013132579471966
10 | 0.3484955389657684
11 | 0.418022604374726
12 | 0.484542404675156
13 | 0.43137254901960786
14 | 0.4922455492943699
15 | 0.4851542231190545
16 | 0.39920698116580267
17 | 0.3908943374709417
18 | 0.38191374469004247
19 | 0.4843041283345719
20 | 0.4081161153542986
21 | 0.3527364035838612
22 | 0.40435233160621764
23 | 0.34052857120099406
24 | 0.3655339805825243
25 | 0.3968044403729051

Average turnout for the 25 wards: 0.40667492402250033


20 <nil>

In [6]:
// Ward holds the profile figures kept for a single ward.
//
// Each attribute is stored as a count together with the size of the sample it
// was drawn from; the notebook divides one by the other to obtain a share.
type Ward struct {
    // ID is the ward number.
    ID int
    // EducationSampleSize is the denominator used for the education share.
    EducationSampleSize int
    // Education is the numerator of the share plotted as "% of Pop. with
    // Postsecondary or Higher Education".
    Education int
    // IncomeSampleSize is the denominator used for the income share.
    IncomeSampleSize int
    // Income is the numerator of the share plotted as "% of Households with
    // $80,000 or Higher Income".
    Income int
}

csv, err := ReadCSV("./data/ward_profile_2016.csv")
var profileData = make([]Ward, 0)

for i, line := range csv[1:] {
    profileData = append(profileData, Ward{
        ID: ParseInt(strings.Replace(line[0], "Ward ", "", -1)),
        EducationSampleSize: ParseInt(line[1]),
        Education: ParseInt(line[2]),
        IncomeSampleSize: ParseInt(line[3]),
        Income: ParseInt(line[4]),
    })
}

In [7]:
// Build two scatter series with one point per ward profile row: the ward's
// turnout on X and, on Y, the share of the education or income sample that
// falls in the counted category.
var education = make(plotter.XYs, len(profileData))
var income = make(plotter.XYs, len(profileData))

for idx := range profileData {
    ward := profileData[idx]
    wardTurnout := turnouts[ward.ID]

    education[idx].X = wardTurnout
    education[idx].Y = float64(ward.Education) / float64(ward.EducationSampleSize)

    income[idx].X = wardTurnout
    income[idx].Y = float64(ward.Income) / float64(ward.IncomeSampleSize)
}

// The three slices below are parallel: entry n of each describes one figure.
plotData := []plotter.XYs{education, income}

plotYLabels := []string{
    "% of Pop. with Postsecondary or Higher Education",
    "% of Households with $80,000 or Higher Income",
}
plotName := []string{
    "./visualizations/ward_profile_education.png",
    "./visualizations/ward_profile_income.png",
}

for n := range plotData {
    figure, err := plot.New()
    if err != nil {
        panic(err)
    }

    figure.X.Label.Text = "Voter Turnout"
    figure.Y.Label.Text = plotYLabels[n]

    scatter, err := plotter.NewScatter(plotData[n])
    if err != nil {
        panic(err)
    }

    figure.Add(scatter)

    // Both axes start at zero. X gets 25% headroom over the axis maximum set
    // while adding the data; Y is a share and therefore capped at 1.
    figure.X.Min = 0
    figure.X.Max *= 1.25
    figure.Y.Min = 0
    figure.Y.Max = 1

    saveErr := figure.Save(6*vg.Inch, 6*vg.Inch, plotName[n])
    if saveErr != nil {
        panic(saveErr)
    }
}

In [8]:
// Report how strongly ward turnout correlates with the education share and
// with the income share, using stat.Correlation with uniform weights.
weight := make([]float64, len(education))
educationValues := make([]float64, len(education))
incomeValues := make([]float64, len(education))
turnoutValues := make([]float64, len(education))

for i := range education {
    // Every ward counts equally.
    weight[i] = 1

    educationValues[i] = education[i].Y
    incomeValues[i] = income[i].Y

    // education[i].X is the turnout of the same ward that produced
    // education[i].Y and income[i].Y. Looking the turnout up by position
    // (turnouts[i+1]) would pair values from different wards whenever the
    // profile rows are not listed in ward order 1, 2, 3, ...
    turnoutValues[i] = education[i].X
}

fmt.Println("Correlation b/w education and turnout:")
fmt.Println(stat.Correlation(turnoutValues, educationValues, weight))

fmt.Println("Correlation b/w income and turnout:")
fmt.Println(stat.Correlation(turnoutValues, incomeValues, weight))

Correlation b/w education and turnout:
0.5725954918266374
Correlation b/w income and turnout:
0.55276354295324


17 <nil>

In [9]:
// Draw one box plot of station turnouts per ward and record, for every ward,
// the 0.91 and 0.09 empirical quantiles of those turnouts.
p, err := plot.New()
if err != nil {
    panic(err)
}

// Upper (0.91 quantile) and lower (0.09 quantile) turnout per ward.
// NOTE(review): these names shadow the max/min built-ins of newer Go
// releases; they are kept because a later cell indexes max by ward.
var max = make(map[int]float64)
var min = make(map[int]float64)

// Group the usable station turnouts by ward in a single pass. Stations whose
// turnout is not a finite number (no registered voters) are left out: the
// box plot constructor rejects such values and they would poison the
// quantiles.
turnoutsByWard := make(map[int][]float64, len(stations))
for _, row := range data {
    if math.IsInf(row.Turnout, 0) || math.IsNaN(row.Turnout) {
        continue
    }

    turnoutsByWard[row.Ward] = append(turnoutsByWard[row.Ward], row.Turnout)
}

w := vg.Points(5)
for ward := range stations {
    values := turnoutsByWard[ward]
    if len(values) == 0 {
        // No usable station in this ward: nothing to draw or to summarise.
        continue
    }

    // stat.Quantile requires its input to be sorted.
    sort.Float64s(values)

    // Only the retained turnouts are plotted. Sizing the sample by the ward's
    // station count instead would leave a zero-valued placeholder for every
    // skipped station and drag the box towards zero.
    b, err := plotter.NewBoxPlot(w, float64(ward-1), plotter.Values(values))
    if err != nil {
        panic(err)
    }

    p.Add(b)

    max[ward] = stat.Quantile(0.91, stat.Empirical, values, nil)
    min[ward] = stat.Quantile(0.09, stat.Empirical, values, nil)
}

// NOTE(review): the boxes are placed along X by ward and spread along Y by
// turnout, so this caption may have been meant for the Y axis — confirm.
p.X.Label.Text = "Turnout per Voting Station"

if err := p.Save(6*vg.Inch, 6*vg.Inch, "./visualizations/station_turnout_dist.png"); err != nil {
    panic(err)
}

In [15]:
// ContainKeywords reports whether s contains, ignoring letter case, at least
// one term from a fixed list of care-facility related keywords.
//
// Matching is by substring, so a keyword embedded in a longer word also
// counts.
func ContainKeywords(s string) bool {
    // Lower-case the input once; every keyword below is already lower case.
    lowered := strings.ToLower(s)

    keywords := []string{
        "retirement",
        "care",
        "nursing",
        "senior",
        "healthcare",
        "residence",
        "addiction",
        "mental health",
    }

    for idx := 0; idx < len(keywords); idx++ {
        if strings.Contains(lowered, keywords[idx]) {
            return true
        }
    }

    return false
}

In [15]:
// Compare the average number of eligible voters per station across all
// stations with the average over "outlier" stations: stations whose turnout
// reaches their ward's upper threshold (max) and whose name matches
// ContainKeywords.

// NOTE(review): outliers is declared but nothing is ever appended to it.
outliers := make([]Station, 0)

voterCount := 0
stationCount := 0

outlierStations := 0
outlierVoterCount := 0

for _, row := range data {
    // Stations with an infinite turnout (votes but no registered voters) are
    // excluded from both averages.
    if math.IsInf(row.Turnout, 0) {
        continue
    }

    voterCount += row.Voters
    stationCount += 1

    // Use the station being iterated (row) for both the keyword test and the
    // voter tally; referring to any other variable here would not describe
    // the station whose turnout was just compared.
    if row.Turnout >= max[row.Ward] && ContainKeywords(row.Name) {
        outlierStations += 1
        outlierVoterCount += row.Voters
    }
}

fmt.Println("Average Eligible Voter per Station:")
fmt.Println(float64(voterCount) / float64(stationCount))

fmt.Println("Average Eligible Voter per Station for Outliers:")
fmt.Println(float64(outlierVoterCount) / float64(outlierStations))

Average Eligible Voter per Station:
1106.100588235294
Average Eligible Voter per Station for Outliers:
94.11764705882354


18 <nil>