Skip to content
Permalink
Browse files
Initial import (taken from Francis's subversion repository)
  • Loading branch information
frabcus committed Aug 14, 2003
0 parents commit 78d0f62e7c754e6051df28cc9e63a77d58457c0d
Show file tree
Hide file tree
Showing 67 changed files with 6,696 additions and 0 deletions.

Large diffs are not rendered by default.

@@ -0,0 +1,31 @@
The Public Whip Source Code
---------------------------

Hello! Here's the source code used to generate the Public Whip website.
To see the end product go to http://www.publicwhip.org.uk. If you don't
know what this is all about, have a look at the FAQ there. The rest of
this file is if you are interested in the code - for example, if you
want to add your own analyses, fix bugs or whatever.

If you need any help, please email me at francis@flourish.org.

LICENSE.html - Details of open source licensing terms, under the GNU GPL
todo.txt - Things I'm thinking of doing in the short term
ideas.txt - Zillions of ideas of things which could be done
errata.txt - Errors in Hansard that the software has found

Here's how it works: Perl code downloads data from the UK parliament
website, and stores it in a MySQL database. A combination of Perl and
Octave (an open source mathematics package, compatible with Matlab) code
performs various calculations on the data to form other database tables.
A website in PHP is provided to make it easy to look up information and
search the database.

At the moment I've only run this on Linux, but it should run on Windows.

scraper - Screen-scrapes the Hansard website to fill the database; also does some analysis
rawdata - Source data files not previously available on the Internet
cluster - MP cluster analysis using Multi-dimensional Scaling
website - Code for www.publicwhip.org.uk, PHP extracts data from database
build - Scripts I use to upload to www.publicwhip.org.uk

@@ -0,0 +1,19 @@
#! /bin/bash

# Uploads tables from the local database to the database on the public web
# server sphinx.  A textual dump of the tables is sent with rsync so that
# only the parts which changed since the last upload are resent.

# Stop at the first failing step.  Without this a failed or partial
# mysqldump would still be uploaded and loaded into the live database.
set -e

# Presumably defines DBPASSWORD (the remote database password), which is
# the only variable used below that is not set in this script.
source /home/francis/.sphinxpass
: "${DBPASSWORD:?DBPASSWORD was not set by /home/francis/.sphinxpass}"

# Deliberately not called USER: assigning to USER would replace the login
# name that the shell exports to every child process.
REMOTE=francis@sphinx.mythic-beasts.com

# Remove the local dump however the script ends.  The remote copy is left
# in place so the next rsync only has to send the differences.
trap 'rm -f pw_db.sql' EXIT

echo "Uploading new db..."
echo "drop table if exists pw_cache_rebels;" >pw_db.sql
mysqldump --add-drop-table tpw pw_cache_divinfo pw_cache_mpinfo pw_cache_whip pw_division pw_mp pw_vote pw_cache_partyinfo pw_cache_mpcoords >>pw_db.sql
rsync -v --progress -az -e "ssh" pw_db.sql "$REMOTE:./pw_db.sql"

# NOTE(review): the password is expanded locally and ends up on the remote
# command line, where it is visible in process listings; a remote option
# file would avoid that.
ssh "$REMOTE" "mysql francis --password=$DBPASSWORD <pw_db.sql"


@@ -0,0 +1,11 @@
#! /bin/bash

# Uploads the website directory to public_html/publicwhip in the remote
# home directory, then resets the permissions of the uploaded files.

# Stop at the first failing step, so permissions are not touched after a
# failed or partial rsync.
set -e

# Deliberately not called USER: assigning to USER would replace the login
# name that the shell exports to every child process.
REMOTE=francis@sphinx.mythic-beasts.com
SITE=/home/francis/public_html/publicwhip

echo "Uploading web pages..."
rsync -v --progress --delete --cvs-exclude -az -e "ssh" website/ "$REMOTE:./public_html/publicwhip"

# One connection instead of four; the globs are expanded by the remote shell.
ssh "$REMOTE" "chmod 0755 $SITE $SITE/license && chmod 0644 $SITE/license/* && chmod 0755 $SITE/*.php"

@@ -0,0 +1,9 @@
#! /bin/bash

# Uploads the website directory to www.publicwhip.org.uk_html in the remote
# home directory, then resets the permissions of the served files.

# Stop at the first failing step, so permissions are not touched after a
# failed or partial rsync.
set -e

# Deliberately not called USER: assigning to USER would replace the login
# name that the shell exports to every child process.
REMOTE=francis@sphinx.mythic-beasts.com
DOCS=/web/SERVERS/www.publicwhip.org.uk/docs

echo "Uploading web pages..."
rsync -v --progress --delete --cvs-exclude -az -e "ssh" website/ "$REMOTE:./www.publicwhip.org.uk_html"

# NOTE(review): rsync writes to a path under the home directory while chmod
# uses $DOCS; presumably one is a link to the other - confirm on the server.
# One connection instead of two; the glob is expanded by the remote shell.
ssh "$REMOTE" "chmod 0755 $DOCS && chmod 0755 $DOCS/*.php"

@@ -0,0 +1,5 @@
DN.m
mpcoords.txt
mpscatt.jar
mpsee.png

@@ -0,0 +1,42 @@
# $Id: Makefile,v 1.1 2003/08/14 19:35:48 frabcus Exp $
# Used to run the clustering calculation, build the Java applet etc.

# The Public Whip, Copyright (C) 2003 Francis Irving and Julian Todd
# This is free software, and you are welcome to redistribute it under
# certain conditions. However, it comes with ABSOLUTELY NO WARRANTY.
# For details see the file LICENSE.html in the top level of the source.

# Default target: build everything, then copy the applet and the static
# picture into the website directory.
all: mpscatt.jar mpcoords.txt mpsee.png db
	cp mpsee.png ../website
	cp mpscatt.jar ../website

# These targets name actions, not files.  Declaring them phony keeps them
# working even if a file called "all", "db" or "test" appears here.
.PHONY: all db test

# Make Java applet
mpscatt.jar: src/*.java
	javac -d . src/*.java
	jar cvf mpscatt.jar *.class
	rm *.class

# Make distance matrix file.
# Phony on purpose, so it is regenerated on every run (presumably because
# make cannot see when the database contents it is built from change).
DN.m:
	rm -f DN.m
	./octavein.pl > DN.m
.PHONY: DN.m

# Make file of coordinates of MPs
mpcoords.txt: DN.m
	octave --silent mds.m

# Copy into database table.  The standard output of mpcoords2db.pl is a
# small PHP include file, which is written into the website directory.
db: mpcoords.txt
	cat mpcoords.txt | ./mpcoords2db.pl > ../website/mpcoords-eigen.inc

# Test view the cluster
test: mpcoords.txt mpscatt.jar
	java -classpath mpscatt.jar mpframe mpcoords.txt

# Static graphic of cluster
mpsee.png: mpcoords.txt mpscatt.jar
	java -classpath mpscatt.jar mpframe mpcoords.txt mpsee.png


@@ -0,0 +1,31 @@
# $Id: mds.m,v 1.1 2003/08/14 19:35:48 frabcus Exp $
# Multidimensional scaling on matrix of distances between
# pairs of MPs, for some distance metric.
# Octave source file (should be compatible with Matlab)

# The Public Whip, Copyright (C) 2003 Francis Irving and Julian Todd
# This is free software, and you are welcome to redistribute it under
# certain conditions. However, it comes with ABSOLUTELY NO WARRANTY.
# For details see the file LICENSE.html in the top level of the source.

# read in the matrix of distances between pairs of MPs
# (DN.m is written by octavein.pl: it defines the distance matrix D and
# the character matrices ns and ps, one row per MP, holding the name and
# party of each MP)
source "DN.m";
s=size(D);
# number of MPs = number of rows of D
# (no trailing semicolon, so Octave echoes the value)
mps=s(1)

# perform the MDS decomposition
# A is -1/2 times the element-wise square of the distances
A=-0.5*D.*D;
# H is the identity with 1/mps subtracted from every entry
H=eye(mps) - 1/mps; # idempotent H*H=H
# multiplying by H on both sides removes the row and column means of A
B=H*A*H;

# this should be a diagonal decomposition because B is symmetric
# NOTE(review): the "u" option controls how schur orders the result;
# confirm that it places the eigenvalues wanted for the first three
# axes in the leading positions of S.
[U, S]=schur(B,"u");

# output data to file
# First line: number of MPs and the first three diagonal entries of S.
# Then one line per MP: matrix row number, the first three entries of
# that row of U, and the quoted name and party from row i of ns and ps.
ff = fopen("mpcoords.txt", "w");
fprintf(ff, "%d %f %f %f\n", mps, S(1,1), S(2,2), S(3,3));
for i=1:mps
fprintf(ff, "%d %f %f %f \"%s\" \"%s\"\n", i, U(i,1),U(i,2),U(i,3),ns(i,:),ps(i,:));
endfor
# (no trailing semicolon, so the return value of fclose is echoed)
fclose(ff)

@@ -0,0 +1,54 @@
#! /usr/bin/perl -w
use strict;

# $Id: mpcoords2db.pl,v 1.1 2003/08/14 19:35:48 frabcus Exp $
# Converts mpcoords.txt file into database table

# The Public Whip, Copyright (C) 2003 Francis Irving and Julian Todd
# This is free software, and you are welcome to redistribute it under
# certain conditions. However, it comes with ABSOLUTELY NO WARRANTY.
# For details see the file LICENSE.html in the top level of the source.

use POSIX qw(strftime);

# Connect to the database through the scraper's shared helper module.
require "../scraper/db.pm";
my $dbh = db::connect();

# Octave identifies each MP by matrix row/column number (1, 2, 3, ...),
# whereas the real mp_ids neither start at 1 nor run sequentially.
# @mp_ixs translates between the two: element (row - 1) holds the mp_id.
require "mpquery.pm";
my @mp_ixs = mpquery::get_mp_ixs($dbh);

# Rebuild the coordinate cache table from scratch on every run.
my @rebuild_sql = (
    "drop table if exists pw_cache_mpcoords",
"create table pw_cache_mpcoords (
mp_id int not null,
x float not null,
y float not null,
z float not null,
unique(mp_id)
);",
);
for my $statement (@rebuild_sql)
{
    db::query($dbh, $statement);
}

# Header line of mpcoords.txt: "<number of MPs> <eig x> <eig y> <eig z>".
my $header = <>;
die "mpcoords2db.pl: no input (expected the contents of mpcoords.txt)\n"
    if (!defined $header);
my ($count, $eigx, $eigy, $eigz) = split(" ", $header);
die "mpcoords2db.pl: malformed header line: $header"
    unless (defined $eigz and $count =~ /^\d+$/);

# The three values are pasted into PHP source below, so insist that they
# are plain numbers rather than emit a broken include file.
my $number_re = qr/^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?$/;
for my $eig ($eigx, $eigy, $eigz)
{
    die "mpcoords2db.pl: header value '$eig' is not a number\n"
        unless ($eig =~ $number_re);
}

# Row numbers only mean something if the list of MPs is the same one that
# the coordinates were computed from; otherwise coordinates would be
# attributed to the wrong MPs.
die "mpcoords2db.pl: input describes $count MPs but the database now has "
    . scalar(@mp_ixs) . "; regenerate mpcoords.txt\n"
    if ($count != scalar(@mp_ixs));

print <<END;
<?php
# Generated by mpcoords2db.pl
\$eigx = $eigx;
\$eigy = $eigy;
\$eigz = $eigz;
?>
END

# Remaining lines: "<row number> <x> <y> <z> ...".  Anything after the
# fourth field (the quoted name and party) is ignored.
while (my $line = <>)
{
    next unless ($line =~ /\S/);    # tolerate blank lines

    my ($mp, $x, $y, $z) = split(" ", $line);
    die "mpcoords2db.pl: malformed coordinate line $.: $line"
        unless (defined $z and $mp =~ /^\d+$/);

    # Row numbers are 1-based.  Reject 0 explicitly: index -1 would
    # silently pick the last MP in the list.
    die "mpcoords2db.pl: row number $mp on line $. is outside 1.."
        . scalar(@mp_ixs) . "\n"
        if ($mp < 1 or $mp > scalar(@mp_ixs));

    my $mp_ix = $mp_ixs[$mp - 1];
    db::query($dbh, "insert into pw_cache_mpcoords (mp_id, x, y, z) values (?, ?, ?, ?)",
        $mp_ix, $x, $y, $z);
}


@@ -0,0 +1,26 @@
# $Id: mpquery.pm,v 1.1 2003/08/14 19:35:48 frabcus Exp $
# This is included by octavein.pl and mpcoords2db.pl.
# It defines the set of MPs which we are going to analyse.

# The Public Whip, Copyright (C) 2003 Francis Irving and Julian Todd
# This is free software, and you are welcome to redistribute it under
# certain conditions. However, it comes with ABSOLUTELY NO WARRANTY.
# For details see the file LICENSE.html in the top level of the source.

package mpquery;
use strict;

# Returns the list of mp_ids of the MPs to analyse: those present in both
# pw_mp and pw_cache_mpinfo with votes_attended > 0.
#
# Parameter: $dbh - database handle, passed straight to db::query.
# Returns:   list of mp_ids in ascending order.
#
# The position of an mp_id in this list is used as its matrix row number
# by octavein.pl and is mapped back again by mpcoords2db.pl.  Those are
# separate runs, so the order must be the same every time; hence the
# explicit "order by" (without it the row order is unspecified).
#
# No prototype: the sub takes an argument, so the former empty prototype
# "()" was wrong.
sub get_mp_ixs
{
    my $dbh = shift;
    my $sth = db::query($dbh, "select pw_mp.mp_id from pw_mp, pw_cache_mpinfo where
        pw_mp.mp_id = pw_cache_mpinfo.mp_id and votes_attended > 0
        order by pw_mp.mp_id");
    my @mp_ixs;
    while (my ($mp_id) = $sth->fetchrow_array())
    {
        push @mp_ixs, $mp_id;
    }
    return @mp_ixs;
}

1;
@@ -0,0 +1,8 @@
#!/bin/bash

# Runs ./wordfreq.pl once for each argument, printing the argument in
# double quotes on a line of its own first.

# "$@" and "$X" are quoted so that an argument containing spaces or glob
# characters is passed on as one word, exactly as it was given.
for X in "$@"
do
    echo "\"$X\""
    ./wordfreq.pl "$X"
done

@@ -0,0 +1,127 @@
#! /usr/bin/perl -w
use strict;

# $Id: octavein.pl,v 1.1 2003/08/14 19:35:48 frabcus Exp $
# Outputs a matrix of distances between pairs of MPs for
# use by the GNU Octave script mds.m to do clustering.

# The Public Whip, Copyright (C) 2003 Francis Irving and Julian Todd
# This is free software, and you are welcome to redistribute it under
# certain conditions. However, it comes with ABSOLUTELY NO WARRANTY.
# For details see the file LICENSE.html in the top level of the source.

use POSIX qw(strftime);
# Start the output with a comment recording when it was generated.
my $now_string = strftime("%a %b %e %H:%M:%S %Y", localtime());
print "# Autogenerated by octavein.pl from The Public Whip project on $now_string\n\n";

# Connect to the database through the scraper's shared helper module.
require "../scraper/db.pm";
my $dbh = db::connect();

# The MPs to analyse; mpquery.pm decides which ones are included.
require "mpquery.pm";
my @mp_ixs = mpquery::get_mp_ixs($dbh);

# The ids of all divisions.
my @div_ixs;
my $sth = db::query($dbh, "select division_id from pw_division");
while (my ($division_id) = $sth->fetchrow_array())
{
    push @div_ixs, $division_id;
}

# Load every vote into $votematrix[mp_id][division_id]: +1 is added for an
# "aye" and -1 for a "noe".  Any other vote value is a fatal error.
my %score_for_vote = ("aye" => 1, "noe" => -1);

$sth = db::query($dbh, "select division_id, mp_id, vote from pw_vote order by mp_id, division_id");
my @votematrix;
while (my ($div_dat, $mp_dat, $vote) = $sth->fetchrow_array())
{
    my $votescore = $score_for_vote{$vote};
    die "Unexpected $vote voted" if (!defined $votescore);

    $votematrix[$mp_dat][$div_dat] += $votescore;
}

# Build $metricD[mp_id][mp_id], the "distance" between every pair of MPs:
# the fraction of the divisions where both voted in which their votes
# differed.
my @metricD;
for my $mp_1 (@mp_ixs)
{
    for my $mp_2 (@mp_ixs)
    {
        # Count the divisions both MPs voted in, and how many of those
        # they voted the same way in.  A missing entry counts as 0.
        my ($divs_both_at, $divs_voted_same) = (0, 0);
        for my $div_ix (@div_ixs)
        {
            my $vote1 = $votematrix[$mp_1][$div_ix] || 0;
            my $vote2 = $votematrix[$mp_2][$div_ix] || 0;
            next if ($vote1 == 0 or $vote2 == 0);

            $divs_both_at++;
            $divs_voted_same++ if ($vote1 == $vote2);
        }

        # With no division in common the pair is as far apart as possible,
        # except that an MP is always at distance 0 from themselves
        # (no-voters, like Gerry Adams, need that special case).
        $metricD[$mp_1][$mp_2] =
              ($divs_both_at != 0) ? ($divs_both_at - $divs_voted_same) / $divs_both_at
            : ($mp_1 == $mp_2)     ? 0
            :                        1;
    }
}

# Returns $text as an Octave double-quoted string literal.  Backslashes and
# double quotes are escaped so that a name containing either cannot break
# the generated file.  Text without those characters is unchanged.
sub _octave_string
{
    my ($text) = @_;
    $text =~ s/(["\\])/\\$1/g;
    return "\"" . $text . "\"";
}

# Print it all out: for each MP three Octave assignments,
#   na<mp_id> = "Lastname, Firstname";
#   pa<mp_id> = "Party";
#   r<mp_id>  = [ that MP's row of the distance matrix ];
for my $mp_1 (@mp_ixs)
{
    my $sthmp = db::query($dbh, "select last_name, first_name, party from pw_mp where mp_id=?", $mp_1);
    die "Wrong number of rows back" if $sthmp->rows != 1;
    my ($lastname, $firstname, $party) = $sthmp->fetchrow_array();

    print "na" . $mp_1 . " = " . _octave_string($lastname . ", " . $firstname) . ";\n";
    print "pa" . $mp_1 . " = " . _octave_string($party) . ";\n";

    # join places the commas by position; the old test against the first
    # id would drop a comma if that id ever appeared twice in the list.
    print "r" . $mp_1 . " = [" . join(",", map { $metricD[$mp_1][$_] } @mp_ixs) . "];\n";
}

# Stack the per-MP variables printed above into three matrices, each with
# one row per MP in @mp_ixs order: D from the r<mp_id> rows, ns from the
# na<mp_id> names and ps from the pa<mp_id> parties.
for my $stack (["D=[\n", "r"], ["ns=[\n", "na"], ["ps=[\n", "pa"])
{
    my ($opening, $prefix) = @$stack;

    print $opening;
    for my $mp_ix (@mp_ixs)
    {
        print $prefix . $mp_ix . ";";
    }
    print "];\n";
}


@@ -0,0 +1,2 @@
*.class

0 comments on commit 78d0f62

Please sign in to comment.