# Evaluating the model: Visualising results across constituencies

In order to visually inspect the predicted wins in my MRP model and their accuracy, I harness the capabilities of `geopandas`, an open source project that allows me to conduct spatial operations on geometric types. 

In [2]:
# Load libraries
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.colors as colors
from matplotlib.colors import ListedColormap

## Loading relevant dataframes

In [3]:
# Load est_true results
est_true_results = pd.read_csv("est_true_results.csv")

# Load constituency codes
constituencies = pd.read_csv("constituencies.csv")

## Which party won in each constituency?

In [7]:
# Get the column names for true vote share
true_columns = [col for col in est_true_results.columns if col.endswith('_true')]

# Find the winning party for each row
est_true_results['winner'] = est_true_results[true_columns].idxmax(axis=1)

# Remove '_true' suffix from the winner column
est_true_results['winner'] = est_true_results['winner'].str.replace('_true', '')

# Get the column names for estimated vote share
est_columns = [col for col in est_true_results.columns if col.endswith('_est')]

# Find the winning party predicted by estimated vote share
est_true_results['predicted_winner'] = est_true_results[est_columns].idxmax(axis=1)
est_true_results['predicted_winner'] = est_true_results['predicted_winner'].str.replace('_est', '')

# Compare the true winner with the predicted winner
est_true_results['correct'] = (est_true_results['winner'] == est_true_results['predicted_winner']).astype(int)

# Print the DataFrame
est_true_results.head()

  est_true_results['predicted_winner'] = est_true_results[est_columns].idxmax(axis=1)


Unnamed: 0,ccode,cname,con_true,labour_true,libdem_true,other_true,con_est,labour_est,libdem_est,other_est,winner,predicted_winner,correct
0,E14000530,Aldershot,58.374363,23.537512,14.437119,3.651006,36.624868,18.411756,10.673036,9.733252,con,con,1
1,E14000531,Aldridge-Brownhills,70.78949,20.370089,6.026639,2.813787,28.851879,18.71119,11.687915,15.788898,con,con,1
2,E14000532,Altrincham and Sale West,48.045212,36.835091,11.02204,4.097657,28.971533,15.26182,20.849231,13.790519,con,con,1
3,E14000533,Amber Valley,63.85323,26.795706,6.305002,3.046064,30.747374,23.20863,7.743018,14.753756,con,con,1
4,E14000534,Arundel and South Downs,57.917534,15.831813,21.24316,5.007491,44.879461,13.132948,11.543497,9.872903,con,con,1


In [8]:
# Create a new DataFrame to store party wins
party_wins = pd.DataFrame(index=['con', 'labour', 'libdem', 'other'], columns=['Predicted Wins', 'Actual Wins'])

# Calculate the counts for predicted and actual wins for each party
party_wins['Predicted Wins'] = est_true_results['predicted_winner'].value_counts(dropna=True)
party_wins['Actual Wins'] = est_true_results['winner'].value_counts(dropna=True)
# Change index names
party_wins.index = ['Conservative', 'Labour', 'Liberal Democrats', 'Other']
# Export the DataFrame to a CSV file
party_wins.to_csv('party_wins.csv')

In [9]:
# Count the number of 1s and 0s in the "correct" column
correct_counts = est_true_results['correct'].value_counts()
# Print the counts
print("Number of correctly predicted constituencies:", correct_counts.get(1, 0))
print("Number of wrongly predicted constituencies:", correct_counts.get(0, 0))

Number of correctly predicted constituencies: 463
Number of wrongly predicted constituencies: 110


## Visualising vote shares across constituencies with GeoPandas

First I create a geopandas dataframe with boundaries for Westminister Parliamentary constituencies, following which I merge the dataframe with the results (estimated and true) that I created earlier, on `ccode`. This will allow me to map the predicted outcomes of the elections.

In [10]:
import geopandas as gpd

# Load shapefile
gdf = gpd.read_file("Westminster_Parliamentary_Constituencies_Dec_2021/PCON_DEC_2021_UK_BFC.shp")
gdf = gdf.to_crs(epsg=3857) 

# Filter rows in gdf based on ccode column of constituencies dataframe
con_gdf = gdf[gdf['PCON21CD'].isin(constituencies['ccode'])]
con_gdf_copy = con_gdf.copy()

# Rename column "PCON21CD" to "ccode" 
con_gdf_copy.rename(columns={'PCON21CD': 'ccode'}, inplace=True)

# Rename column "PCON21NM" to "Name" 
con_gdf_copy.rename(columns={'PCON21NM': 'Name'}, inplace=True)

# Merge con_gdf_copy and estimates by ccode, excluding cname
merged_gdf = con_gdf_copy.merge(est_true_results[['ccode', 'con_est', 'con_true', 
                                                  'labour_est', 'labour_true', 'libdem_est', 'libdem_true', 'other_est', 'other_true',
                                                 'winner', 'predicted_winner', 'correct']], 
                                on='ccode', how='inner')

# Change column name "predicted_winner" to "Predicted Winner" and "winner" to "Winner"
merged_gdf.rename(columns={'predicted_winner': 'Predicted Winner'}, inplace=True)
merged_gdf.rename(columns={'winner': 'Winner'}, inplace=True)


# Map the values in the "Predicted Winner" column to desired party names
merged_gdf['Predicted Winner'] = merged_gdf['Predicted Winner'].map({
    'con': 'Conservative',
    'labour': 'Labour',
    'libdem': 'Liberal Democrats',
    'other': 'Other'
})

merged_gdf['Winner'] = merged_gdf['Winner'].map({
    'con': 'Conservative',
    'labour': 'Labour',
    'libdem': 'Liberal Democrats',
    'other': 'Other'
})


# Display the modified DataFrame
merged_gdf.head()

Unnamed: 0,ccode,Name,BNG_E,BNG_N,LONG,LAT,GlobalID,geometry,con_est,con_true,labour_est,labour_true,libdem_est,libdem_true,other_est,other_true,Winner,Predicted Winner,correct
0,E14000530,Aldershot,484884,155126,-0.7841,51.289,0b177e6c-7c86-4edf-8a86-d69d837c5c77,"POLYGON ((-86326.424 6680224.796, -86310.756 6...",36.624868,58.374363,18.411756,23.537512,10.673036,14.437119,9.733252,3.651006,Conservative,Conservative,1
1,E14000531,Aldridge-Brownhills,404723,302568,-1.93166,52.6209,82968be3-34da-4fce-b950-f72584f497c3,"POLYGON ((-212073.667 6917268.805, -212126.221...",28.851879,70.78949,18.71119,20.370089,11.687915,6.026639,15.788898,2.813787,Conservative,Conservative,1
2,E14000532,Altrincham and Sale West,374132,389051,-2.39049,53.3977,0a4c3913-e570-4150-a32d-c3f34d3943f0,"POLYGON ((-257814.999 7063805.992, -257817.058...",28.971533,48.045212,15.26182,36.835091,20.849231,11.02204,13.790519,4.097657,Conservative,Conservative,1
3,E14000533,Amber Valley,440478,349675,-1.3977,53.0428,8cf153a6-56e9-47b1-a4ae-4efe843f9c36,"POLYGON ((-148237.221 6997988.988, -148237.213...",30.747374,63.85323,23.20863,26.795706,7.743018,6.305002,14.753756,3.046064,Conservative,Conservative,1
4,E14000534,Arundel and South Downs,510686,115542,-0.42635,50.9287,5385035f-d9da-4043-80d8-b28a14dba9aa,"MULTIPOLYGON (((-62776.355 6592935.577, -62781...",44.879461,57.917534,13.132948,15.831813,11.543497,21.24316,9.872903,5.007491,Conservative,Conservative,1


### Predicted winners with the MRP model

In [None]:
# Define colors for each party
party_colors = {
    "Conservative": "darkblue",
    "Labour": "darkred",
    "Liberal Democrats": "darkorange",
    "Other": "darkgreen"
}

# Create a colormap using the party colors
custom_cmap = colors.ListedColormap([party_colors[party] for party in party_colors])

predicted_winners = merged_gdf.explore(
    column="Predicted Winner",  # make choropleth based on "predicted_winner" column
    tooltip=["Name"],  # show "Name", "winner", and "predicted winner" values in tooltip (on hover)
    popup=True,  # show all values in popup (on click)
    tiles="CartoDB positron",  # use "CartoDB positron" tiles
    cmap=custom_cmap,  # specify custom color map with colors for each party
    legend=True,  # show legend
)

outfp = "/Users/manasi/Desktop/stack-exercise/mrp-modelling/predicted_winners.html"

predicted_winners.save(outfp)

### How many constituencies did the model accurately predict the winning party?

In [None]:
import matplotlib.colors as colors


correct_predictions = merged_gdf.explore(
    column="correct",  # make choropleth based on "correct" column
    tooltip=["Name", "Winner", "Predicted Winner"],  # show "Name", "Winner", and "Predicted Winner" values in tooltip (on hover)
    popup=True,  # show all values in popup (on click)
    tiles="CartoDB positron",  # use "CartoDB positron" tiles
    cmap = "RdYlGn",  # specify custom color map with two colors
    legend=False,
)

outfp = "/Users/manasi/Desktop/stack-exercise/mrp-modelling/correct_predictions.html"
correct_predictions.save(outfp)