In [14]:
import pandas as pd

# Load the dataset
file_path = '../raw_data/raw_election_results.csv'  # Update with your file path
df = pd.read_csv(file_path)

# Filter the dataset for UK-national results for the specified election years ([1992, 1997, 2001, 2005, 2010, 2015, 2017, 2019)
filtered_df = df[(df['Country'] == 'United Kingdom') & (df['Year'].isin([1992, 1997, 2001, 2005, 2010, 2015, 2017, 2019]))]

# Drop the specified columns except 'Seats'
columns_to_drop = ['Vote Change', 'Percent Change', 'Seat Change', 'Percent Seat Change']
filtered_df_dropped = filtered_df.drop(columns=columns_to_drop)

# Pivot the table for votes
pivot_votes_df = filtered_df_dropped.pivot_table(
    index=['Year', 'Geography', 'Country', 'Region or Nation', 'County'],
    columns='Party',
    values='Votes',
    fill_value=0
).reset_index()

# Pivot the table for seats
pivot_seats_df = filtered_df_dropped.pivot_table(
    index=['Year', 'Geography', 'Country', 'Region or Nation', 'County'],
    columns='Party',
    values='Seats',
    fill_value=0
).reset_index()

# Flatten the column hierarchy created by pivoting
pivot_votes_df.columns.name = None
pivot_seats_df.columns.name = None

# Perform a join on the pivoted votes and seats dataframes
pivot_df = pivot_votes_df.join(pivot_seats_df.set_index(['Year', 'Geography', 'Country', 'Region or Nation', 'County']),
                               on=['Year', 'Geography', 'Country', 'Region or Nation', 'County'],
                               lsuffix='_Votes', rsuffix='_Seats')

# Add a column that tallies the total votes for each year
pivot_df['Total Votes'] = pivot_df.filter(like='_Votes').sum(axis=1)

# Calculate the percentage of total votes for each party
parties = [
    'The Brexit Party', 'Conservative', 'Green Party', 'Liberal Democrats',
    'Labour', 'Plaid Cymru', 'SNP', 'UK Independence Party (UKIP)']

for party in parties:
    pivot_df[f'{party} Percentage'] = (pivot_df[f'{party}_Votes'] / pivot_df['Total Votes']) * 100

# Select only the relevant columns for the specified parties and the associated data
selected_parties = [
    'The Brexit Party', 'Conservative', 'Green Party', 'Liberal Democrats',
    'Labour', 'Plaid Cymru', 'SNP', 'UK Independence Party (UKIP)', 'Total Votes'
]

# Columns to select: votes, seats, and percentage columns for the specified parties
selected_columns = ['Year', 'Geography', 'Country']  # Base columns
for party in selected_parties:
    if party == 'Total Votes':
        selected_columns.append(party)
    else:
        selected_columns.append(f'{party}_Votes')
        selected_columns.append(f'{party}_Seats')
        selected_columns.append(f'{party} Percentage')

# Filter the dataframe to include only the selected columns
filtered_pivot_df = pivot_df[selected_columns]
#Renames DF
df = filtered_pivot_df
# Define a dictionary to rename the columns
rename_dict = {
    'Conservative_Votes': 'CON_ACTUAL_VOTES',
    'Conservative_Seats': 'CON_ACTUAL_SEATS',
    'Conservative Percentage': 'CON_ACTUAL_PERCENTAGE',
    'Labour_Votes': 'LABOUR_ACTUAL_VOTES',
    'Labour_Seats': 'LABOUR_ACTUAL_SEATS',
    'Labour Percentage': 'LABOUR_ACTUAL_PERCENTAGE',
    'Liberal Democrats_Votes': 'LIB_ACTUAL_VOTES',
    'Liberal Democrats_Seats': 'LIB_ACTUAL_SEATS',
    'Liberal Democrats Percentage': 'LIB_ACTUAL_PERCENTAGE',
    'The Brexit Party_Votes': 'BRX_ACTUAL_VOTES',
    'The Brexit Party_Seats': 'BRX_ACTUAL_SEATS',
    'The Brexit Party Percentage': 'BRX_ACTUAL_PERCENTAGE',
    'Green Party_Votes': 'GRE_ACTUAL_VOTES',
    'Green Party_Seats': 'GRE_ACTUAL_SEATS',
    'Green Party Percentage': 'GRE_ACTUAL_PERCENTAGE',
    'Plaid Cymru_Votes': 'PLC_ACTUAL_VOTES',
    'Plaid Cymru_Seats': 'PLC_ACTUAL_SEATS',
    'Plaid Cymru Percentage': 'PLC_ACTUAL_PERCENTAGE',
    'SNP_Votes': 'SNP_ACTUAL_VOTES',
    'SNP_Seats': 'SNP_ACTUAL_SEATS',
    'SNP Percentage': 'SNP_ACTUAL_PERCENTAGE',
    'UK Independence Party (UKIP)_Votes': 'UKI_ACTUAL_VOTES',
    'UK Independence Party (UKIP)_Seats': 'UKI_ACTUAL_SEATS',
    'UK Independence Party (UKIP) Percentage': 'UKI_ACTUAL_PERCENTAGE'
}

In [15]:
# Renames columns, then dataframe
filtered_pivot_df.rename(columns=rename_dict, inplace=True)
df = filtered_pivot_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_pivot_df.rename(columns=rename_dict, inplace=True)


In [16]:
# Calculates other
df['OTH_ACTUAL_VOTES'] = df ['Total Votes'] - df['BRX_ACTUAL_VOTES'] - df['CON_ACTUAL_VOTES'] - df['GRE_ACTUAL_VOTES'] - df['LIB_ACTUAL_VOTES'] - df['LABOUR_ACTUAL_VOTES'] - df['PLC_ACTUAL_VOTES'] - df['SNP_ACTUAL_VOTES'] - df['UKI_ACTUAL_VOTES']
df['OTH_SEATs'] = 650 - df['BRX_ACTUAL_SEATS']  - df['CON_ACTUAL_SEATS'] - df['GRE_ACTUAL_SEATS'] - df['LIB_ACTUAL_SEATS'] - df['LABOUR_ACTUAL_SEATS'] - df['PLC_ACTUAL_SEATS'] - df['SNP_ACTUAL_SEATS'] - df['UKI_ACTUAL_SEATS']
df['OTH_PERCENTAGE'] = 100 - df['BRX_ACTUAL_PERCENTAGE'] - df['CON_ACTUAL_PERCENTAGE'] - df['GRE_ACTUAL_PERCENTAGE'] - df['LIB_ACTUAL_PERCENTAGE'] - df['LABOUR_ACTUAL_PERCENTAGE'] - df['PLC_ACTUAL_PERCENTAGE'] - df['SNP_ACTUAL_PERCENTAGE'] - df['UKI_ACTUAL_PERCENTAGE']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['OTH_ACTUAL_VOTES'] = df ['Total Votes'] - df['BRX_ACTUAL_VOTES'] - df['CON_ACTUAL_VOTES'] - df['GRE_ACTUAL_VOTES'] - df['LIB_ACTUAL_VOTES'] - df['LABOUR_ACTUAL_VOTES'] - df['PLC_ACTUAL_VOTES'] - df['SNP_ACTUAL_VOTES'] - df['UKI_ACTUAL_VOTES']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['OTH_SEATs'] = 650 - df['BRX_ACTUAL_SEATS']  - df['CON_ACTUAL_SEATS'] - df['GRE_ACTUAL_SEATS'] - df['LIB_ACTUAL_SEATS'] - df['LABOUR_ACTUAL_SEATS'] - df['PLC_ACTUAL_SEATS'] - df['SNP_ACTUAL_SEATS'] - df['UKI_ACTUAL_SEATS

In [17]:
# Renames dataframe
election_results_df = df

In [18]:
# Trims dataframe down to just percentage column so that it can be used in the next step
percentage_columns = ['Year', 'Geography', 'Country']
percentage_columns += [col for col in filtered_pivot_df.columns if 'PERCENTAGE' in col]

# Filter the dataframe to include only the selected columns
percentage_df = election_results_df[percentage_columns]

In [22]:
percentage_df.rename(columns={'OTH_PERCENTAGE': 'OTH_ACTUAL_PERCENTAGE'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  percentage_df.rename(columns={'OTH_PERCENTAGE': 'OTH_ACTUAL_PERCENTAGE'}, inplace=True)


In [24]:
percentage_df['REF_ACTUAL_PERCENTAGE'] = percentage_df['BRX_ACTUAL_PERCENTAGE'] + percentage_df['UKI_ACTUAL_PERCENTAGE']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  percentage_df['REF_ACTUAL_PERCENTAGE'] = percentage_df['BRX_ACTUAL_PERCENTAGE'] + percentage_df['UKI_ACTUAL_PERCENTAGE']


In [25]:
percentage_df

Unnamed: 0,Year,Geography,Country,BRX_ACTUAL_PERCENTAGE,CON_ACTUAL_PERCENTAGE,GRE_ACTUAL_PERCENTAGE,LIB_ACTUAL_PERCENTAGE,LABOUR_ACTUAL_PERCENTAGE,PLC_ACTUAL_PERCENTAGE,SNP_ACTUAL_PERCENTAGE,UKI_ACTUAL_PERCENTAGE,OTH_ACTUAL_PERCENTAGE,REF_ACTUAL_PERCENTAGE
0,1992,UK,United Kingdom,0.0,41.932251,0.479721,17.808172,34.384286,0.466376,1.873134,0.0,3.05606,0.0
1,1997,UK,United Kingdom,0.0,30.684755,0.197289,16.737701,43.225813,0.514661,1.988811,0.340182,6.310788,0.340182
2,2001,UK,United Kingdom,0.0,31.69375,0.614058,18.247609,40.671313,0.742868,1.760778,1.480745,4.788879,1.480745
3,2005,UK,United Kingdom,0.0,32.359595,0.94909,22.025555,35.187187,0.64403,1.51862,2.232152,5.083771,2.232152
4,2010,UK,United Kingdom,0.0,36.053929,0.893429,23.028931,28.989564,0.557103,1.655156,3.098588,5.7233,3.098588
5,2015,UK,United Kingdom,0.0,36.810989,3.621042,7.8705,30.450615,0.591942,4.738156,12.643503,3.273254,12.643503
6,2017,UK,United Kingdom,0.0,42.342718,1.590911,7.36541,39.989322,0.51071,3.035607,1.844735,3.320586,1.844735
7,2019,UK,United Kingdom,2.012681,43.631688,2.61203,11.547741,32.080872,0.478805,3.881238,0.071281,3.683662,2.083962


In [26]:
# Exports to CSV - option 1
percentage_df.to_csv('../processed_data/1988_to_2019_results_clean.csv', index=True)