In [8]:
import pandas as pd
import plotly.express as px

# Read the election results CSV into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/Users/miriam/Documents/GitHub/dsa_hw4/uk_data_202405051509.csv'
)

# Names of the per-party vote-count columns used throughout the notebook.
party_columns = [
    'ConVote19',
    'LabVote19',
    'LDVote19',
    'SNPVote19',
    'PCVote19',
    'UKIPVote19',
    'GreenVote19',
    'BrexitVote19',
]

# Row-wise sum over the listed party columns, stored as a new column.
# Only the parties named above contribute to this figure.
df['TotalVote19'] = df.loc[:, party_columns].sum(axis='columns')



In [9]:
# Distinct region labels. Missing labels are dropped first: a NaN label never
# compares equal to anything, so filtering on it would select no rows and
# produce an empty chart.
regions = df['region'].dropna().unique()

# Columns summed for every chart: each party plus the combined total.
# Built once because it does not change between regions.
vote_columns = party_columns + ['TotalVote19']

# Draw one bar chart per region.
for region in regions:
    region_data = df[df['region'] == region]
    # Sum the rows of this region column by column; the resulting Series is
    # indexed by column name, which becomes the x axis.
    summed_data = region_data[vote_columns].sum()
    fig = px.bar(summed_data, x=summed_data.index, y=summed_data.values, title=f'Voter Turnout in {region}')

    # Render the chart inline (nothing is written to disk).
    fig.show()



In [10]:


# Per-party vote-count columns that will be unpivoted.
party_columns = [
    'ConVote19',
    'LabVote19',
    'LDVote19',
    'SNPVote19',
    'PCVote19',
    'UKIPVote19',
    'GreenVote19',
    'BrexitVote19',
]

# Reshape wide -> long: one row per (original row, party) pair, keeping the
# region as the identifier and moving the column name into 'Party'.
df_long = pd.melt(
    df,
    id_vars=['region'],
    value_vars=party_columns,
    var_name='Party',
    value_name='Votes',
)

# Print the first five rows as a quick sanity check of the new layout.
print(df_long.head(5))


     region      Party    Votes
0     Wales  ConVote19   6518.0
1     Wales  ConVote19  14687.0
2  Scotland  ConVote19   7535.0
3  Scotland  ConVote19  16398.0
4  Scotland  ConVote19  22752.0


In [11]:
# Collapse the long-format rows to a single total per (party, region) pair
# before plotting. px.bar draws one bar mark per input row, and in grouped
# mode marks that share a trace and an x position are drawn on top of each
# other rather than added together, so plotting the raw rows would not show
# the totals that the axis label promises.
# sort=False keeps parties and regions in their order of first appearance.
votes_by_party_region = (
    df_long
    .groupby(['Party', 'region'], as_index=False, sort=False)['Votes']
    .sum()
)

fig = px.bar(votes_by_party_region, x='Party', y='Votes', color='region', barmode='group',
             labels={'Votes': 'Total Votes'}, title='Voter Turnout by Party and Region')

# Customize the layout; parties are ordered by their overall total, largest first.
fig.update_layout(
    xaxis_title="Political Parties",
    yaxis_title="Voter Turnout",
    legend_title="Regions",
    xaxis={'categoryorder':'total descending'}
)

# Show the plot
fig.show()


In [16]:
import pandas as pd
import plotly.graph_objects as go

# Load the dataset
file_path = '/Users/miriam/Documents/GitHub/dsa_hw4/uk_data_202405051509.csv'
df = pd.read_csv(file_path)

# Example region to visualize
selected_region = 'Wales'  # Replace with a real region name from your dataset

# Check if the selected region exists in the data
if selected_region not in df['region'].values:
    print(f"No data available for {selected_region}")
else:
    # Take the first row whose region matches.
    # NOTE(review): this uses a single row, not an aggregate over the region,
    # although the chart title names the whole region -- confirm this is intended.
    region_data = df[df['region'] == selected_region].iloc[0]

    # The last six columns of the file are treated as the demographic fields.
    last_six_columns = df.columns[-6:]

    # Build a vertical bar chart: categories on x, percentages on y.
    fig = go.Figure()
    for col in last_six_columns:
        primary_value = region_data[col]
        complementary_value = 100 - primary_value  # Assuming the data are percentages

        # Bar for the demographic share itself. Orientation is vertical so it
        # agrees with the category-on-x / value-on-y data and the 0-100 y axis
        # configured in the layout below.
        fig.add_trace(go.Bar(
            x=[col],
            y=[primary_value],
            name=f'{col} %',
            orientation='v'
        ))

        # Bar for the remainder (100 minus the share), shown as its own category.
        fig.add_trace(go.Bar(
            x=[f'Non-{col}'],
            y=[complementary_value],
            name=f'Non-{col} %',
            orientation='v'
        ))

    # Update the plot layout
    fig.update_layout(
        title=f'Demographic Profiles for {selected_region}',
        barmode='overlay',  # Use 'group' if you prefer separated bars
        xaxis_title='Demographic Categories',
        yaxis=dict(title='Percentage', range=[0, 100])
    )

    # Display the plot
    fig.show()


In [19]:
import pandas as pd
import plotly.graph_objects as go

# Load the dataset
file_path = '/Users/miriam/Documents/GitHub/dsa_hw4/uk_data_202405051509.csv'
df = pd.read_csv(file_path)

# Example region to visualize
selected_region = 'Wales'  # Replace with an actual region name from your dataset

# Check if the selected region exists in the data
if selected_region not in df['region'].values:
    print(f"No data available for {selected_region}")
else:
    # Take the first row whose region matches (a single row, not an aggregate).
    region_data = df[df['region'] == selected_region].iloc[0]

    # The last five columns of the file are treated as the demographic fields.
    # NOTE(review): an earlier cell in this notebook uses the last six columns;
    # confirm which count is correct for this dataset.
    demographic_columns = df.columns[-5:]

    # Create a figure
    fig = go.Figure()

    for col in demographic_columns:
        value = region_data[col]
        complement = 100 - value  # Calculate the complement for each category

        # Adding the demographic data as a positive bar.
        # Labels use one decimal place so floating-point residue from the
        # subtraction (e.g. 35.900000000000006) never reaches the chart.
        fig.add_trace(go.Bar(
            y=[col],
            x=[value],
            orientation='h',
            name=col,
            text=f'{value:.1f}%',
            textposition='inside',
            insidetextanchor='start',  # Text inside at start of the bar
            marker=dict(color='blue')
        ))

        # Adding the complementary demographic data as a negative bar so it
        # extends to the left of the zero line.
        fig.add_trace(go.Bar(
            y=[col],
            x=[-complement],
            orientation='h',
            name='Non-' + col,
            text=f'{complement:.1f}%',
            textposition='inside',
            insidetextanchor='end',  # Text inside at end of the bar
            marker=dict(color='red')
        ))

    # Update the layout to suit the population pyramid style
    fig.update_layout(
        title=f'Demographic Profile for {selected_region}',
        barmode='overlay',  # Overlaid bars to show both directions from the center
        xaxis=dict(
            title='Percentage',
            range=[-100, 100],  # Set the range from -100 to 100 for symmetry
            tickvals=[-100, -50, 0, 50, 100],
            ticktext=['100%', '50%', '0%', '50%', '100%']  # Customize tick marks to show positive numbers
        ),
        yaxis=dict(title='Demographic Categories')
    )

    # Display the plot
    fig.show()
