In [10]:
import json

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

# Importing the data

In [11]:
def _load_indexed_csv(filename):
    """Read one election CSV, using its first column as the row index."""
    return pd.read_csv(filename, index_col=0)


# Candidate-level results, one frame per election
candid13 = _load_indexed_csv("candid13.csv")           # 2013 election
candid18 = _load_indexed_csv("candid18.csv")           # 2018 election
candid18post = _load_indexed_csv("candid18_post.csv")  # 2018 election, after bye elections
candid23 = _load_indexed_csv("candid23.csv")           # 2023 election

# Constituency-level results, one frame per election
const13 = _load_indexed_csv("const13.csv")             # 2013 election
const18 = _load_indexed_csv("const18.csv")             # 2018 election
const18post = _load_indexed_csv("const18_post.csv")    # 2018 election, after bye elections
const23 = _load_indexed_csv("const23.csv")             # 2023 election

# Lookup from constituency number to constituency name
constnum = _load_indexed_csv("const_num.csv")

## Basic Parliament Seats Data

# Creating Plots

In [15]:
def get_total_voteshare_percent():
    """Summarise state-wide vote share and seats for BJP, INC and JD(S).

    Reads the module-level frames ``candid13``/``const13``,
    ``candid18``/``const18`` and ``candid23``/``const23``.

    Returns:
        DataFrame with one row per (party, year) and the columns
        ``party``, ``total votes``, ``party_total``, ``vote share percent``,
        ``year`` and ``constituencies won``.
    """
    major = ['BJP', 'INC', 'JD(S)']

    def process_year(candid_df, const_df, year):
        """Build the per-party summary for a single election year."""
        # Every vote cast in the election, across all parties
        total_votes = candid_df['votes'].sum()

        # Votes per party; the election-wide total goes in as a plain column
        grouped = candid_df.groupby('party')['votes'].sum().reset_index(name='party_total')
        grouped.insert(1, 'total votes', total_votes)

        # Vote share as a percentage, rounded to two decimals
        grouped['vote share percent'] = (
            (grouped['party_total'] / grouped['total votes']) * 100
        ).round(2)
        grouped['year'] = year

        grouped = grouped[grouped['party'].isin(major)]

        # Each row of const_df is counted as one seat for its 'party'
        constituencies_won = const_df.groupby('party').size().reset_index(name='constituencies won')

        # Left merge so a major party that won no seats is reported with 0
        # instead of silently disappearing from the table.
        result = pd.merge(grouped, constituencies_won, on='party', how='left')
        result['constituencies won'] = result['constituencies won'].fillna(0).astype(int)
        return result

    yearly = [
        process_year(candid13, const13, 2013),
        process_year(candid18, const18, 2018),
        process_year(candid23, const23, 2023),
    ]

    # Stack the three elections into one table
    return pd.concat(yearly, ignore_index=True)

In [16]:
get_total_voteshare_percent()

Unnamed: 0,party,total votes,party_total,vote share percent,year,constituencies won
0,BJP,30694374.0,6231660.0,20.3,2013,40
1,INC,30694374.0,11254487.0,36.67,2013,122
2,JD(S),30694374.0,6091769.0,19.85,2013,40
3,BJP,35930149.0,13297224.0,37.01,2018,104
4,INC,35930149.0,13652276.0,38.0,2018,80
5,JD(S),35930149.0,6723011.0,18.71,2018,37
6,BJP,38823869.0,14210103.0,36.6,2023,66
7,INC,38823869.0,16616300.0,42.8,2023,135
8,JD(S),38823869.0,5268788.0,13.57,2023,19


In [153]:
def process_BJY_year(candid_df, const_df, year, BJY_const_list):
    """Summarise INC's result inside a given set of constituencies for one year.

    Args:
        candid_df: candidate-level frame with 'constituency', 'party' and
            'votes' columns.
        const_df: constituency-level frame with 'constituency' and 'party'
            columns; each row is counted as one seat for its 'party'.
        year: value stored in the 'year' column of the result.
        BJY_const_list: constituency names to restrict the analysis to.

    Returns:
        DataFrame with the columns 'party', 'total votes', 'party_total',
        'vote share percent', 'year' and 'constituencies won'. It has one row
        for INC, or no rows when INC has no candidate rows in the selection.
    """
    # Restrict the candidate rows to the selected constituencies
    route_candidates = candid_df[candid_df['constituency'].isin(BJY_const_list)]

    # All votes cast in those constituencies, across every party
    total_votes = route_candidates['votes'].sum()

    # Votes per party inside the selection
    grouped = route_candidates.groupby('party')['votes'].sum().reset_index(name='party_total')
    grouped.insert(1, 'total votes', total_votes)

    # Vote share as a percentage, rounded to two decimals
    grouped['vote share percent'] = (
        (grouped['party_total'] / grouped['total votes']) * 100
    ).round(2)
    grouped['year'] = year

    # Only INC is reported
    result = grouped[grouped['party'] == 'INC'].reset_index(drop=True)

    # Count INC rows directly rather than inner-merging a grouped count:
    # the merge returned an empty frame whenever INC won nothing in the
    # selection, which hid a legitimate "0 seats" result.
    route_winners = const_df[const_df['constituency'].isin(BJY_const_list)]
    result['constituencies won'] = int((route_winners['party'] == 'INC').sum())

    return result

# Constituencies on the Bharat Jodo Yatra route
BJY_const_list = [
    'Raichur', 'Raichur Rural', 'Bellary', 'Bellary City',
    'Molakalmuru', 'Challakere', 'Hiriyur', 'Chiknayakanhalli',
    'Tiptur', 'Turuvekere', 'Nagamangala', 'Melukote',
    'Mandya', 'Shrirangapattana', 'Chamundeshwari', 'Krishnaraja',
    'Chamaraja', 'Narasimharaja', 'Varuna', 'Nanjangud',
    'Gundlupet',
]

# One INC summary per election; 2018 uses the post-bye-election frames
result13, result18, result23 = (
    process_BJY_year(candidates, winners, election_year, BJY_const_list)
    for candidates, winners, election_year in (
        (candid13, const13, 2013),
        (candid18post, const18post, 2018),
        (candid23, const23, 2023),
    )
)

# Stack the three elections and display the table
final_result = pd.concat([result13, result18, result23], ignore_index=True)
final_result

Unnamed: 0,party,total votes,party_total,vote share percent,year,constituencies won
0,INC,2988027.0,1026181.0,34.34,2013,11
1,INC,3532035.0,1183936.0,33.52,2018,5
2,INC,3769675.0,1632169.0,43.3,2023,15


## Karnataka's Electoral History

In [3]:
# Major parties and the colour each one is drawn in
major_parties = ['BJP', 'INC', 'JD(S)']
party_colors = {'BJP': '#FF7500', 'INC': '#0D5BE1', 'JD(S)': 'green'}

# Election years covered by the history plots
years = [2013, 2018, 2023]

# Year -> candidate-level frame (frames must already be loaded)
candid_dfs = dict(zip(years, (candid13, candid18, candid23)))

# Year -> constituency-level frame (frames must already be loaded)
const_dfs = dict(zip(years, (const13, const18, const23)))


### Plot 1: Vote Share Percent by Major Party and Year

In [None]:
# Build one (year, party, vote share) table per election
vote_share_data = []

for year in years:
    candid_df = candid_dfs[year]

    # All votes cast in the election
    total_votes = candid_df['votes'].sum()

    # Votes received by each party
    votes_by_party = candid_df.groupby('party')['votes'].sum().reset_index()

    # Keep the major parties; .copy() so the column assignments below act on
    # an independent frame rather than on a filtered slice
    votes_by_party = votes_by_party[votes_by_party['party'].isin(major_parties)].copy()

    # Vote share as a percentage of all votes cast
    votes_by_party['vote_share_percent'] = (votes_by_party['votes'] / total_votes) * 100

    # Year as a string so the x-axis is categorical
    votes_by_party['year'] = str(year)

    vote_share_data.append(votes_by_party[['year', 'party', 'vote_share_percent']])

# Combine all years into a single dataframe
vote_share_df = pd.concat(vote_share_data, ignore_index=True)

# Grouped bars: one group per year, one bar per party
fig_vote_share = px.bar(
    vote_share_df,
    x='year',
    y='vote_share_percent',
    color='party',
    barmode='group',
    labels={'vote_share_percent': 'Vote Share (%)', 'year': 'Year', 'party': 'Party'},
    color_discrete_map=party_colors
)

# Shared font settings for both axes
axis_tick_font = dict(family="Cambria", size=14, color="black")
axis_title_font = dict(family="Cambria", size=16, color="black")

# Axis titles use the nested title=dict(text=..., font=...) form; the
# `titlefont` shorthand is deprecated and rejected by newer Plotly releases.
fig_vote_share.update_layout(
    dragmode=False,
    xaxis=dict(
        title=dict(text='Year', font=axis_title_font),
        tickmode='linear',
        showgrid=False,  # horizontal gridlines only
        tickfont=axis_tick_font,
    ),
    yaxis=dict(
        title=dict(text='Vote Share (%)', font=axis_title_font),
        gridcolor='lightgrey',
        showgrid=True,
        zeroline=True,
        tickvals=[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
        tickfont=axis_tick_font,
    ),
    title=dict(
        font=dict(family="Cambria", size=18, color="black"),
        text="Vote Share Percentages of Major Parties over the years",
        xanchor='center',
        x=0.5
    ),
    margin=dict(pad=10, t=70, b=70, r=70, l=70),
    legend=dict(
        title=dict(text='Party', font=dict(family="Cambria", size=14, color="black")),
        font=dict(family="Cambria", size=13, color="black"),
    ),
    plot_bgcolor='#f8f8f8',
    paper_bgcolor='#f8f8f8',
    autosize=True,
)

# Save the figure as JSON and display it
pio.write_json(fig_vote_share, 'plots/keh_voteshare.json')
fig_vote_share


### Plot 2: Constituencies Won by Major Parties by Year

In [None]:
# Build one (year, party, seats won) table per election
seats_won_data = []

for year in years:
    const_df = const_dfs[year]

    # Each row of const_df counts as one seat for its 'party'
    seats_won = const_df['party'].value_counts().reset_index()
    seats_won.columns = ['party', 'seats_won']

    # Keep the major parties; .copy() so the assignment below acts on an
    # independent frame rather than on a filtered slice
    seats_won = seats_won[seats_won['party'].isin(major_parties)].copy()

    # Year as a string so the x-axis is categorical
    seats_won['year'] = str(year)

    seats_won_data.append(seats_won[['year', 'party', 'seats_won']])

# Combine all years into a single dataframe
seats_won_df = pd.concat(seats_won_data, ignore_index=True)

# Grouped bars: one group per year, one bar per party
fig_seats_won = px.bar(
    seats_won_df,
    x='year',
    y='seats_won',
    color='party',
    barmode='group',
    labels={'seats_won': 'Constituencies Won', 'year': 'Year', 'party': 'Party'},
    color_discrete_map=party_colors
)

# Shared font settings for both axes
axis_tick_font = dict(family="Cambria", size=14, color="black")
axis_title_font = dict(family="Cambria", size=16, color="black")

# Axis titles use the nested title=dict(text=..., font=...) form; the
# `titlefont` shorthand is deprecated and rejected by newer Plotly releases.
fig_seats_won.update_layout(
    dragmode=False,
    xaxis=dict(
        title=dict(text='Year', font=axis_title_font),
        tickmode='linear',
        showgrid=False,  # horizontal gridlines only
        tickfont=axis_tick_font,
    ),
    yaxis=dict(
        title=dict(text='Constituencies Won', font=axis_title_font),
        gridcolor='lightgrey',
        showgrid=True,
        zeroline=True,
        tickvals=[0, 20, 40, 60, 80, 100, 120, 140],
        tickfont=axis_tick_font,
    ),
    title=dict(
        font=dict(family="Cambria", size=18, color="black"),
        text="Constituencies Won by Major Parties over the years",
        xanchor='center',
        x=0.5
    ),
    margin=dict(pad=10, t=70, b=70, l=70, r=70),
    legend=dict(
        title=dict(text='Party', font=dict(family="Cambria", size=14, color="black")),
        font=dict(family="Cambria", size=13, color="black"),
    ),
    plot_bgcolor='#f8f8f8',
    paper_bgcolor='#f8f8f8',
    autosize=True,
)

# Save the figure as JSON and display it
pio.write_json(fig_seats_won, 'plots/keh_constwon.json')
fig_seats_won

## Bharat Jodo Yatra

### Map plots

In [13]:
# Fill colour per winning party on the maps
party_colors = {
    'INC': '#0D5BE1', 'BJP': '#FF7500', 'IND': 'grey',
    'JD(S)': 'green', 'KRPP': 'brown', 'SKP': 'yellow',
}

# Constituencies on the Bharat Jodo Yatra route
BJY_const_list = [
    'Raichur', 'Raichur Rural', 'Bellary', 'Bellary City',
    'Molakalmuru', 'Challakere', 'Hiriyur', 'Chiknayakanhalli',
    'Tiptur', 'Turuvekere', 'Nagamangala', 'Melukote',
    'Mandya', 'Shrirangapattana', 'Chamundeshwari', 'Krishnaraja',
    'Chamaraja', 'Narasimharaja', 'Varuna', 'Nanjangud',
    'Gundlupet',
]

# GeoJSON used for the constituency maps (features are matched on properties.AC_CODE)
with open('output.json', 'r', encoding='utf-8') as geojson_file:
    geojson = json.load(geojson_file)

# Build the route map for one election
def create_election_map(df_const, year):
    """Draw winners per constituency, highlighting the Bharat Jodo Yatra route.

    Constituencies in ``BJY_const_list`` are drawn at full opacity; all others
    are drawn at opacity 0.1 and kept out of the legend.

    Args:
        df_const: frame with 'constituency', 'const_num' and 'party' columns.
        year: used in the title; any year other than 2023 is labelled
            "projected for <year>".

    Returns:
        The combined Plotly figure.
    """
    winners = df_const[['constituency', 'const_num', 'party']].copy()

    # Parties without an entry in party_colors are pooled under 'OTHERS'
    known_parties = set(party_colors)
    winners['party'] = winners['party'].map(lambda p: p if p in known_parties else 'OTHERS')

    on_route = winners['constituency'].isin(BJY_const_list)

    def _party_layer(rows, layer_opacity):
        """Choropleth of `rows` coloured by party at the given opacity."""
        return px.choropleth_map(
            rows,
            geojson=geojson,
            featureidkey='properties.AC_CODE',
            locations='const_num',
            color='party',
            color_discrete_map=party_colors,
            hover_name='constituency',
            category_orders={'party': list(party_colors.keys())},
            map_style='white-bg',
            opacity=layer_opacity,
            center={"lat": 15.3173, "lon": 76.7139},
            zoom=5.6,
        )

    # Faded layer: constituencies off the route, hidden from the legend
    fig_non_BJY = _party_layer(winners[~on_route], 0.1)
    fig_non_BJY.update_traces(patch=dict(showlegend=False, showscale=False))
    fig_non_BJY.update_layout(dragmode=False)

    # Highlighted layer: constituencies on the route (legend entries kept)
    fig_bjy = _party_layer(winners[on_route], 1)
    fig_bjy.update_traces(patch=dict(showscale=False))
    fig_bjy.update_layout(dragmode=False)

    # Copy the faded traces onto the highlighted figure
    for faded_trace in fig_non_BJY.data:
        fig_bjy.add_trace(faded_trace)

    if year == 2023:
        title_text = f'Bharat Jodo Yatra Route - {year}'
    else:
        title_text = f'Bharat Jodo Yatra Route - projected for {year}'

    cambria_14 = dict(family="Cambria", size=14, color="black")

    fig_bjy.update_layout(
        geo=dict(
            fitbounds="locations",
            visible=False,
            center={"lat": 15.3173, "lon": 76.7139},  # centre of the Karnataka view
        ),
        autosize=True,
        uirevision='lock',   # keep the view fixed across updates
        dragmode=False,      # no panning or zooming by drag
        plot_bgcolor='white',
        paper_bgcolor='white',
        title=dict(
            font=dict(family="Cambria", size=18, color="black"),
            text=title_text,
            xanchor='center',
            x=0.5,
            yanchor='top',
            y=0.99
        ),
        margin=dict(pad=10, t=10, b=10, l=10, r=40),
        legend=dict(valign="middle", yanchor='middle', y=0.8),
        legend_title_font=cambria_14,
        legend_title_text='Party',
        legend_font=cambria_14,
        legend_itemclick=False,        # legend entries are not clickable
        legend_itemdoubleclick=False,
    )

    return fig_bjy

# Serialise each map and parse it back into plain dicts/lists
fig_18, fig_23 = (
    json.loads(pio.to_json(create_election_map(winners_frame, map_year)))
    for winners_frame, map_year in ((const18, 2018), (const23, 2023))
)

# Blank out every "geojson" entry, at any depth, in place
def remove_geojson_data(data):
    """Set the value of every "geojson" key inside `data` to None.

    Walks nested dicts and lists recursively and mutates them in place.
    The value under a "geojson" key is replaced, not descended into.
    Anything that is neither a dict nor a list is ignored. Returns None.
    """
    if isinstance(data, dict):
        if "geojson" in data:
            data["geojson"] = None
        children = [value for key, value in data.items() if key != "geojson"]
    elif isinstance(data, list):
        children = data
    else:
        return

    for child in children:
        if isinstance(child, (dict, list)):
            remove_geojson_data(child)

# Strip the embedded GeoJSON from each figure, then save it
for map_dict, map_path in (
    (fig_18, "plots/BJYmap2018.json"),
    (fig_23, "plots/BJYmap2023.json"),
):
    remove_geojson_data(map_dict)
    with open(map_path, "w") as f:
        json.dump(map_dict, f)


# # Create the map for 2018
# pio.write_json(
#     create_election_map(const18, 2018),
#     file='plots/BJYmap2018.json',
# )
# # Create the map for 2023
# pio.write_json(
#     create_election_map(const23, 2023),
#     file='plots/BJYmap2023.json',
# )


### Bar charts

In [113]:
# Constituencies on the Bharat Jodo Yatra route
BJY_const_list = [
    'Raichur', 'Raichur Rural', 'Bellary', 'Bellary City',
    'Molakalmuru', 'Challakere', 'Hiriyur', 'Chiknayakanhalli',
    'Tiptur', 'Turuvekere', 'Nagamangala', 'Melukote',
    'Mandya', 'Shrirangapattana', 'Chamundeshwari', 'Krishnaraja',
    'Chamaraja', 'Narasimharaja', 'Varuna', 'Nanjangud',
    'Gundlupet',
]

# Frames per election year; 2018 uses the post-bye-election data
years_data = {
    2013: {'candid': candid13, 'const': const13},
    2018: {'candid': candid18post, 'const': const18post},
    2023: {'candid': candid23, 'const': const23},
}

# include=True -> route only, False -> everything else, None -> no filter
scenarios = [
    {'const_type': 'BJY Passed through', 'include': True},
    {'const_type': "BJY didn't pass through", 'include': False},
    {'const_type': 'Overall', 'include': None},
]

# One INC summary row per (scenario, year)
results = []

for scenario in scenarios:
    const_type, include = scenario['const_type'], scenario['include']

    for year, data in years_data.items():
        candid_df, const_df = data['candid'], data['const']

        # Pick the rows belonging to this scenario
        if include is None:
            candid_filtered, const_filtered = candid_df, const_df
        else:
            candid_on_route = candid_df['constituency'].isin(BJY_const_list)
            const_on_route = const_df['constituency'].isin(BJY_const_list)
            if include:
                candid_filtered = candid_df[candid_on_route]
                const_filtered = const_df[const_on_route]
            else:
                candid_filtered = candid_df[~candid_on_route]
                const_filtered = const_df[~const_on_route]

        # All votes cast in the selected constituencies
        total_votes = candid_filtered['votes'].sum()

        # Votes and vote share per party
        party_votes = (
            candid_filtered.groupby('party')['votes'].sum()
            .rename('party_total')
            .reset_index()
        )
        party_votes['total votes'] = total_votes
        party_votes['vote share percent'] = (party_votes['party_total'] / total_votes * 100).round(2)

        # Seats per party (one row of const_filtered = one seat)
        const_wins = const_filtered.groupby('party').size().reset_index(name='constituencies won')

        # Join INC's votes with INC's seats
        result = pd.merge(
            party_votes[party_votes['party'] == 'INC'],
            const_wins[const_wins['party'] == 'INC'],
            on='party',
        )
        result['year'] = str(year)
        result['const type'] = const_type

        results.append(
            result[['party', 'party_total', 'total votes', 'vote share percent',
                    'year', 'constituencies won', 'const type']]
        )

# Stack every scenario/year row and make the grouping columns categorical
df_combined = pd.concat(results, ignore_index=True)
df_combined = df_combined.astype({'year': 'category', 'const type': 'category'})

# Final DataFrame
df_combined

Unnamed: 0,party,party_total,total votes,vote share percent,year,constituencies won,const type
0,INC,1026181.0,2988027.0,34.34,2013,11,BJY Passed through
1,INC,1183936.0,3532035.0,33.52,2018,5,BJY Passed through
2,INC,1632169.0,3769675.0,43.3,2023,15,BJY Passed through
3,INC,10228306.0,27706347.0,36.92,2013,111,BJY didn't pass through
4,INC,12229231.0,33181254.0,36.86,2018,64,BJY didn't pass through
5,INC,14984131.0,35054194.0,42.75,2023,120,BJY didn't pass through
6,INC,11254487.0,30694374.0,36.67,2013,122,Overall
7,INC,13413167.0,36713289.0,36.53,2018,69,Overall
8,INC,16616300.0,38823869.0,42.8,2023,135,Overall


In [None]:
# INC vote share per year, grouped by whether the Yatra passed through
df = df_combined.copy()

# Shared font settings for both axes
axis_tick_font = dict(family="Cambria", size=14, color="black")
axis_title_font = dict(family="Cambria", size=16, color="black")

# custom_data feeds the hovertemplate below. No hover_data is passed because
# the explicit hovertemplate replaces the default hover text anyway.
fig = px.bar(
    df,
    x='year',
    y='vote share percent',
    color='const type',
    labels={'vote share percent': 'Vote Share Percent', 'const type': 'Constituency Type'},
    color_discrete_sequence=px.colors.qualitative.Plotly,
    barmode='group',
    custom_data=['const type', 'vote share percent', 'year'],
)

# Axis titles use the nested title=dict(text=..., font=...) form; the
# `titlefont` shorthand is deprecated and rejected by newer Plotly releases.
fig.update_layout(
    bargap=0.5,        # gap between year groups
    bargroupgap=0.2,   # gap between bars inside a group
    xaxis=dict(
        title=dict(text="Year", font=axis_title_font),
        tickvals=df['year'].unique().tolist(),
        tickfont=axis_tick_font,
        fixedrange=True,
        showgrid=False,
    ),
    yaxis=dict(
        title=dict(text="Total Vote Share (%)", font=axis_title_font),
        tickvals=[0, 10, 20, 30, 40, 50],
        tickfont=axis_tick_font,
        fixedrange=True,
        showgrid=True,
        zeroline=True,
        gridcolor='lightgrey',
    ),
    title=dict(
        # The title font used to be given twice (size 16 here, then size 18
        # through title_font, which was applied last); only 18 is kept.
        font=dict(family="Cambria", size=18, color="black"),
        text="Total Vote Share (%) in constituencies for INC<br>based on whether B.J.Y. passed through",
        xanchor="center",
        x=0.5,
        yanchor="top",
        y=0.95
    ),
    legend=dict(
        title=dict(font=dict(family="Cambria", size=14, color="black")),
        font=dict(family="Cambria", size=14, color="black"),
        orientation="h",
        xanchor='center',
        x=0.5,
        yref='container',
        yanchor='top',
        y=0.85,
        itemclick=False,         # legend entries are not clickable
        itemdoubleclick=False,
    ),
    autosize=True,
    dragmode=False,
    plot_bgcolor="#f8f8f8",
    paper_bgcolor="#f8f8f8",
    margin=dict(pad=10, t=70, b=20, l=20, r=20),
)

fig.update_traces(
    hovertemplate="<b>const type:</b> %{customdata[0]}<br><b>Vote Share Percent:</b> %{customdata[1]}"
)

pio.write_json(fig, "plots/BJYvoteshare.json")


### Pie Charts

In [128]:
def _route_seat_counts(const_df, year):
    """Seats per party among the BJY constituencies, tagged with `year`."""
    on_route = const_df.constituency.isin(BJY_const_list)
    counts = const_df.loc[on_route, 'party'].value_counts().reset_index()
    counts['year'] = year
    return counts


df18 = _route_seat_counts(const18, 2018)
df23 = _route_seat_counts(const23, 2023)

# Stack both years and attach the slice colour for each party
df_combined = pd.concat([df18, df23], ignore_index=True)
slice_colors = {'INC': '#0D5BE1', 'BJP': '#FF7500', 'JD(S)': 'green', 'SKP' : 'yellow'}
df_combined['color'] = df_combined.party.map(slice_colors)
df_combined

Unnamed: 0,party,count,year,color
0,BJP,11,2018,#FF7500
1,JD(S),5,2018,green
2,INC,5,2018,#0D5BE1
3,INC,15,2023,#0D5BE1
4,JD(S),3,2023,green
5,BJP,2,2023,#FF7500
6,SKP,1,2023,yellow


In [None]:
# Side-by-side donut charts of seats won on the Yatra route, 2018 vs 2023.
# `make_subplots` comes from the notebook's import cell.
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "domain"}, {"type": "domain"}]])

# (year, x position in paper coordinates of the label at the donut's centre)
pie_panels = [(2018, 0.225), (2023, 0.775)]

for panel_col, (pie_year, label_x) in enumerate(pie_panels, start=1):
    year_rows = df_combined[df_combined['year'] == pie_year]

    # Denominator taken from the data instead of a hard-coded 21
    seats_total = str(int(year_rows['count'].sum()))

    fig.add_trace(
        go.Pie(
            values=year_rows['count'],
            labels=year_rows['party'],
            marker=dict(colors=year_rows['color']),
            hole=0.6,
            # Plotly text needs <br> for a line break; "\n" is not rendered as one.
            # texttemplate/hovertemplate override textinfo/hoverinfo, so those are omitted.
            texttemplate="%{label}<br>%{value}/" + seats_total,
            textposition="inside",
            # <extra></extra> removes the secondary hover box with the trace name
            hovertemplate="%{label}<br>Won %{value} of " + seats_total + "<extra></extra>",
        ),
        row=1, col=panel_col
    )

    # Year label in the middle of the donut
    fig.add_annotation(
        text=str(pie_year),
        x=label_x, y=0.5,
        xref="paper",
        xanchor="center",
        showarrow=False,
        font=dict(size=18, family="Cambria", color="black")
    )

# Layout and title
fig.update_layout(
    legend=dict(
        orientation="h",
        yanchor="top",
        y=0.85,
        yref="container",
        xanchor="center",
        x=0.5,
        font=dict(family="Cambria", size=14, color="black"),
        itemclick=False,
        itemdoubleclick=False,
        title=dict(
            text="Party",
            font=dict(family="Cambria", size=15, color="black")
        )
    ),
    title=dict(
        text="Constituency Wins in Bharat Jodo Yatra Path (2018 vs 2023)",
        font=dict(family="Cambria", size=18, color="black"),
        xanchor="center",
        x=0.5,
        yanchor="top",
        y=0.95,
        yref="container"
    ),
    plot_bgcolor='#f8f8f8',
    paper_bgcolor='#f8f8f8',
    margin=dict(t=70, pad=10, b=20, l=20, r=20),
    autosize=True,
    dragmode=False,
)

pio.write_json(fig, "plots/BJYconstshare.json")


### Hypothesis Testing for significant differences in vote share percentages

In [None]:
import pandas as pd

# List of constituencies the Bharat Jodo Yatra passed through.
# NOTE(review): the same 21 names are assigned to BJY_const_list in earlier
# cells of this notebook; keep the copies in sync when editing the route.
BJY_const_list = [
    'Raichur', 'Raichur Rural', 'Bellary', 'Bellary City', 'Molakalmuru', 'Challakere', 'Hiriyur', 'Chiknayakanhalli',
    'Tiptur', 'Turuvekere', 'Nagamangala', 'Melukote', 'Mandya', 'Shrirangapattana', 'Chamundeshwari', 'Krishnaraja',
    'Chamaraja', 'Narasimharaja', 'Varuna', 'Nanjangud', 'Gundlupet'
]

# Step 1: Calculate vote share for INC in each constituency for 2018 and 2023

# Function to calculate vote share per constituency
def calculate_vote_share(candid_df, year):
    """Compute INC's vote share (in percent) in every constituency.

    Args:
        candid_df: candidate-level frame with 'constituency', 'party' and
            'votes' columns. It is not modified.
        year: value stored in the 'year' column of the result.

    Returns:
        DataFrame with the columns 'constituency', 'inc_vote_share' and
        'year'. Constituencies without an INC row get a share of 0. A
        constituency with several INC rows yields one output row per INC row.
    """
    # INC rows only. A non-inplace rename avoids mutating a filtered slice,
    # which is what triggers pandas' SettingWithCopyWarning.
    inc_votes = (
        candid_df.loc[candid_df['party'] == 'INC', ['constituency', 'votes']]
        .rename(columns={'votes': 'inc_votes'})
    )

    # Total votes per constituency, across all parties
    total_votes = candid_df.groupby('constituency')['votes'].sum().reset_index(name='total_votes')

    # Right merge keeps every constituency, including those with no INC row
    vote_share = pd.merge(inc_votes, total_votes, on='constituency', how='right')
    vote_share['inc_votes'] = vote_share['inc_votes'].fillna(0)
    vote_share['inc_vote_share'] = (vote_share['inc_votes'] / vote_share['total_votes']) * 100
    vote_share['year'] = year

    # The debugging print of the full frame was dropped; inspect the returned
    # value instead (e.g. with .head()).
    return vote_share[['constituency', 'inc_vote_share', 'year']]

# Per-constituency INC vote share for the two elections being compared
vote_share_2018 = calculate_vote_share(candid18, 2018)
vote_share_2023 = calculate_vote_share(candid23, 2023)

# Step 2: line up 2018 and 2023 on constituency name
vote_share = vote_share_2018.merge(
    vote_share_2023,
    on='constituency',
    suffixes=('_2018', '_2023'),
)

# Step 3: change in INC vote share from 2018 to 2023 (percentage points)
vote_share['inc_vote_share_change'] = (
    vote_share['inc_vote_share_2023'] - vote_share['inc_vote_share_2018']
)

# Step 4: flag whether the Yatra passed through each constituency
passed_through = vote_share['constituency'].isin(BJY_const_list)
vote_share['BJY_Passed_Through'] = passed_through.map(
    {True: 'BJY Passed Through', False: 'BJY Did Not Pass Through'}
)

# Step 5: keep the first row per constituency and preview the result
vote_share = vote_share.drop_duplicates(subset=['constituency'], keep='first')
vote_share


## Emerging Bi-Polarity in Karnataka

### Power Shift: National vs. Regional Parties (Vote Share of Top two Parties)

In [169]:
def compute_party_stats(candid_df, const_df, year, parties=('BJP', 'INC')):
    """Vote totals, vote share and seats won for the selected parties.

    Args:
        candid_df: candidate-level frame with 'party' and 'votes' columns.
        const_df: constituency-level frame with a 'party' column; each row is
            counted as one seat for that party.
        year: value stored in the 'year' column of the result.
        parties: party names to report. An immutable default replaces the
            former list default; any list-like is accepted.

    Returns:
        DataFrame with the columns 'party', 'party_total', 'total_votes',
        'vote_share_percent', 'year' and 'constituencies_won', one row per
        requested party that has candidate rows.
    """
    wanted = list(parties)

    # Total votes in the whole election (all parties, not just the selected ones)
    total_votes = candid_df['votes'].sum()

    # Votes obtained by each selected party
    candid_parties = candid_df[candid_df['party'].isin(wanted)]
    party_votes = candid_parties.groupby('party')['votes'].sum().reset_index(name='party_total')

    # Vote share is measured against the election-wide total
    party_votes['total_votes'] = total_votes
    party_votes['vote_share_percent'] = ((party_votes['party_total'] / total_votes) * 100).round(2)
    party_votes['year'] = year

    # Seats won by each selected party
    const_parties = const_df[const_df['party'].isin(wanted)]
    const_won = const_parties.groupby('party').size().reset_index(name='constituencies_won')

    # Left merge so a party that polled votes but won no seat is kept with 0
    # seats instead of being dropped by an inner join.
    party_stats = pd.merge(party_votes, const_won, on='party', how='left')
    party_stats['constituencies_won'] = party_stats['constituencies_won'].fillna(0).astype(int)
    return party_stats

# BJP/INC stats per election; 2018 uses the post-bye-election frames
party_stats_2013 = compute_party_stats(candid13, const13, 2013)
party_stats_2018 = compute_party_stats(candid18post, const18post, 2018)
party_stats_2023 = compute_party_stats(candid23, const23, 2023)

# Stack the years and fix the column order (matches the SQL output)
stat_columns = ['party', 'party_total', 'total_votes', 'vote_share_percent', 'year', 'constituencies_won']
party_stats_all_years = pd.concat(
    [party_stats_2013, party_stats_2018, party_stats_2023],
    ignore_index=True,
)[stat_columns]

# Per-year totals across the two parties, named for clarity
combined_stats = party_stats_all_years.groupby('year', as_index=False).agg(
    combined_vote_share_percent=('vote_share_percent', 'sum'),
    combined_constituencies_won=('constituencies_won', 'sum'),
)
combined_stats['combined_vote_share_percent'] = combined_stats['combined_vote_share_percent'].round(2)

# Attach the per-year totals to every party row
party_stats_all_years = party_stats_all_years.merge(combined_stats, on='year')

party_stats_all_years


Unnamed: 0,party,party_total,total_votes,vote_share_percent,year,constituencies_won,combined_vote_share_percent,combined_constituencies_won
0,BJP,6231660.0,30694374.0,20.3,2013,40,56.97,162
1,INC,11254487.0,30694374.0,36.67,2013,122,56.97,162
2,BJP,14036584.0,36713289.0,38.23,2018,118,74.76,187
3,INC,13413167.0,36713289.0,36.53,2018,69,74.76,187
4,BJP,14210103.0,38823869.0,36.6,2023,66,79.4,201
5,INC,16616300.0,38823869.0,42.8,2023,135,79.4,201


In [171]:
# Stacked bars: BJP and INC vote share per election year.
# NOTE(review): INC is drawn in '#0062C6' here but '#0D5BE1' in the other
# plots of this notebook; confirm which colour is intended.
fig = px.bar(
    party_stats_all_years,
    y='vote_share_percent',
    x='year',
    color='party',
    labels={'vote_share_percent': 'Vote Share Percent'},
    color_discrete_map={'INC': '#0062C6', 'BJP': '#FF7500'},
    custom_data=['combined_vote_share_percent', 'combined_constituencies_won']
)

# Hover shows the combined figures for the two parties in that year
fig.update_traces(
    hovertemplate=(
        'Year: %{x}<br>'
        'Combined Vote Share: %{customdata[0]}%<br>'
        'Combined Constituencies Won: %{customdata[1]} of 224<extra></extra>'
    )
)

# Shared font settings for both axes
axis_tick_font = dict(family="Cambria", size=14, color="black")
axis_title_font = dict(family="Cambria", size=16, color="black")

# Axis titles use the nested title=dict(text=..., font=...) form; the
# `titlefont` shorthand is deprecated and rejected by newer Plotly releases.
fig.update_layout(
    bargap=0.9,        # gap between year bars
    bargroupgap=0.2,
    xaxis=dict(
        title=dict(text="Year", font=axis_title_font),
        # Ticks come from the plotted data. This used to read `df`, a frame
        # left over from an unrelated earlier cell (hidden notebook state).
        tickvals=party_stats_all_years['year'].unique().tolist(),
        tickfont=axis_tick_font,
        fixedrange=True,
        showgrid=False,
    ),
    yaxis=dict(
        title=dict(text="Total Vote Share (%)", font=axis_title_font),
        tickvals=[0, 20, 40, 60, 80, 100],
        range=[0, 100],
        tickfont=axis_tick_font,
        fixedrange=True,
        showgrid=True,
        zeroline=True,
        gridcolor='lightgrey',
    ),
    title=dict(
        font=dict(family="Cambria", size=18, color="black"),
        text="Combined Total Vote Share (%) Of Top 2 Parties Over the Years",
        xanchor="center",
        x=0.5,
        yanchor="top",
        y=0.95
    ),
    legend=dict(
        title=dict(
            text='Party',
            font=dict(family="Cambria", size=14, color="black")
        ),
        font=dict(family="Cambria", size=14, color="black"),
        orientation="h",
        xanchor='center',
        x=0.5,
        yref='container',
        yanchor='top',
        y=0.85,
        itemclick=False,         # legend entries are not clickable
        itemdoubleclick=False,
    ),
    autosize=True,
    dragmode=False,
    plot_bgcolor="#f8f8f8",
    paper_bgcolor="#f8f8f8",
    margin=dict(pad=10, t=50, b=20, l=20, r=20),
)

pio.write_json(fig, 'plots/votesharetoptwoparties.json')
fig

### ENOP (Effective number of Parties)

In [4]:
def calculate_enop_vote_share(candid_df, const_df, year):
    """Compute the per-party ENOP terms, by vote share, for a single election.

    Each party's share is its summed ``votes`` divided by the total of the
    ``votes`` column of ``candid_df``. Only parties that also appear in
    ``const_df['party']`` are kept (inner join), so parties absent from
    ``const_df`` contribute no term even though their votes still count towards
    the total. For every remaining party the term is
    ``1 / (1 + p_max**2 / p_i - p_i)``, where ``p_max`` is the largest share
    among the kept parties; this is the per-party term of Golosov's effective
    number of parties. The caller is expected to sum the terms per year.

    Parameters
    ----------
    candid_df : pandas.DataFrame
        Candidate-level rows with at least ``party`` and ``votes`` columns.
    const_df : pandas.DataFrame
        Rows with at least a ``party`` column (presumably the winning party of
        each constituency - confirm against the data files).
    year : int
        Election year written into every row of the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``year`` and ``ENOP``, one row per kept party.
    """
    votes_cast = candid_df['votes'].sum()

    # Fraction (0-1, despite the column name) of all votes taken by each party
    share_by_party = (
        candid_df.groupby('party')['votes']
        .sum()
        .div(votes_cast)
        .rename('vote_share_percent')
        .reset_index()
    )

    # Parties present in the constituency table, with their row counts
    seat_winners = const_df.groupby('party').size().reset_index(name='constituencies_won')

    # Inner join: drop every party that is missing from the constituency table
    kept = share_by_party.merge(seat_winners, on='party', how='inner')

    shares = kept['vote_share_percent']
    top_share = shares.max()
    enop_terms = 1 / (1 + (top_share**2 / shares) - shares)

    return kept.assign(year=year, ENOP=enop_terms)[['year', 'ENOP']]

def calculate_enop_seats(const_df, year, total_constituencies=224):
    """Compute the per-party ENOP terms, by seats won, for a single election.

    Each party's seat share is the number of ``const_df`` rows carrying that
    party divided by ``total_constituencies``. For every party the term is
    ``1 / (1 + s_max**2 / s_i - s_i)``, where ``s_max`` is the largest seat
    share; this is the per-party term of Golosov's effective number of
    parties. The caller is expected to sum the terms per year.

    Parameters
    ----------
    const_df : pandas.DataFrame
        Rows with at least a ``party`` column (presumably one row per
        constituency holding the winner's party - confirm against the data).
    year : int
        Election year written into every row of the result.
    total_constituencies : int, optional
        Denominator used for the seat shares. Defaults to 224, the total
        number of constituencies in Karnataka, which was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``year`` and ``ENOP``, one row per party in ``const_df``.

    Raises
    ------
    ValueError
        If ``total_constituencies`` is not positive.
    """
    if total_constituencies <= 0:
        raise ValueError("total_constituencies must be a positive number")

    # Number of constituencies won by each party
    party_constituencies = const_df.groupby('party').size().reset_index(name='constituencies_won')
    # Fraction (0-1, despite the column name) of all seats held by each party
    party_constituencies['constituency_percent'] = (
        party_constituencies['constituencies_won'] / total_constituencies
    )
    party_constituencies['year'] = year

    # Largest seat share of the year, shared by every party's term
    max_constituency_percent = party_constituencies['constituency_percent'].max()

    # Apply the ENOP formula for number of seats
    seat_share = party_constituencies['constituency_percent']
    party_constituencies['ENOP'] = 1 / (1 + (max_constituency_percent**2 / seat_share) - seat_share)

    return party_constituencies[['year', 'ENOP']]

# Per-party ENOP terms on a vote-share basis, one frame per election year
# (2018 uses the post-bye-election tables)
enop_vote_share_2013 = calculate_enop_vote_share(candid13, const13, 2013)
enop_vote_share_2018 = calculate_enop_vote_share(candid18post, const18post, 2018)
enop_vote_share_2023 = calculate_enop_vote_share(candid23, const23, 2023)

# Stack the years, add up the party terms within each year, round to two
# decimals and tag the series
enop_vote_share = (
    pd.concat(
        [enop_vote_share_2013, enop_vote_share_2018, enop_vote_share_2023],
        ignore_index=True,
    )
    .groupby('year')['ENOP']
    .sum()
    .reset_index()
    .assign(
        ENOP=lambda yearly: yearly['ENOP'].round(2),
        Type='With respect to vote share',
    )
)

# Per-party ENOP terms on a seats basis, one frame per election year
enop_seats_2013 = calculate_enop_seats(const13, 2013)
enop_seats_2018 = calculate_enop_seats(const18post, 2018)
enop_seats_2023 = calculate_enop_seats(const23, 2023)

# Same aggregation for the seat-based series
enop_seats = (
    pd.concat(
        [enop_seats_2013, enop_seats_2018, enop_seats_2023],
        ignore_index=True,
    )
    .groupby('year')['ENOP']
    .sum()
    .reset_index()
    .assign(
        ENOP=lambda yearly: yearly['ENOP'].round(2),
        Type='With respect to number of seats',
    )
)

# Long-format table holding both series, ready for a grouped bar chart
enop_results = pd.concat([enop_vote_share, enop_seats], ignore_index=True)

# Show the combined table as the cell output
enop_results

Unnamed: 0,year,ENOP,Type
0,2013,3.4,With respect to vote share
1,2018,2.82,With respect to vote share
2,2023,2.56,With respect to vote share
3,2013,2.11,With respect to number of seats
4,2018,2.05,With respect to number of seats
5,2023,1.76,With respect to number of seats


In [5]:
# Grouped bar chart: one bar per ENOP variant ('Type') for each election year.
fig2 = px.bar(enop_results, y='ENOP', x='year',
            title='Effective Number of Parties Over the Years',
            labels={'ENOP':'Effective Number of Parties'},
            barmode='group',
            color='Type',
            color_discrete_sequence=px.colors.qualitative.Plotly,)

# Axis titles are declared through the nested `title` dict (text + font) instead of the
# legacy `titlefont` axis attribute, which is deprecated and rejected by newer Plotly
# releases; the nested form is accepted by older releases as well.
fig2.update_layout(
    bargap=0.65,
    bargroupgap=0.2,
    xaxis=dict(
        title=dict(
            text="Year",
            font=dict(family="Cambria", size=16, color="black"),
        ),
        tickvals=enop_results['year'].unique(),  # one tick per election year
        tickfont=dict(family="Cambria", size=14, color="black"),
        fixedrange=True,  # disable zoom/pan on this axis
        showgrid=False,
    ),
    yaxis=dict(
        title=dict(
            text="Effective Number of Parties",
            font=dict(family="Cambria", size=16, color="black"),
        ),
        # NOTE(review): the fixed upper bound of 3.5 only just clears the largest
        # value currently in enop_results (3.4); revisit if the data changes.
        range=[0, 3.5],
        tickvals = [0,0.5,1.0,1.5,2,2.5,3,3.5],
        tickfont=dict(family="Cambria", size=14, color="black"),
        fixedrange=True,  # disable zoom/pan on this axis
        showgrid=True,
        zeroline=True,
        gridcolor='lightgrey',
    ),
    title = dict(
        font = dict(family="Cambria", size=18, color="black"),
        text = "Effective Number of Parties Over the Years",  
        xanchor="center",
        x = 0.5,
        yanchor="top",
        y = 0.95
    ),
    legend = dict(
        title = dict(
            text = 'Type',
            font = dict(family="Cambria", size=14, color="black")
        ),
        font=dict(family="Cambria", size=14, color="black"),
        orientation="h",
        xanchor = 'center',
        x = 0.5,
        yref = 'container',
        yanchor = 'top',
        y = 0.85
    ),
    autosize=True,
    dragmode=False,
    plot_bgcolor = "#f8f8f8",
    paper_bgcolor = "#f8f8f8",
    margin = dict(pad=10, t=50, b=20, l=20, r=20), 
    legend_itemclick=False,  # Disable legend click actions
    legend_itemdoubleclick=False,
)

# Persist the figure as Plotly JSON, then display it as the cell output.
pio.write_json(fig2, 'plots/enop.json')

fig2

### Paradox of Participation

In [7]:
# Distinct values of the 'party' column among the candidates of each election
# (2018 uses the original candidate table, not the post-bye-election one)
unique_parties_13, unique_parties_18, unique_parties_23 = (
    candidates['party'].nunique()
    for candidates in (candid13, candid18, candid23)
)

# Collect the three counts into a small year-indexed summary table
party_counts_df = pd.DataFrame(
    {
        'Year': [2013, 2018, 2023],
        'Number of Parties': [
            unique_parties_13,
            unique_parties_18,
            unique_parties_23,
        ],
    }
)

# Show the summary table as the cell output
party_counts_df

Unnamed: 0,Year,Number of Parties
0,2013,59
1,2018,91
2,2023,91


In [9]:
# Labels under which independent candidates are matched in the 'party' column
independent_labels = ['IND', 'Independent']

# Number of candidate rows per election whose party is one of those labels
# (2018 uses the original candidate table, not the post-bye-election one)
independent_count_13, independent_count_18, independent_count_23 = (
    int(candidates['party'].isin(independent_labels).sum())
    for candidates in (candid13, candid18, candid23)
)

# Collect the three counts into a small year-indexed summary table
independent_counts_df = pd.DataFrame(
    {
        'Year': [2013, 2018, 2023],
        'Number of Independent Candidates': [
            independent_count_13,
            independent_count_18,
            independent_count_23,
        ],
    }
)

# Show the summary table as the cell output
independent_counts_df


Unnamed: 0,Year,Number of Independent Candidates
0,2013,1157
1,2018,1107
2,2023,905
