In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [38]:
# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [39]:
# Load the per-constituency table (one row per constituency, with its predicted_cluster label)
data = pd.read_csv('clustered.csv')
# Preview the first five rows as the cell output
data.head(5)

Unnamed: 0,Constituency_Number,Constituency_Name,Districts,Classification,Caste Majority,1983,1985,1989,1994,1999,VM_1999,2004,VM_2004,2009,VM_2009,2014,VM_2014,2019,VM_2019,predicted_cluster
0,1,ICHCHAPURAM,SRIKAKULAM,RURAL,OBC,TDP,TDP,TDP,TDP,TDP,0,INC,1,TDP,0,TDP,2,TDP,1,0
1,2,PALASA,SRIKAKULAM,RURAL,OBC,INC,TDP,IND,TDP,TDP,2,TDP,2,INC,1,TDP,2,YSRCP,2,4
2,3,TEKKALI,SRIKAKULAM,RURAL,OBC,TDP,TDP,TDP,TDP,TDP,1,INC,2,INC,0,TDP,1,TDP,1,0
3,4,PATHAPATNAM,SRIKAKULAM,RURAL,FC,TDP,INC,TDP,TDP,TDP,2,TDP,0,INC,2,YSRCP,0,YSRCP,2,2
4,5,SRIKAKULAM,SRIKAKULAM,SEMI_URBAN,OBC,TDP,TDP,TDP,TDP,TDP,2,INC,1,INC,0,TDP,2,YSRCP,1,0


In [40]:
DF = pd.DataFrame(data)
# Split the table into one frame per predicted cluster label (0 through 5)
cluster_0, cluster_1, cluster_2, cluster_3, cluster_4, cluster_5 = (
    DF[DF['predicted_cluster'] == label] for label in range(6)
)

In [41]:
# Overall (all-constituency) baselines that each cluster is later compared against
# NOTE(review): Seats is hard-coded; presumably it equals the number of rows in DF - confirm
Seats = 162
Years = ['1999','2004','2009','2014','2019']

# Overall seat share per election year for TDP, YSRCP, INC and OTHERS.
# BJP wins are counted in the TDP share; OTHERS is whatever is left after the three named shares.
TDP_SS, YSRCP_SS, INC_SS, OTHERS_SS = {}, {}, {}, {}
for year in Years:
    winners = DF[year]
    TDP_SS[year] = (len(DF[winners == 'TDP']) + len(DF[winners == 'BJP'])) / Seats
    YSRCP_SS[year] = len(DF[winners == 'YSRCP']) / Seats
    INC_SS[year] = len(DF[winners == 'INC']) / Seats
    OTHERS_SS[year] = 1 - (TDP_SS[year]+YSRCP_SS[year]+INC_SS[year])

# Share of seats falling in each victory-margin bucket, keyed by the 'VM_<year>' column name
# (bucket codes in the data: 0 = close, 1 = intermediate, 2 = large)
close_margins = {'VM_'+year: len(DF[DF['VM_'+year] == 0]) / Seats for year in Years}
intermediate_margins = {'VM_'+year: len(DF[DF['VM_'+year] == 1]) / Seats for year in Years}
large_margins = {'VM_'+year: len(DF[DF['VM_'+year] == 2]) / Seats for year in Years}

# Share of seats by majority caste group
Castes = {
    caste: len(DF[DF['Caste Majority'] == caste]) / Seats
    for caste in ('FC', 'OBC', 'SC', 'ST')
}


In [48]:
# set plotting theme
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# White text, axis labels and tick marks on a dark axes background
COLOR = 'white'
mpl.rcParams.update({
    'text.color': COLOR,
    'axes.labelcolor': COLOR,
    'xtick.color': COLOR,
    'ytick.color': COLOR,
})
plt.rcParams['axes.facecolor'] = '#323A48'

# Cluster Analysis Function
def Analyze(cluster_no):
    """Plot three index charts comparing one cluster with the overall baselines.

    Every index is the cluster's share of a category divided by the overall
    share of that category, read from the notebook-level dictionaries
    ``TDP_SS``, ``YSRCP_SS``, ``INC_SS``, ``OTHERS_SS``, ``close_margins``,
    ``intermediate_margins``, ``large_margins`` and ``Castes``.  A value above
    1 therefore means the category is over-represented in the cluster.  When
    an overall share is 0 the cluster share is left un-normalised.

    Charts shown, in order:
      1. seat-share index per year for TDP (BJP wins are counted with TDP),
         YSRCP, INC and all remaining winners;
      2. margin-bucket index per year for ``VM_<year>`` codes 0, 1 and 2;
      3. caste index for the ``Caste Majority`` values FC, OBC, SC and ST.

    Parameters
    ----------
    cluster_no : pandas.DataFrame
        Rows belonging to one cluster.  Must contain a column for every entry
        of ``Years``, the matching ``VM_<year>`` columns and ``Caste Majority``.

    Returns
    -------
    None
        The three figures are displayed with ``plt.show()``.

    Raises
    ------
    ValueError
        If ``cluster_no`` has no rows, since no share can be computed.
    """
    size = len(cluster_no.index)
    if size == 0:
        # Without this guard the first share computation dies with a bare ZeroDivisionError
        raise ValueError('Analyze() needs a non-empty cluster: there are no rows to compute shares from')

    def count_rows(column, value):
        # Number of cluster rows whose `column` equals `value`
        return len(cluster_no[cluster_no[column] == value].index)

    def to_index(count, baseline):
        # Cluster share relative to the overall share; raw share when the baseline is 0
        share = count / size
        return share / baseline if baseline != 0 else share

    # Generate Seat-share index
    TDP_no, YSRCP_no, INC_no, OTHERS_no = {}, {}, {}, {}
    for year in Years:
        tdp_seats = count_rows(year, 'TDP') + count_rows(year, 'BJP')
        ysrcp_seats = count_rows(year, 'YSRCP')
        inc_seats = count_rows(year, 'INC')
        other_seats = size - (tdp_seats + ysrcp_seats + inc_seats)
        TDP_no[year] = to_index(tdp_seats, TDP_SS[year])
        YSRCP_no[year] = to_index(ysrcp_seats, YSRCP_SS[year])
        INC_no[year] = to_index(inc_seats, INC_SS[year])
        OTHERS_no[year] = to_index(other_seats, OTHERS_SS[year])

    # Generating Margin index
    close_no, intermediate_no, large_no = {}, {}, {}
    for year in Years:
        vm_column = 'VM_' + year
        close_no[year] = to_index(count_rows(vm_column, 0), close_margins[vm_column])
        intermediate_no[year] = to_index(count_rows(vm_column, 1), intermediate_margins[vm_column])
        # The large bucket must be normalised by its own baseline; dividing the
        # intermediate index here instead would skew both series.
        large_no[year] = to_index(count_rows(vm_column, 2), large_margins[vm_column])

    # Generating Caste index (guarded like the other indices so a caste that
    # is absent overall does not raise ZeroDivisionError)
    caste_labels = ('FC', 'OBC', 'SC', 'ST')
    caste_index = {
        caste: to_index(count_rows('Caste Majority', caste), Castes[caste])
        for caste in caste_labels
    }

    # plotting Seat_Share index (Yearwise) for cluster
    fig, ax = plt.subplots(figsize = (10,7.5))
    fig.suptitle('Cluster Size: '+str(size), fontsize=20)
    fig.subplots_adjust(top = 0.85)
    ind = np.linspace(1, 10, len(Years))  # left edge of each year's bar group
    width = 0.2
    p1 = ax.bar(ind, list(TDP_no.values()), width, color='yellow')
    p2 = ax.bar(ind+width, list(YSRCP_no.values()), width, color='#0b6095')
    p3 = ax.bar(ind+2*width, list(INC_no.values()), width, color='#83ade0')
    p4 = ax.bar(ind+3*width, list(OTHERS_no.values()), width, color='grey')

    ax.set_title("Seat Share Index Over The Years")
    # centre the tick under the four-bar group
    ax.set_xticks([(i+1.5*width) for i in ind])
    ax.set_xticklabels(Years)
    ax.legend((p1[0],p2[0],p3[0],p4[0]),('TDP','YSRCP','INC','OTHERS'),loc = 'upper right',bbox_to_anchor=(1.25,0.5))
    ax.autoscale_view()
    plt.show()

    # plotting Margin-bucket index (Yearwise) for cluster
    fig, ax = plt.subplots(figsize = (10,7.5))
    p1 = ax.bar(ind, list(close_no.values()), width, color='yellow')
    p2 = ax.bar(ind+width, list(intermediate_no.values()), width, color='orange')
    p3 = ax.bar(ind+2*width, list(large_no.values()), width, color='red')
    ax.set_title("Margin-bucket Index Over The Years")
    # centre the tick under the three-bar group
    ax.set_xticks([(i+width) for i in ind])
    ax.set_xticklabels(Years)
    # BUCKETS
    close = 'CLOSE: <5000'
    intermediate = 'INTERMEDIATE: <10000'
    large = 'LARGE: >10000'
    ax.legend((p1[0],p2[0],p3[0]),(close,intermediate,large),loc = 'upper right',bbox_to_anchor=(1.4,0.5))
    ax.autoscale_view()
    plt.show()

    # plotting Caste-Wise index of cluster
    caste_positions = np.arange(len(caste_labels))
    caste_colors = ('#78aae1', '#c993dd', '#686db3', 'white')
    fig, ax = plt.subplots(figsize = (10,7.5))
    caste_bars = [
        ax.bar(position, caste_index[caste], width, color=color)
        for position, caste, color in zip(caste_positions, caste_labels, caste_colors)
    ]
    ax.set_title("Caste Index")
    ax.set_xticks(caste_positions)
    ax.set_xticklabels(caste_labels)
    ax.legend([bar[0] for bar in caste_bars], caste_labels, loc = 'upper right', bbox_to_anchor=(1.25,0.5))
    ax.autoscale_view()
    plt.show()
