In [1]:
import os
import pandas as pd
import numpy as np
from operator import itemgetter
from datetime import datetime
start_time = datetime.now()

In [2]:
current_path = os.getcwd()
directory = 'Data'
path = os.path.join(current_path,directory)
if not os.path.exists(path):
    os.makedirs(path)

In [3]:
def read_file(file_name,path):
    file_path = path+ '\\'+file_name
    if file_path.endswith('.xlsx'):
        data = pd.read_excel(file_path)
        print('Input is read succesfully')
        return data
    elif file_path.endswith('.csv'):
        data = pd.read_csv(file_path)
    else:
        print('Input file not supported')
        
data= read_file(file_name='MH_Retro_Data.xlsx',path=path)

Input is read succesfully


In [4]:

data.rename(columns={'Election Type': 'Election_Type', 'District':'District_Name','PC Name':'PC_Name', 'Vote Share Percentage':'Vote_Share_Percentage'},inplace=True)

In [5]:
def length_of_column(df):
    try:
        return df.shape[0]
    except:
        print('Default Length of column is 0')

In [6]:
def election_column(data):
    number_of_rows = length_of_column(data)
    for i in range(number_of_rows):
        data.loc[i,'election'] = str(data.loc[i,'Year'].astype(float)) + data.loc[i,'Election_Type'] 
    
    return data

In [7]:
def unique_elections_of_the_state(dataframe):
    unique_elections = dataframe['election'].unique().tolist()
    return unique_elections

In [8]:
# filters out unique districts/acs/pcs based on parameter given

def unique_values(parameter,data):
    
    if parameter.lower() == 'district':
        unique_params = data['District_Name'].unique().tolist()
       
    
    elif parameter.lower() == 'pc':
        unique_params = data['PC_Name'].unique().tolist()
        

    elif parameter.lower() == 'ac':
        unique_params = data['AC Name'].unique().tolist()
    
    return unique_params

In [9]:
def rename_the_columns(parameter,data):
    
    if parameter.lower() == 'district':
        column_name = 'District_Name'
     
    elif parameter.lower() == 'pc': 
        column_name = 'PC_Name'
    
    elif parameter.lower() == 'ac':
        column_name = 'AC Name'
        
    
    return column_name

In [10]:
# given parameter and election this filter's out vote share percentages

def list_of_vote_share(election,parameter,data,place):
    vs_percentages = []
    column_name = rename_the_columns(parameter,data)
    if parameter.lower() == 'district' or  parameter.lower() == 'pc':
        
        unique_parties = data[(data[column_name]==place) & (data['election'] == election)]['Party Name'].unique().tolist()
        if len(unique_parties) >=1:
            for i in unique_parties:
                vs_percentages.append(data[(data[column_name]==place) & (data['election'] == election) & (data['Party Name']==i)]['Vote_Share_Percentage'].mean())
                
            
    elif parameter.lower() == 'ac':
        vs_percentages = data[(data[column_name]==place) & (data['election'] == election)]['Vote_Share_Percentage']
        
    return vs_percentages

In [11]:
# calculates and returns enop
def Effective_Number_of_Parties(vs_percentages):
    denom = [(x/100)**2 for x in vs_percentages]
    denom = np.sum(denom)
    if denom != 0:
        enop = 1/denom
        
    return enop

In [12]:
def enop_appender(data, parameter):
    i = 0
    resp = []
    data = election_column(data)
    unique_elections = unique_elections_of_the_state(data)
    unique_params = unique_values(parameter,data)
    
    while i <= len(unique_elections)-1:
        election_val = unique_elections[i]
        for j in unique_params:
            vs_perc = list_of_vote_share(election_val,parameter,data,j)
            if len(vs_perc)>=1:
                val = Effective_Number_of_Parties(vs_perc)
                resp.append({'Election': election_val,
                             f'{parameter}': j,
                             'ENOP': val})
         
        i+=1
        
    return resp

In [13]:
# calculates all parties state vs percentage mean for that particular election

def state_average_of_parties(data, election):
    unique_parties = data[data['election'] == election]['Party Name'].unique().tolist()
    state_party_averages = {}
    for j in unique_parties:
        party_state_average = data[data['Party Name'] == j]['Vote_Share_Percentage'].mean()
        state_party_averages[j] = party_state_average
    return state_party_averages

In [14]:
def winner_runner_of_ac(election,data):
    
    winruns = data[data['election'] == election]['Vote_Share_Percentage'].sort_values(ascending=False).tolist()
    winner_party =  data[(data['election'] == election) & (data['Vote_Share_Percentage'] == winruns[0])]['Party Name'].iloc[0]
    winner_vs_percentage = winruns[0]
    runner_party = data[(data['election'] == election) & (data['Vote_Share_Percentage'] == winruns[1] )]['Party Name'].iloc[0]
    runner_vs_percentage = winruns[1]
     
    return (winner_party,winner_vs_percentage,runner_party,runner_vs_percentage)

In [15]:
def winner_runner_of_districts_or_pcs(election,data):
      # calculates winner and runners of district/pc
    vs = []
    parties = data[data['election'] == election]['Party Name'].unique().tolist()
    for i in parties:
        vs_mean= data[(data['election'] == election) & (data['Party Name']==i)]['Vote_Share_Percentage'].mean()
        vs.append({'Party Name':i,
                          'Vote Share': vs_mean,
                          'Election':election})



    winsruns = sorted(vs,key=itemgetter('Vote Share'),reverse=True)
    winner_party = winsruns[0]['Party Name']
    winner_vs_percentage = winsruns[0]['Vote Share'] 
    runner_party = winsruns[1]['Party Name']
    runner_vs_percentage = winsruns[1]['Vote Share']
    
    return (winner_party,winner_vs_percentage,runner_party,runner_vs_percentage)

In [16]:
# return cpvi value of party
def party_cpvi(party, party_vs_percentage,election,data, savg):
    delta = round((party_vs_percentage) - (savg.get(party)),2)
    if delta >= 0:
        sign = '+'

    elif delta < 0:
        sign = '-'

    cpvi_val = party + sign + str(abs(delta))

    return cpvi_val,delta

In [17]:
def cpvi_appender(data,parameter):
    data = election_column(data)
    unique_elections = unique_elections_of_the_state(data)
    unique_params = unique_values(parameter, data)
    column_name = rename_the_columns(parameter,data)
    j = 0
    resp = []
    while j < len(unique_elections):
        election = unique_elections[j]
        

        for i in unique_params:
           
            savg = state_average_of_parties(data,election)
           
            try:
                if parameter.lower() == 'ac':
                    winner_party_name, winner_vs_percentage, runner_party_name,runner_vs_percentage = winner_runner_of_ac(election, data[data[column_name] == i])
                    
                elif parameter.lower()=='district' or parameter.lower()=='pc':
                    winner_party_name, winner_vs_percentage, runner_party_name,runner_vs_percentage = winner_runner_of_districts_or_pcs(election, data[data[column_name] == i])
               
                winner_cpvi, winner_delta = party_cpvi(winner_party_name,winner_vs_percentage,election, data[data[column_name] == i], savg)
              
                runner_cpvi, runner_delta = party_cpvi(runner_party_name,runner_vs_percentage, election, data[data[column_name] == i], savg)
            
                resp.append({'Winner CPVI': winner_cpvi,
                             'Winner Delta': winner_delta,
                             'Runner CPVI': runner_cpvi,
                             'Runner Delta': runner_delta,
                             'Election': election,
                             f'{parameter}': i,
                            'winner_vs_percentage':winner_vs_percentage,
                            'runner_vs_percentage':runner_vs_percentage})
            
            except IndexError:
                continue

        j += 1

    return resp

In [18]:
cpvi_out = pd.DataFrame()
enop_out = pd.DataFrame()
for m in ['pc','district', 'ac']:
    cpvi_out = pd.concat([cpvi_out,pd.DataFrame(cpvi_appender(data,m))])
    enop_out = pd.concat([enop_out,pd.DataFrame(enop_appender(data,m))])

In [19]:
enop_out

Unnamed: 0,Election,pc,ENOP,district,ac
0,2009.0AE,Nandurbar,1.295777,,
1,2009.0AE,Dhule,1.196506,,
2,2009.0AE,Raver,1.586241,,
3,2009.0AE,Jalgaon,1.636502,,
4,2009.0AE,Buldhana,1.428257,,
...,...,...,...,...,...
1733,2019.0Bypolls,,2.031976,,Patan
1734,2019.0Bypolls,,1.991080,,Satara
1735,2021.0Bypolls,,2.213554,,Deglur
1736,2021.0Bypolls,,2.229723,,Pandharpur


In [20]:
cpvi_out

Unnamed: 0,Winner CPVI,Winner Delta,Runner CPVI,Runner Delta,Election,pc,winner_vs_percentage,runner_vs_percentage,district,ac
0,NCP+23.15,23.15,SP+40.16,40.16,2009.0AE,Nandurbar,54.880000,42.9400,,
1,JSS+38.51,38.51,LKSGM+35.14,35.14,2009.0AE,Dhule,47.040000,43.6700,,
2,NCP+11.67,11.67,BJP-0.4,-0.40,2009.0AE,Raver,43.400000,40.5575,,
3,NCP+15.41,15.41,SHS+12.45,12.45,2009.0AE,Jalgaon,47.132500,47.0225,,
4,SHS+11.46,11.46,NCP+10.46,10.46,2009.0AE,Buldhana,46.033333,42.1850,,
...,...,...,...,...,...,...,...,...,...,...
1733,NCP+24.33,24.33,BJP+1.2,1.20,2019.0Bypolls,,56.060000,42.1600,,Patan
1734,BJP+19.45,19.45,NCP+5.29,5.29,2019.0Bypolls,,60.410000,37.0200,,Satara
1735,INC+26.5,26.50,BJP-5.9,-5.90,2021.0Bypolls,,57.030000,35.0600,,Deglur
1736,BJP+7.19,7.19,NCP+14.78,14.78,2021.0Bypolls,,48.150000,46.5100,,Pandharpur


In [21]:
end_time = datetime.now()
print('Duration: {}'.format(end_time - start_time))

Duration: 0:06:47.013395
