# EXPLORATORY DATA ANALYSIS (EDA) OF WEST BENGAL POLL 2021

### DATA SOURCE 
https://www.kaggle.com/sugandhkhobragade/west-bengal-election-data

### The GitHub repository for this project is available at
https://github.com/diptaraj23/EXPLORATORY-DATA-ANALYSIS-EDA-OF-WEST-BENGAL-POLL-2021

### Importing required Libraries

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

### Reading the Data

In [None]:
# Load the election dataset from the Kaggle input directory into a DataFrame.
# The trailing bare `data` expression is the cell's last statement, so the
# notebook renders the frame as the cell output.
data= pd.read_csv('../input/west-bengal-election-data/west_bengal.csv')
data

In [None]:
data.shape  # (number of rows, number of columns) of the dataset

In [None]:
data.info()  # prints column names, non-null counts and dtypes of the dataset

In [None]:
data.isna().sum()  # number of missing (NaN) values in each column

In [None]:
data.describe()  # summary statistics (count, mean, std, min, quartiles, max) of the dataset

### Constituency 

In [None]:
data['constituency'].nunique()  # number of distinct constituency names

In [None]:
# Row count per constituency name, keeping the 20 most frequent names.
constituency = data['constituency'].value_counts().head(20)

# One bar per constituency name; bar height is the number of rows for it.
fig = px.bar(
    x=constituency.index,
    y=constituency,
    height=500,
    width=900,
    title='Top 20 Constituency Names',
)

# Olive fill with a thin magenta outline on every bar.
fig.update_traces(
    marker={
        'color': "rgb(200, 200, 80)",
        'line': {'color': "rgb(200, 0, 200)", 'width': 0.5},
    }
)
fig.show()

### Total number of candidates by each party

In [None]:
# Distinct candidate names per party, largest party first, re-indexed 0..n-1
# so the displayed row numbers follow the sorted order.
total_candidates_by_party = (
    data.groupby('party')['candidate']
    .nunique()
    .reset_index(name='total candidates')
    .sort_values(by='total candidates', ascending=False)
    .reset_index(drop=True)
)
total_candidates_by_party.style.background_gradient(cmap='rainbow')

#### So IND has the maximum number of candidates (137), followed by AITC and BJP, each of which has 90 candidates.

In [None]:
# Parallel lists taken from the per-party table: position i in one list
# corresponds to position i in the other.
party_list = total_candidates_by_party['party'].tolist()  # party names
total_candidates_list = total_candidates_by_party['total candidates'].tolist()  # candidates per party

## Crime Analysis

### Parties having the total number of criminal cases and criminals (most to least)

In [None]:
# Total criminal cases per party, most to least, limited to the top 30 parties.
crime = (
    data.groupby('party')['criminal_cases']
    .sum()
    .reset_index()
    .sort_values(by='criminal_cases', ascending=False)
    .head(30)
)

# Number of rows (candidates) per party that have at least one criminal case.
# A single vectorised count replaces the original nested loops, which compared
# every selected party against every row of `data`.
criminals_per_party = data.loc[data['criminal_cases'] > 0, 'party'].value_counts()

# Align the counts with the party order of `crime`; a party with no such
# candidate is absent from the counts and therefore gets 0.
criminal_count = [int(criminals_per_party.get(party, 0)) for party in crime['party']]
crime['Total Criminals'] = criminal_count

crime.style.background_gradient(cmap='Reds')

#### BJP candidates have the highest number of criminal cases (summing the criminal cases of all its candidates), which is 192; BJP also has the largest number of candidates with criminal cases (48), followed by CPI(M) and AITC.

### Parties having the maximum criminal cases against a single person (most to least)

In [None]:
# Keep only the rows whose criminal-case count is non-zero.
criminal = data.loc[data['criminal_cases'].ne(0)]

# Highest case count per (candidate, party) pair, most cases first.
criminal = (
    criminal.groupby(['candidate', 'party'])['criminal_cases']
    .max()
    .reset_index()
    .sort_values(by='criminal_cases', ascending=False)
)
criminal.style.background_gradient(cmap='Blues')

#### Barun Pramanik (Chitta) from BJP has the highest number of criminal cases against him (27), followed by Himangshu Das from CPI(M) with 24 cases against him.

## Education Analysis

## Pie chart showing the distribution of educational degrees (considering all the candidates)

In [None]:
# Slice colours, applied in order to the education categories.
line_colors = ['#521262', '#6639a6', '#3490de', '#6fe7dd', '#a8e6cf', '#fdffab',
               '#ffd3b6', '#f73859', '#155263', '#222831', '#ffc93c']

# Count each education level once and reuse it for both values and labels.
education_counts = data['education'].value_counts()
fig = px.pie(values=education_counts, names=education_counts.index, height=500)

# Donut-style pie with the third slice pulled out and a black outline on each slice.
fig.update_traces(
    pull=[0.0, 0.0, 0.1, 0.0],
    hole=.2,
    hoverinfo="label+percent",
    marker=dict(colors=line_colors, line=dict(color='black', width=2)),
)

# `titlefont_size` is a deprecated layout attribute; `title_font_size` is the
# supported spelling. The displayed title's "Cahrt" typo is also fixed.
fig.update_layout(title_text='Education Distribution using Pie Chart', title_font_size=20)

fig.show()

### Bar chart showing the distribution of educational degrees (considering all the candidates)

In [None]:
a4_dims = (19, 7.5)

# Apply the seaborn style BEFORE creating the figure: style settings are read
# when the axes are created, so setting them afterwards leaves this figure unstyled.
sns.set(style='darkgrid')
fig, ax = plt.subplots(figsize=a4_dims)

# One bar per education level, ordered from most to least common.
# The target axes is passed explicitly instead of relying on the current axes.
sns.countplot(x='education',
              data=data,
              order=data['education'].value_counts().index,
              palette="prism",
              ax=ax)
fig.suptitle('Education Distribution using Bar Chart', fontsize=25)
plt.show()

### Number of candidates at each education level in each party, along with that number as a percentage of the party's total candidates

In [None]:
# Number of rows per (party, education) pair, largest groups first.
party_wise_education = (
    data.groupby(['party', 'education'])['education']
    .count()
    .reset_index(name='education_count')
    .sort_values(by='education_count', ascending=False)
)

# Lookup: party name -> total number of candidates of that party.
candidates_per_party = dict(zip(party_list, total_candidates_list))

# Share of each party's candidates holding each degree, computed row by row.
# The original built the percentages in `party_list` order and then assigned
# them positionally to this frame, which is sorted by `education_count`, so the
# values landed on the wrong rows. Mapping by party keeps each percentage on
# the row it belongs to.
party_wise_education['percentage_value'] = (
    party_wise_education['education_count']
    / party_wise_education['party'].map(candidates_per_party)
    * 100
)

# Kept as a plain list (now in the frame's row order) for any later cell that reads it.
total_education_percentage = party_wise_education['percentage_value'].tolist()

party_wise_education.style.background_gradient(cmap='inferno_r')

#### AITC party has 3 candidates with a Doctorate(highest education qualification degree), which is the highest amongst all parties.
#### AITC party has 26 graduates and 17 post graduates.


#### BJP party has 2 candidates with a Doctorate , 27 graduates and 20 post graduates

#### CPIM party has 1 candidate with a Doctorate, 15 graduates and 13 post graduates

## ASSET ANALYSIS

### Sum of all the assets held by the candidates of each party (most to least)

In [None]:
# Combined assets of all candidates in each party, richest party first.
Total_Asset = (
    data.groupby('party')['total_assets']
    .sum()
    .reset_index()
    .sort_values(by='total_assets', ascending=False)
)
Total_Asset.style.background_gradient(cmap='summer_r')

#### AITC candidates' total assets are the largest among all the parties, at INR 1335172668

### Asset holders of all parties along with Asset amount (most to least)

In [None]:
# Keep only the rows whose total assets are non-zero.
Asset_holder = data.loc[data['total_assets'].ne(0)]

# Largest asset value per (candidate, party) pair, highest first.
Asset_holder = (
    Asset_holder.groupby(['candidate', 'party'])['total_assets']
    .max()
    .reset_index()
    .sort_values(by='total_assets', ascending=False)
)
Asset_holder.style.background_gradient(cmap='prism')

### Sum of all the liabilities held by the candidates of each party (most to least)

In [None]:
# Combined liabilities of all candidates in each party, most indebted party first.
Total_Liabilities = (
    data.groupby('party')['liabilities']
    .sum()
    .reset_index()
    .sort_values(by='liabilities', ascending=False)
)
Total_Liabilities.style.background_gradient(cmap='inferno')

### Liability holders of all parties along with liability amount (most to least)

In [None]:
# Keep only the rows whose liabilities are non-zero.
Liabilities_holder = data.loc[data['liabilities'].ne(0)]

# Largest liability value per (candidate, party) pair, highest first.
Liabilities_holder = (
    Liabilities_holder.groupby(['candidate', 'party'])['liabilities']
    .max()
    .reset_index()
    .sort_values(by='liabilities', ascending=False)
)
Liabilities_holder.style.background_gradient(cmap='prism_r')

### Liabilities and assets

In [None]:
# Summed liabilities and assets per (candidate, party) pair, ordered by
# liabilities and then assets (both descending); only the first 50 rows are kept.
assets_liabilities = (
    data.groupby(['candidate', 'party'])[['liabilities', 'total_assets']]
    .sum()
    .reset_index()
    .sort_values(by=['liabilities', 'total_assets'], ascending=[False, False])
    .head(50)
)
assets_liabilities.style.background_gradient(cmap='RdPu_r')

## Correlation Heatmap

In [None]:
# Correlate only numeric (and boolean) columns. From pandas 2.0 `DataFrame.corr`
# no longer drops non-numeric columns by default and raises on string columns,
# so the frame is restricted explicitly before computing the correlations.
numeric_data = data.select_dtypes(include=['number', 'bool'])

# Annotated heatmap of the pairwise correlation coefficients.
sns.heatmap(numeric_data.corr(), annot=True, cmap='plasma')