In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns #
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder #data convertion from staring to numeric

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Remove the pandas display limits so frames are rendered with every row and column.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

# Reading Governors Data using Pandas

In [None]:
# Load the three governors CSV files (county, county-candidate, state level).
governors_county, governors_candidate, governors_state = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/us-election-2020/governors_county.csv",
        "/kaggle/input/us-election-2020/governors_county_candidate.csv",
        "/kaggle/input/us-election-2020/governors_state.csv",
    )
)

# Print the first rows of each frame, in the same order they were loaded.
for governors_frame in (governors_county, governors_candidate, governors_state):
    print(governors_frame.head())

# Merging the three datasets into a single dataset using left joins

In [None]:
# Left-join the county rows with their candidate rows on (state, county),
# then attach the state-level rows on state.
governors_data = (
    governors_county
    .merge(governors_candidate, how="left", on=["state", "county"])
    .merge(governors_state, how="left", on="state")
)
# This is a second name for the same object, not a copy: columns added to
# governors_data in place are also visible through governors_data_df.
governors_data_df = governors_data
print(governors_data.head())

# Creating the new columns with existing data

In [None]:
# Copy existing columns under more descriptive names (new name -> source column).
# The columns are added in place, in the order listed here.
derived_columns = {
    "state_county_reported_votes": "current_votes",
    "state_county_total_votes": "total_votes",
    "state_county_candiate_votes": "votes_x",
    "state_votes": "votes_y",
}
for new_name, source_name in derived_columns.items():
    governors_data[new_name] = governors_data[source_name]
governors_data.head()

# Dropping the columns which are not used 

In [None]:
# DataFrame.drop returns a new frame and leaves the original untouched, so the
# result must be bound back to governors_data for the columns to be removed.
governors_data = governors_data.drop(columns=["current_votes","total_votes","votes_x","votes_y"])
# Show only the first rows; display.max_rows is unlimited in this notebook.
governors_data.head()

# Defining the list of column names in the order the columns should appear

In [None]:
# Target column order: identifying columns first, then the vote counts,
# then "percent" and the "won" flag.
columns_sequence = [
    "state",
    "county",
    "candidate",
    "party",
    "state_county_candiate_votes",
    "state_county_reported_votes",
    "state_county_total_votes",
    "state_votes",
    "percent",
    "won",
]

# Re-indexing the columns as per the defined columns sequence 

In [None]:
# Keep only the columns named in columns_sequence, arranged in that order.
governors_data = governors_data.reindex(columns_sequence, axis="columns")

# Encoding the string columns as integer labels using scikit-learn's LabelEncoder

In [None]:
# Replace each listed column with integer labels. One encoder instance is
# re-fitted per column, so afterwards it only holds the classes of the last one.
lben = LabelEncoder()
for column_name in ("state", "candidate", "county", "party", "won"):
    governors_data[column_name] = lben.fit_transform(governors_data[column_name])
governors_data.head()

In [None]:
from sklearn.ensemble import AdaBoostClassifier,GradientBoostingClassifier
from sklearn.model_selection import KFold,cross_val_score,train_test_split
from sklearn.metrics import confusion_matrix

# Positional split that follows the column order set by columns_sequence:
# columns 0-7 are the features, column 8 ("percent") is not used,
# and column 9 ("won") is the target.
array = governors_data.values
X = array[:,0:8]
Y = array[:,9]

# Hold out 20% of the rows for testing; the split is seeded so it is repeatable.
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.2,random_state=0)

# Seed the estimator as well: without random_state the fit is not guaranteed
# to repeat between runs, so the confusion matrix could change on a re-run.
classifier = GradientBoostingClassifier(random_state=0)
classifier.fit(x_train,y_train)

# Predict the held-out rows and tabulate true labels against predicted labels.
y_pred = classifier.predict(x_test)
cm = confusion_matrix(y_test,y_pred)
print(cm)

# Checking model accuracy using KFold, AdaBoostClassifier and cross_val_score

In [None]:
# Score an AdaBoost classifier on X and Y with shuffled 10-fold cross-validation.
kflod = KFold(n_splits=10, shuffle=True, random_state=7)
model = AdaBoostClassifier(n_estimators=30, random_state=7)
results = cross_val_score(estimator=model, X=X, y=Y, cv=kflod)

# Report the mean fold score as a rounded percentage.
mean_score_percent = round(results.mean() * 100)
print(mean_score_percent, "%")

In [None]:
# Count the rows for every (state, won) combination; combinations with no
# rows are shown as 0. The counts are drawn as an annotated heatmap.
won_state_details = governors_data_df.pivot_table(
    index=["state"],
    columns="won",
    values="party",
    aggfunc=len,
    fill_value=0,
)
sns.heatmap(won_state_details, annot=True, fmt=".0f")
plt.show()

In [None]:
# Heatmap of votes on rows flagged as won, summed per state (rows) and party (columns).
plt.figure(figsize=(25,5))
winning_rows = governors_data_df[governors_data_df["won"]==True]
# aggfunc is given as the string "sum": passing the builtin sum is deprecated
# in recent pandas and emits a FutureWarning. The result is the same.
party_state_details = pd.pivot_table(
    winning_rows,
    index="state",
    columns="party",
    values="state_county_candiate_votes",
    aggfunc="sum",
    fill_value=0,
)
sns.heatmap(party_state_details,annot=True,fmt=".0f")
plt.show()

In [None]:
# Bar chart of candidate votes per state for the rows flagged as won,
# drawn in descending order of votes.
winners_by_votes = governors_data_df[governors_data_df["won"]==True].sort_values(
    "state_county_candiate_votes", ascending=False
)
plt.figure(figsize=(25,5))
plt.bar("state", "state_county_candiate_votes", data=winners_by_votes)
plt.show()

In [None]:
# Bar chart of candidate votes per party over all rows,
# drawn in descending order of votes.
rows_by_votes = governors_data_df.sort_values("state_county_candiate_votes", ascending=False)
plt.figure(figsize=(25,5))
plt.bar("party", "state_county_candiate_votes", data=rows_by_votes)
plt.show()

In [None]:
# Total votes on rows flagged as won, grouped by (state, party, candidate).
winning_rows = governors_data_df[governors_data_df["won"]==True]
state_party_wise_votes = (
    winning_rows
    .loc[:, ["state","candidate","party","state_county_candiate_votes"]]
    .groupby(["state","party","candidate"])
    .sum()
)
# DataFrame.plot.bar opens its own figure sized by figsize, so no separate
# plt.figure() call is made here; one would only leave an empty extra figure.
state_party_wise_votes.sort_values("state_county_candiate_votes",ascending=False).plot.bar(figsize=(30,5))
plt.show()

In [None]:
# Load the presidential state, county and county-candidate CSV files.
president_state, president_county, president_candidate = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/us-election-2020/president_state.csv",
        "/kaggle/input/us-election-2020/president_county.csv",
        "/kaggle/input/us-election-2020/president_county_candidate.csv",
    )
)

In [None]:
president_state.head()

In [None]:
president_state["president_state_total_votes"] = president_state["total_votes"]
president_state.head()

In [None]:
president_state = president_state.drop(columns=["total_votes"])
president_state.head()

In [None]:
president_county.head()

In [None]:
president_county["president_current_votes"] = president_county["current_votes"]
president_county["president_county_total_values"] = president_county["total_votes"]
president_county["president_percent"] = president_county["percent"]
president_county.head()

In [None]:
president_county = president_county.drop(columns=["current_votes","total_votes","percent"])
president_county.head()

In [None]:
president_candidate.head()

In [None]:
president_candidate["president_candidate"] = president_candidate["candidate"]
president_candidate["president_candidate_total_votes"] = president_candidate["total_votes"]
president_candidate.head()

In [None]:
president_candidate = president_candidate.drop(columns=["candidate","total_votes"])
president_candidate.head()

In [None]:
# Left-join county rows with candidate rows on (state, county),
# then attach the state-level rows on state.
president_data = (
    president_county
    .merge(president_candidate, how="left", on=["state", "county"])
    .merge(president_state, how="left", on=["state"])
)
president_data.head()

In [None]:
# Load the senate state, county-candidate and county CSV files.
senate_state, senate_candidate, senate_county = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/us-election-2020/senate_state.csv",
        "/kaggle/input/us-election-2020/senate_county_candidate.csv",
        "/kaggle/input/us-election-2020/senate_county.csv",
    )
)

In [None]:
senate_state.head()

In [None]:
senate_state["senate_state_total_votes"] = senate_state["total_votes"]
senate_state.head()

In [None]:
senate_state = senate_state.drop(columns=["total_votes"])
senate_state.head()

In [None]:
senate_candidate.head()

In [None]:
senate_candidate["senate_candidate"] = senate_candidate["candidate"]
senate_candidate["senate_candidate_total_votes"] = senate_candidate["total_votes"]
senate_candidate.head()

In [None]:
senate_candidate = senate_candidate.drop(columns=["candidate","total_votes"])
senate_candidate.head()

In [None]:
senate_county.head()

In [None]:
senate_county["senate_current_votes"] = senate_county["current_votes"]
senate_county["senate_county_total_values"] = senate_county["total_votes"]
senate_county["senate_percent"] = senate_county["percent"]
senate_county.head()

In [None]:
senate_county = senate_county.drop(columns=["current_votes","total_votes","percent"])
senate_county.head()

In [None]:
# Left-join county rows with candidate rows on (state, county),
# then attach the state-level rows on state.
senate_data = (
    senate_county
    .merge(senate_candidate, how="left", on=["state", "county"])
    .merge(senate_state, how="left", on="state")
)
senate_data.head()

In [None]:
# Read the governors CSV files again so the following cells start from fresh,
# unmodified frames.
governors_county, governors_candidate, governors_state = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/us-election-2020/governors_county.csv",
        "/kaggle/input/us-election-2020/governors_county_candidate.csv",
        "/kaggle/input/us-election-2020/governors_state.csv",
    )
)

In [None]:
governors_state.head()

In [None]:
governors_state["governors_state_total_votes"] = governors_state["votes"]
governors_state.head()

In [None]:
governors_state = governors_state.drop(columns=["votes"])
governors_state.head()

In [None]:
governors_county.head()

In [None]:
governors_county["governors_current_votes"] = governors_county["current_votes"]
governors_county["governors_county_total_values"] = governors_county["total_votes"]
governors_county["governors_percent"] = governors_county["percent"]
governors_county.head()

In [None]:
governors_county = governors_county.drop(columns=["current_votes","total_votes","percent"])
governors_county.head()

In [None]:
governors_candidate.head()

In [None]:
governors_candidate["governors_candidate"] = governors_candidate["candidate"]
governors_candidate["governors_candidate_votes"] = governors_candidate["votes"]
governors_candidate.head()

In [None]:
governors_candidate = governors_candidate.drop(columns=["candidate","votes"])
governors_candidate.head()

In [None]:
# Left-join county rows with candidate rows on (state, county),
# then attach the state-level rows on state.
governors_data = (
    governors_county
    .merge(governors_candidate, how="left", on=["state", "county"])
    .merge(governors_state, how="left", on=["state"])
)
governors_data.head()

In [None]:
# Left-join the senate rows and then the president rows onto the governors
# rows, matching on (state, county, party). Governors rows without a match
# keep NaN in the joined columns.
race_keys = ["state", "county", "party"]
election_data = (
    governors_data
    .merge(senate_data, how="left", on=race_keys)
    .merge(president_data, how="left", on=race_keys)
)
election_data.head(10)

In [None]:
# Number of missing values in each column of the merged frame.
election_data.isnull().sum()

In [None]:
# Text columns that are label-encoded below.
text_label_columns = [
    "state",
    "county",
    "party",
    "governors_candidate",
    "senate_candidate",
    "president_candidate",
]

# Missing values in text columns get a string placeholder; everything else
# gets 0 as before. Filling text columns with the integer 0 would mix ints and
# strings in one column, which LabelEncoder rejects with a TypeError.
election_data = (
    election_data
    .fillna({column_name: "missing" for column_name in text_label_columns})
    .fillna(value=0)
)

# Replace each listed column with integer labels, in the original order.
# The single encoder is re-fitted per column, so afterwards it only holds the
# classes of the last column ("president_candidate").
lben = LabelEncoder()
for column_name in (
    "state",
    "county",
    "party",
    "won_x",
    "governors_candidate",
    "senate_candidate",
    "won_y",
    "president_candidate",
):
    election_data[column_name] = lben.fit_transform(election_data[column_name])

# Show the encoded result as the cell output.
election_data.head()