## Let us analyze the dataset for US presidential election from 1976

### This kernel/notebook does the following


1. *Performs EDA on US presidential election data consolidated from all states from 1976-2020*
1. *Does some analysis on various parties*
1. *Performs analysis at a state level*
1. *Uses animation from Plotly*
1. *Performs evaluation of some models using scikit-learn*



### First - Let us load the Kaggle Summary

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory, printing every file path found.
# The counter is initialised once, outside the loop, so it totals the files of
# all directories and is still defined when the directory is empty or missing.
count = 0
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        count += 1
print(count)
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Import the libraries

In [None]:
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, NumeralTickFormatter
from bokeh.models.tools import HoverTool
from bokeh.transform import dodge
import numpy as np
import pandas as pd
import random
import math


In [None]:
import seaborn  as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode()


In [None]:
#Let us define a random color function 
def rand_color():
    """Return a random colour as a ``#rrggbb`` lowercase hex string.

    The colour is drawn uniformly from all 24-bit RGB values, so digits may
    repeat (e.g. ``#ffffff``); sampling six *distinct* hex digits would
    exclude most of the colour space.
    """
    return "#{:06x}".format(random.getrandbits(24))


In [None]:
#Let us define the output to a notebook
output_notebook()


#### *Bokeh Case Study - US presidential elections data 1976-2020* 

* Let us use a freely available dataset at Harvard dataverse site. 
* We shall be leveraging the US presidential vote data set from 1976 to 2020 across various US States. 
* We shall be tabulating data for the top two parties across the years.

## Load the dataset

In [None]:
# Load the US presidential vote data set (the file read below covers 1976-2020), one row per candidate entry.
# The source file is available at the Harvard Dataverse site.
# A downloaded copy of the file is used for this example.
# Earlier file (1976-2016), kept for reference:
#df  = pd.read_csv('../input/python-data-visualization-essentials-guide/19762016USPrez.csv')
df  = pd.read_csv('../input/python-data-visualization-essentials-guide/1976-2020-president.csv')
# NOTE(review): the result of this drop is not assigned, so 'notes' stays in df.
# Later cells still reference the 'notes' column, so do not assign it back
# without updating them as well.
df.drop(['notes'],axis=1)
# Lower-cased copy of party_detailed, used for filtering and pivoting below
df['party'] = df['party_detailed'].str.lower()
df.info()

In [None]:
print(df['party'].unique())
print(df['party_simplified'].unique())

## Check for a heatmap on Null values

In [None]:
import seaborn as sns
#Checking null values
sns.heatmap(df.isnull())


In [None]:
df.dtypes

In [None]:
# Report the number of distinct values, and the values themselves, per key column
cols = ["state", "year", "party"]
for col_name in cols:
    distinct = df[col_name].unique()
    print("Number of unique values in ", col_name, " are : ", len(distinct), " : ", distinct)

In [None]:
# One count plot per categorical column, stacked vertically
cols = ["party_simplified","state"]
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=[30,24])
for idx, col_name in enumerate(cols):
    axes[idx] = sns.countplot(x=col_name, data=df, ax=axes[idx])
    current_ax = axes[idx]
    # Rotate the category labels so the long names stay readable
    current_ax.set_xticklabels(current_ax.get_xticklabels(), rotation=75)
    current_ax.set_title("Count plot of " + col_name + " Candidates / Parties")
    print("*"*50)
    

In [None]:
# Correlate only the numeric/boolean columns: recent pandas versions raise on
# string columns instead of silently skipping them.
plt.figure(figsize=(18,10))
sns.heatmap(df.select_dtypes(include=['number', 'bool']).corr(),
            cmap='coolwarm', annot=True)

### Bivariate Analysis

In [None]:
df.columns

In [None]:
# Scatter each column against the election year, one panel per column.
cols=['writein', 'candidatevotes',
       'totalvotes']

fig, axes = plt.subplots(nrows=1,ncols=3,figsize=[20,12])
for i in range(0,len(cols)):
    axes[i]=sns.scatterplot( x= cols[i], y="year",
                            data = df,hue="writein",
                            size = "totalvotes",
                            sizes=(50,200),
                            hue_norm=(0, 6),
                            # seaborn maps hue colours through `palette`;
                            # `cmap` is not one of its hue arguments.
                            palette="viridis",
                            ax=axes[i])
    # The y axis is the year, so name it in the title
    axes[i].set_title("Year vs "+cols[i])

In [None]:
df.plot(kind="box",subplots=True,layout=(3,3),figsize=(30,30))

In [None]:
df.hist(figsize=(15,15), layout=(4,4), bins=10)

In [None]:
# Skewness of the numeric columns only; string columns cannot be skewed and
# make recent pandas versions raise.
df.skew(numeric_only=True)

### Let us see how many parties have contested in each state since 1976 elections

In [None]:
# Number of distinct detailed parties seen in each state, largest first
parties_per_state = (
    df.groupby('state')['party_detailed']
      .nunique()
      .reset_index()
      .sort_values('party_detailed', ascending=False)
)
fig = px.bar(
    parties_per_state,
    x='state',
    y='party_detailed',
    color='party_detailed',
    height=600,
)
fig.show()

### Let us see how many votes have been polled by 3rd to 20th parties since 1976 elections

### Top-5

In [None]:
# Total candidate votes per detailed party over all rows, largest first
vote_per_party = (
    df.groupby('party_detailed')['candidatevotes']
      .sum()
      .reset_index()
      .sort_values('candidatevotes', ascending=False)
)
top_five = vote_per_party.head(5)
fig = px.bar(top_five, x='party_detailed', y='candidatevotes',
             color='candidatevotes', height=600)
fig.show()

### 3rd to 20th

In [None]:
# Leave out the four largest party labels, then chart the next 20 by votes
major_labels = ["DEMOCRAT","REPUBLICAN","INDEPENDENT","LIBERTARIAN"]
is_major = vote_per_party["party_detailed"].isin(major_labels)
vote_per_party_nodemrep = vote_per_party[~is_major]
fig = px.bar(
    vote_per_party_nodemrep.head(20),
    x='party_detailed',
    y='candidatevotes',
    color='candidatevotes',
    height=600,
)
fig.show()

## Let us see the top-25 vote getters across all elections from 1976 to 2020

In [None]:
# Votes summed per candidate name across every state and election
vote_per_candidate = (
    df.groupby('candidate')['candidatevotes']
      .sum()
      .reset_index()
      .sort_values('candidatevotes', ascending=False)
)
fig = px.bar(
    vote_per_candidate.head(25),
    x='candidate',
    y='candidatevotes',
    color='candidatevotes',
    height=600,
)
fig.show()

### Try a pie chart

In [None]:
# Sum of the totalvotes column over every row of each state, largest first.
# NOTE(review): this adds totalvotes once per candidate row; if that column is a
# state-wide total repeated on each row, the sums are inflated - confirm.
df2 = (
    df.groupby('state')['totalvotes']
      .sum()
      .reset_index()
      .sort_values('totalvotes', ascending=False)
)
# Represent only large states: everything under the threshold shares one label
below_threshold = df2['totalvotes'] < 2.e8
df2.loc[below_threshold, 'state'] = 'Other States'
fig = px.pie(df2, values='totalvotes', names='state', title='Total Votes')
fig.show()

## Let us try a Map

In [None]:
# US state map coloured by the totalvotes column of the raw rows
fig = px.choropleth(
    df,
    locations='state_po',
    locationmode='USA-states',
    scope="usa",
    color="totalvotes",
    range_color=(0, 8000000),
    title='USA Presidential Vote Counts',
)
# Remove all figure margins
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

In [None]:
# Total candidate votes per (lower-cased) party, largest first
vote_per_party = (
    df.groupby('party')['candidatevotes']
      .sum()
      .reset_index()
      .sort_values('candidatevotes', ascending=False)
)

# One US map per party for the five parties with the most votes
for party_name in vote_per_party.head(5)['party']:
    title_head = 'USA Presidential Vote Counts - ' + party_name
    print(title_head)
    df_r = df.loc[df['party'] == party_name]
    fig = px.choropleth(
        df_r,
        locations='state_po',
        locationmode='USA-states',
        scope="usa",
        color="candidatevotes",
        range_color=(0, 8000000),
        title=title_head,
    )
    fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
    fig.show()


In [None]:
# Votes per state, party and year for the parties animated in the next cells
tracked_parties = ['democrat','republican','independent',
                   'reform','libertarian','alliance']
df_temp = df[df.party.isin(tracked_parties)]
df_temp = (
    df_temp.groupby(['state','state_po','party','year'])["candidatevotes"]
           .sum()
           .reset_index()
)
# Animated map (one frame per election year) of the Democrat vote counts
datadump = df_temp[df_temp['party'] == 'democrat']
px.choropleth(
    datadump,
    locations='state_po',
    locationmode='USA-states',
    scope="usa",
    color="candidatevotes",
    color_continuous_scale="Inferno",
    range_color=(100000, 5000000),
    animation_frame="year",
    title='Total Votes by Party - Democrats',
    height=600,
)


In [None]:
# Animated map of the Independent vote counts, frames in chronological order
is_independent = df_temp['party'] == 'independent'
datadump = df_temp[is_independent].sort_values('year', ascending=True)
px.choropleth(
    datadump,
    locations='state_po',
    locationmode='USA-states',
    scope="usa",
    color="candidatevotes",
    color_continuous_scale="Inferno",
    range_color=(1000, 100000),
    animation_frame="year",
    title='Total Votes by Party - Independents',
    height=600,
)

In [None]:
# Animated map of the Libertarian vote counts, frames in chronological order
is_libertarian = df_temp['party'] == 'libertarian'
datadump = df_temp[is_libertarian].sort_values('year', ascending=True)
px.choropleth(
    datadump,
    locations='state_po',
    locationmode='USA-states',
    scope="usa",
    color="candidatevotes",
    color_continuous_scale="Inferno",
    range_color=(1000, 100000),
    animation_frame="year",
    title='Total Votes by Party - Libertarian Party',
    height=600,
)

In [None]:
# Animated map of the Republican vote counts, frames in chronological order
is_republican = df_temp['party'] == 'republican'
datadump = df_temp[is_republican].sort_values('year', ascending=True)
px.choropleth(
    datadump,
    locations='state_po',
    locationmode='USA-states',
    scope="usa",
    color="candidatevotes",
    color_continuous_scale="Inferno",
    range_color=(100000, 5000000),
    animation_frame="year",
    title='Total Votes by Party - Republican',
    height=600,
)

In [None]:
# Animated map of the Alliance party vote counts, frames in chronological order
is_alliance = df_temp['party'] == 'alliance'
datadump = df_temp[is_alliance].sort_values('year', ascending=True)
px.choropleth(
    datadump,
    locations='state_po',
    locationmode='USA-states',
    scope="usa",
    color="candidatevotes",
    color_continuous_scale="Inferno",
    range_color=(1000, 50000),
    animation_frame="year",
    title='Total Votes by Party - Alliance Party',
    height=600,
)

## Let us try other visualizations

In [None]:
# Votes per (year, state, party, candidate) expressed as a percentage of all
# votes counted in the same election year.
df_temp = df
state_group = df_temp.groupby(['year','state','state_po', 'party','candidate']).agg({'candidatevotes': 'sum'})
# transform() returns the yearly total aligned to every row, which keeps the
# index unchanged; groupby.apply would prepend a second 'year' level on recent
# pandas versions and make reset_index() fail.
yearly_total = state_group.groupby(level='year')['candidatevotes'].transform('sum')
state_pcts = state_group.assign(
    candidatevotes=100 * state_group['candidatevotes'] / yearly_total
)
state_pcts = state_pcts.reset_index()

In [None]:
# Animated map of the Democrat rows of state_pcts (candidatevotes holds a percentage there)
is_democrat = state_pcts['party'] == 'democrat'
datadump = state_pcts[is_democrat]
px.choropleth(
    datadump,
    locations='state_po',
    locationmode='USA-states',
    scope="usa",
    color="candidatevotes",
    color_continuous_scale="Inferno",
    range_color=(0.01, 0.90),
    animation_frame="year",
    title='Total Votes by Party - Democrat Party',
    height=600,
)

In [None]:
#Let us define the parties we want to track - the two major parties plus
#libertarian and independent candidates
parties = ['democrat', 'republican','libertarian','independent']
top2_df=df.loc[df['party'].isin(parties)]
#Let us build pivot tables we shall use for various examples.
#The aggregation is named by string: passing np.sum is deprecated in recent pandas.
#Pivot-1 indexed on year and state on the aggregated sum on party votes
table = pd.pivot_table(top2_df, values='candidatevotes', index=['year', 'state'],
                    columns=['party'], aggfunc='sum')
#Pivot-2 indexed on year for the aggregated sum on party votes
table2 = pd.pivot_table(top2_df, values='candidatevotes', index=['year'],
                    columns=['party'], aggfunc='sum')
#Pivot-3 indexed on state for the aggregated sum on party votes
table3 = pd.pivot_table(top2_df, values='candidatevotes', index=['state'],
                    columns=['party'], aggfunc='sum')
source = ColumnDataSource(df)


In [None]:
#Let us build a line graph of the votes per election year for each tracked party
p = figure()
# (column of table2, legend label, marker type) for every plotted party
party_series = [
    ('republican', 'Republican', 'circle_dot'),
    ('democrat', 'Democrat', 'diamond_cross'),
    ('libertarian', 'Libertarian', 'diamond'),
    ('independent', 'Independents', 'diamond'),
]
for column, label, marker in party_series:
    # One colour per party so that its line and its markers match
    party_color = rand_color()
    p.line(x='year', y=column, source=table2,
           line_width=2, color=party_color, legend_label=label)
    # scatter(marker=...) replaces the per-marker glyph methods
    # (circle_dot, diamond_cross, diamond), which newer Bokeh deprecates
    p.scatter(x='year', y=column, source=table2, marker=marker,
              size=10, color=party_color, legend_label=label)
# The title names the parties that are actually plotted
p.title.text = 'A Sample Line Chart of total votes collected by Republicans, Democrats, Libertarians and Independents in the US Presidential Elections'
p.xaxis.axis_label = 'Year'
p.yaxis.axis_label = 'Votes'
p.legend.location = 'top_left'
p.legend.title ='Parties'
#Let us remove the scientific formatting
p.yaxis.formatter=NumeralTickFormatter(format="00")
#Let us add a hovering tool
hover = HoverTool()
hover.tooltips=[
    ('Year', '@year')
]
p.add_tools(hover)
#show the plot
show(p)

* For the second exercise, we shall build a pivot table on the data and plot the trends as a bar chart, including the difference in votes.
* The following program leverages the power of pandas: it builds a new column for the difference in votes and uses it to plot a vertical bar chart that shows the trend.

In [None]:
#Define a function to calculate the smallest absolute difference
def abs_min(a, b): # a, b are input arrays
    """For every element of ``b``, return its smallest absolute distance to any element of ``a``.

    All pairwise differences ``a[i] - b[j]`` are formed by broadcasting, and the
    minimum of their absolute values is taken over the elements of ``a``.
    """
    pairwise = a[:, np.newaxis] - b
    return np.absolute(pairwise).min(axis=0)
#Define a function to calculate difference
def diff_min(a, b): # a, b are input arrays
    """Return the signed element-wise difference ``a - b``."""
    delta = a - b
    return delta


In [None]:
# Flatten the per-state pivot table into a plain DataFrame
table31_df = pd.DataFrame(table3.to_records())
# Republican minus Democrat votes for every state
diffs = diff_min(table31_df['republican'].values, table31_df['democrat'].values)
# Store the difference as a new column at position 3
table31_df.insert(3, "difference", diffs, True)
# Keep the 15 states with the most Republican votes
by_republican = table31_df.sort_values(by='republican', ascending=False)
table3_df = by_republican.head(15)
states = table3_df['state']
# Express the votes in millions
MILLION = 1000000
republican = table3_df['republican'] / MILLION
democrat = table3_df['democrat'] / MILLION
difference = table3_df['difference'] / MILLION
# These series feed the plotting data set built in the next cell


In [None]:
# Data set for the grouped bar chart (vote counts are in millions)
data = {'states' : states, 'republican' : republican, 
        'democrat' : democrat, 'difference' : difference }
source = ColumnDataSource(data=data)
#Plot a vertical bar chart with dodge by a parameter.
#The figure is sized with width/height, as in the other figures of this
#notebook; plot_width/plot_height no longer exist in Bokeh 3.
p2 = figure(height=1500, width=1200,x_range=states, 
            y_range=(difference.min(),max(republican.max(), democrat.max())),
            title="State wise Votes - vote size in millions")
p2.vbar(x=dodge('states', -0.25, range=p2.x_range), top='republican', width=0.2, 
        source=source, color="#ff0011", legend_label="Republican")
p2.vbar(x=dodge('states', +0.0, range=p2.x_range), top='democrat', width=0.2, 
        source=source, color="#1100ff", legend_label="Democrat")
p2.vbar(x=dodge('states', +0.25, range=p2.x_range), top='difference', width=0.2, 
        source=source, color="gold", legend_label="Difference")
#Overlay a line with markers on each bar group.
#scatter(marker=...) replaces the per-marker glyph methods that newer Bokeh deprecates.
p2.line(x=dodge('states', -0.25, range=p2.x_range), y='republican',
         source=source, line_width=2, color='red',legend_label='Republican')
p2.scatter(x=dodge('states', -0.25, range=p2.x_range), y='republican', marker='circle_dot',
         source=source, size=4, color=rand_color(),legend_label='Republican')
p2.line(x='states', y='democrat',
         source=source, line_width=2, color='navy',legend_label='Democrat')
p2.scatter(x='states', y='democrat', marker='diamond_cross',
         source=source, size=4, color=rand_color(),legend_label='Democrat')
p2.line(x=dodge('states', +0.25, range=p2.x_range), y='difference',
         source=source, line_width=2, color='gold',legend_label='Difference')
p2.scatter(x=dodge('states', +0.25, range=p2.x_range), y='difference', marker='diamond_dot',
         source=source, size=4, color=rand_color(),legend_label='Difference')
#Add Formatting aspects
p2.x_range.range_padding = 0.1
p2.xgrid.grid_line_color = None
p2.legend.location = "top_right"
p2.legend.orientation = "vertical"
p2.yaxis.formatter=NumeralTickFormatter(format="00")
p2.xaxis.major_label_orientation = math.pi/2
#Add Hover
hover = HoverTool()
hover.tooltips=[('States', '@states')]
p2.add_tools(hover)
#Show the plot
show(p2)

* For the third exercise, let us see how both the parties performed in one of their bellwether states over the years. 
* We shall take one state for each party to plot the performance and show the trend. 


In [None]:
# Builds per-year vote series for California and Texas from the (year, state)
# pivot table and draws one Bokeh chart per state.
from bokeh.models import FixedTicker
from bokeh.palettes import Turbo256
# Flatten the (year, state) pivot into a plain DataFrame
table41_df = pd.DataFrame(table.to_records())
# NOTE(review): abs_min returns, for each democrat value, the smallest absolute
# distance to ANY republican value - not the row-wise difference. Confirm that
# this is what the "difference" column is meant to hold.
diffs = abs_min(table41_df.republican.values, table41_df.democrat.values)
table41_df.insert(3, "difference", diffs, True)
table4_df = table41_df.sort_values(by='democrat', ascending=False)
states = table4_df['state']
#Change the values in 1000s of vote
republican = table4_df['republican']/1000
democrat = table4_df['democrat'] / 1000
difference = table4_df['difference'] / 1000
year = table4_df['year'].sort_values(ascending=True).unique()
# NOTE(review): the three fillna results below are not assigned, so table4_df
# keeps its missing values.
table4_df.republican.fillna(0)
table4_df.difference.fillna(0)
table4_df.democrat.fillna(0)
# Re-pivot to one row per year and one column per (measure, state);
# margins=True additionally asks pandas for the 'All' totals
tab4_pivot = pd.pivot_table(table4_df, values=['republican','democrat','difference'], 
                            index=['year'], columns=['state'], aggfunc=np.sum, margins=True)
# After flattening, the columns are addressed by strings such as "('democrat', 'CALIFORNIA')"
flat_tab4_df = pd.DataFrame(tab4_pivot.to_records())
tabcols = [flat_tab4_df.columns]
years = table4_df['year']
# The next four assignments repeat the ones made above with the same expressions
states = table4_df['state']
republican = table4_df['republican']/1000
democrat = table4_df['democrat'] / 1000
difference = table4_df['difference'] / 1000
votegroup = ['democrat', 'republican','difference']
# NOTE(review): this source is not passed to any glyph of p4 or p5 below
source = ColumnDataSource(data=dict(x=tabcols, democrat=democrat, republican=republican, difference=difference,))
p4 = figure(width=900, height=800) #, x_axis_type="datetime") 
years = flat_tab4_df.year
values = flat_tab4_df["('democrat', 'CALIFORNIA')"]
rvalues = flat_tab4_df["('republican', 'CALIFORNIA')"]
#Plotting for a democrat state - California
# Bars show the Democrat votes; the dotted red line and circles show the Republican votes
p4.vbar(years, top = values, width = .9, fill_alpha = .5,line_alpha = .5,
        fill_color = rand_color(), line_color=rand_color(), line_dash='dashed')
p4.line(years,rvalues,line_width=4,line_color="red",line_dash="dotted")
p4.circle(years,rvalues,radius=.2,fill_color='yellow',line_color=rand_color())
hover = HoverTool()
hover.tooltips=[('Votes', '@top'),('Year',  '@x')]
p4.x_range.range_padding = 0.1
p4.xgrid.grid_line_color = None
p4.yaxis.formatter=NumeralTickFormatter(format="00")
p4.xaxis.major_label_orientation = math.pi/2
p4.add_tools(hover)
show(p4)

p5 = figure(width=900, height=800)  
years = flat_tab4_df.year
#Plotting for a republican state - Texas
values = flat_tab4_df["('republican', 'TEXAS')"]
dvalues = flat_tab4_df["('democrat', 'TEXAS')"]
divalues = flat_tab4_df["('difference', 'TEXAS')"]
# Bars show the Republican votes; the dotted navy line and circles show the
# Democrat votes, and the dash-dot line shows the "difference" column
p5.vbar(years, top = values, width = .9, fill_alpha = .5,line_alpha = .5,
        fill_color = rand_color(), line_color=rand_color(), line_dash='dotted')
p5.line(years,dvalues,line_width=4,line_color="navy",line_dash="dotted")
p5.circle(years,dvalues,radius=.2,fill_color='yellow',line_color=rand_color())
p5.line(years,divalues,line_width=2,line_color=rand_color(),line_dash="dashdot")
hover = HoverTool()
hover.tooltips=[('Votes', '@top'),('Year',  '@x')]
p5.x_range.range_padding = 0.1
p5.xgrid.grid_line_color = None
p5.yaxis.formatter=NumeralTickFormatter(format="00")
p5.xaxis.major_label_orientation = math.pi/2
p5.add_tools(hover)
show(p5)

### Let us try a heatmap through correlation

In [None]:
# Pairwise correlation of the numeric/boolean columns only; recent pandas
# versions raise when string columns are passed to corr().
corr = df.select_dtypes(include=['number', 'bool']).corr()

In [None]:
# Call the method: a bare `df.info` only displays the bound-method repr
# instead of the column/dtype/non-null summary.
df.info()

In [None]:
df.columns

In [None]:

# Preview the correlation matrix without the 'version' and 'notes' columns.
# The drop is chained into the displayed expression (an unassigned drop has no
# effect); errors="ignore" tolerates either column being absent from corr.
corr.drop(columns=["version", "notes"], errors="ignore").head()

In [None]:
# Number of candidate rows per election year. x and y come from the same
# grouped Series so that every bar is paired with its own year.
candidates_per_year = df.groupby('year')['candidate'].count()
data = [go.Bar(
    x=candidates_per_year.index,
    y=candidates_per_year.values,
    text=candidates_per_year.values,  # value shown on each bar by textposition
    textposition = 'auto',
    marker=dict(
        color=candidates_per_year.values,  # one colour value per bar
                line=dict(
                    color='rgb(8,48,107)',
                    width=1.5),
            ),
            opacity=0.6
    )]
layout = {
  'xaxis': {'title': 'Year'},
  'yaxis': {'title': 'No. of Candidates'},
  'barmode': 'relative',
  'title': 'Total Number of Candidates'
}
iplot({'data': data, 'layout': layout})

In [None]:
# Votes cast per election year. x and y come from the same grouped Series so
# that every bar is paired with its own year. candidatevotes is summed because
# each row contributes its own candidate's votes exactly once.
votes_per_year = df.groupby('year')['candidatevotes'].sum()
data = [go.Bar(
    x=votes_per_year.index,
    y=votes_per_year.values,
    text=votes_per_year.values,  # value shown on each bar by textposition
    textposition = 'auto',
    marker=dict(
        color='mediumvioletred',
                line=dict(
                    color='rgb(8,48,107)',
                    width=1.5),
            ),
            opacity=0.6
    )]
layout = {
  'xaxis': {'title': 'Year'},
  'yaxis': {'title': 'Total Votes'},
  'barmode': 'relative',
  'title': 'Total Votes per Election Year'
}
iplot({'data': data, 'layout': layout})


### Let us do some Machine Learning Practice

In [None]:
print(df.columns)
# Feature matrix: df without the target and without the columns listed below
non_feature_cols = [
    'totalvotes', 'state', 'state_po', 'party', 'state_fips', 'office',
    'state_cen', 'state_ic', 'candidate', 'version',
    'party_simplified', 'writein', 'party_detailed', 'notes',
]
x = df.drop(columns=non_feature_cols)
# Target, kept as a one-column DataFrame
y = df[["totalvotes"]]

In [None]:
#Importing warnings
import warnings
warnings.filterwarnings('ignore')

# Every estimator is imported from its public scikit-learn package. The private
# module paths used before (sklearn.linear_model.base, sklearn.tree.tree,
# sklearn.svm.classes, ...) were removed in scikit-learn 0.24.
from scipy.stats import zscore
from sklearn import metrics
from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression
from sklearn.decomposition import PCA
from sklearn.ensemble import (AdaBoostRegressor, GradientBoostingRegressor,
                              RandomForestRegressor)
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.isotonic import IsotonicRegression
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import (
    ARDRegression, BayesianRidge, ElasticNet, ElasticNetCV, HuberRegressor,
    Lars, LarsCV, Lasso, LassoCV, LassoLars, LassoLarsCV, LassoLarsIC,
    LinearRegression, LogisticRegression, MultiTaskElasticNet,
    MultiTaskElasticNetCV, MultiTaskLasso, MultiTaskLassoCV,
    OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV,
    PassiveAggressiveRegressor, RANSACRegressor, Ridge, RidgeCV,
    SGDRegressor, TheilSenRegressor,
)
#from sklearn.linear_model.randomized_l1 import RandomizedLogisticRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neighbors import KNeighborsRegressor, RadiusNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR, LinearSVR, NuSVR
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor

In [None]:
# Standardise every feature column and keep the original column names
sc = StandardScaler()
a = sc.fit(x).transform(x)
df_x = pd.DataFrame(data=a, columns=x.columns)
df_x.head()

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    df_x,
    y,
    test_size=0.20,
    random_state=45,
)


## Let us try all types of regression using Scikit Learn library

> model = [AdaBoostRegressor(),
>         ARDRegression(),
>         BayesianRidge(),
>         CCA(),
>         DecisionTreeRegressor(),
>         ElasticNet(),
>         ElasticNetCV(),
>         ExtraTreeRegressor(),
>         GaussianProcessRegressor(),
>         GradientBoostingRegressor(),
>         HuberRegressor(),
>         #IsotonicRegression(), #Not feasible with the dataset
>         KernelRidge(),
>         KNeighborsRegressor(),
>         #LabelEncoder(), #Not feasible with the dataset
>         Lars(),
>         LarsCV(),
>         Lasso(),
>         LassoCV(),
>         LassoLars(),
>         LassoLarsCV(),
>         LassoLarsIC(),
>         LinearRegression(),
>         LinearSVR(),
>         MLPRegressor(),
>         #MultiOutputRegressor(), #Not feasible with the dataset
>         MultiTaskElasticNet(),
>         MultiTaskElasticNetCV(),
>         MultiTaskLasso(),
>         MultiTaskLassoCV(),
>         OrthogonalMatchingPursuit(),
>         OrthogonalMatchingPursuitCV(),
>         PassiveAggressiveRegressor(),
>         #PCA(), #Not feasible with the dataset
>         PLSCanonical(),
>         PLSRegression(),
>         RadiusNeighborsRegressor(),
>         #RandomizedLogisticRegression(), #Not feasible with the dataset
>         RANSACRegressor(),
>         Ridge(),
>         RidgeCV(),
>         SGDRegressor(),
>         #StandardScaler(),#Not feasible with the dataset
>         SVR(),
>         TheilSenRegressor(),
>         NuSVR()
>]

In [None]:
# Candidate regressors, all with their default hyper-parameters
model = [AdaBoostRegressor(),
         ARDRegression(),
         BayesianRidge(),
         CCA(),
         DecisionTreeRegressor(),
         ElasticNet(),
         ElasticNetCV(),
         ExtraTreeRegressor(),
         GaussianProcessRegressor(),
         GradientBoostingRegressor(),
         HuberRegressor(),
         #IsotonicRegression(), #Not feasible with the dataset
         KernelRidge(),
         KNeighborsRegressor(),
         #LabelEncoder(), #Not feasible with the dataset
         Lars(),
         LarsCV(),
         Lasso(),
         LassoCV(),
         LassoLars(),
         LassoLarsCV(),
         LassoLarsIC(),
         LinearRegression(),
         LinearSVR(),
         MLPRegressor(),
         #MultiOutputRegressor(), #Not feasible with the dataset
         MultiTaskElasticNet(),
         MultiTaskElasticNetCV(),
         MultiTaskLasso(),
         MultiTaskLassoCV(),
         OrthogonalMatchingPursuit(),
         OrthogonalMatchingPursuitCV(),
         PassiveAggressiveRegressor(),
         #PCA(), #Not feasible with the dataset
         PLSCanonical(),
         PLSRegression(),
         RadiusNeighborsRegressor(),
         #RandomizedLogisticRegression(), #Not feasible with the dataset
         RANSACRegressor(),
         Ridge(),
         RidgeCV(),
         SGDRegressor(),
         #StandardScaler(),#Not feasible with the dataset
         SVR(),
         TheilSenRegressor(),
         NuSVR()
]
model_name = []
model_score = []
model_mae = []
model_mse = []
model_rmse = []
model_r2= []

for m in model:
    # Compute every metric before recording anything, so the result lists stay
    # aligned. An estimator that cannot be fitted or scored on this data is
    # reported and skipped instead of aborting the whole comparison.
    try:
        m.fit(x_train,y_train)
        score=m.score(x_train,y_train)  # score on the TRAINING split
        predm=m.predict(x_test)
        mae=mean_absolute_error(y_test,predm)
        mse=mean_squared_error(y_test,predm)
        r2=r2_score(y_test,predm)
    except Exception as err:
        print("Skipping", type(m).__name__, ":", err)
        continue
    model_name.append(m) 
    model_score.append(score) 
    model_mae.append(mae)
    model_mse.append(mse)
    model_rmse.append(np.sqrt(mse))
    model_r2.append(r2)

list_of_models = list(zip(model_name,
                          model_score,
                          model_mae,
                          model_mse,
                          model_rmse,
                          model_r2
                         )
                     )
dfm = pd.DataFrame(list_of_models,columns=['Model','Score','MAE','MSE','RMSE','R2 Score'])
# Keep the sorted frame (sort_values returns a new object) so the table is
# displayed with the highest training score first
dfm = dfm.sort_values('Score', ascending=False)
dfm.style.background_gradient(cmap ='RdYlGn')

In [None]:
# Show the row of dfm with the highest 'Score' value, framed by banner lines
banner = "*" * 100
best_by_score = dfm.loc[dfm['Score'].idxmax()]
print(banner)
print("The Best Model is : ")
print(banner)
print(best_by_score)
print(banner)

In [None]:
# Show the row of dfm with the highest 'R2 Score' value, framed by banner lines
banner = "*" * 100
best_by_r2 = dfm.loc[dfm['R2 Score'].idxmax()]
print(banner)
print("The Best R2 Score is : ")
print(banner)
print(best_by_r2)
print(banner)

## As per the scikit-learn models evaluated above (ranked by their score on the training split), the best model is Decision Tree Regression