In [None]:
import numpy as np 
import pandas as pd 
import folium

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.cluster import SpectralClustering, DBSCAN, KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.metrics.pairwise import euclidean_distances

import networkx as nx
from community import best_partition

import plotly.offline as py
import plotly.graph_objs as go
import networkx as nx
import matplotlib.pyplot as plt

import os

# Switch plotly.offline into notebook mode so the py.iplot() calls below can
# render figures inline in the notebook output.
py.init_notebook_mode(connected=True)

In [None]:
# Load the 2019 results file; it is read with the ISO-8859-8 (Hebrew) codec.
df = pd.read_csv(
    r'../input/israelielections2019/expc.csv',
    encoding='iso8859_8',
)

In [None]:
# Load the historical (1996-2015) results file, also read with ISO-8859-8.
old = pd.read_csv(
    r'../input/israeli-elections-2015-2013/israeli_elections_results_1996_to_2015.csv',
    encoding='iso8859_8',
)

In [None]:
# Preview the first five rows of the historical results.
old.head(n=5)

In [None]:
# Keep the first 22 columns, replace missing values with 0, then keep the
# first 1212 rows.
# NOTE(review): 22 and 1212 are hard-coded; presumably they cut off trailing
# columns/rows that are not wanted in the analysis -- TODO confirm.
kept_columns = df.columns[:22]
df = df.loc[:, kept_columns].fillna(0).iloc[:1212]

In [None]:
# Per-row name and voter count, reused later for hover text and marker sizes.
locations, sizes = df['Name'], df['Voters']

In [None]:
# Columns removed before normalising; presumably everything that remains is a
# per-party vote count -- TODO confirm against the CSV header.
excluded_columns = ['lng', 'lat', 'Name', 'Symbol', 'Registered', 'Voters', 'Disqualified', 'Qualified']
party_votes = df.drop(columns=excluded_columns)

# Order the remaining columns from the largest column total to the smallest.
columns_by_total = party_votes.sum().sort_values(ascending=False).index
party_votes = party_votes.loc[:, columns_by_total]

# Divide every row by its own total so each row holds shares instead of counts
# (a row whose total is 0 becomes NaN here).
row_totals = party_votes.sum(axis=1)
normalized_df = party_votes.div(row_totals, axis=0)

In [None]:
# Number of rows kept in df.
df.shape[0]

In [None]:
# NaN shares (e.g. from rows whose total was 0 during normalisation) are
# replaced with 0 because PCA cannot handle missing values.
normalized_df = normalized_df.fillna(0)

# Project the share vectors onto their first two principal components.
# random_state pins the result in case scikit-learn selects its randomized
# SVD solver for a frame of this size.
pca_df = pd.DataFrame(PCA(n_components=2, random_state=42).fit_transform(normalized_df))
pca_df['locations'] = locations
pca_df['sizes'] = sizes

# Unlabelled overview of the projection (clusters are coloured further down).
fig, ax = plt.subplots(figsize=(7, 7))
ax.scatter(pca_df[0], pca_df[1], s=50, alpha=0.6)
ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
plt.show()

In [None]:
# Silhouette score for each candidate number of clusters (2..11).
cluster_counts = np.arange(2, 12)
score = []
for n_clusters in cluster_counts:
    # Seeded with the same random_state as the final model fitted below, so
    # the curve is reproducible and describes the model actually used.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(normalized_df)
    score.append(silhouette_score(normalized_df, kmeans.predict(normalized_df)))

plt.plot(cluster_counts, score)
plt.xlabel('Number of clusters')
plt.ylabel('Silhouette score')
plt.show()

In [None]:
# Final clustering: five clusters, seeded so labels are stable between runs.
kmeans = KMeans(n_clusters=5, random_state=42).fit(normalized_df)

# Rebuild the 2-D projection and attach the cluster label plus the metadata
# used for hover text and marker sizes. The PCA is seeded as well so the
# coordinates do not shift between runs if the randomized solver is selected.
pca_df = pd.DataFrame(PCA(n_components=2, random_state=42).fit_transform(normalized_df))
pca_df['label'] = kmeans.predict(normalized_df)
pca_df['locations'] = locations
pca_df['sizes'] = sizes

In [None]:
# One bar series per cluster centre; each bar is that centre's value for one
# column of normalized_df. Built in a loop so the chart follows whatever
# number of clusters the fitted model has instead of assuming exactly five.
cluster_traces = [
    go.Bar(
        x=normalized_df.columns,
        y=center,
        name='Cluster {}'.format(cluster_number),
    )
    for cluster_number, center in enumerate(kmeans.cluster_centers_, start=1)
]

layout = go.Layout(
    title='<b>Cluster Centers</b>',
    titlefont=dict(size=30, color='#7f7f7f'),
    hovermode='closest'
)

fig = go.Figure(data=cluster_traces, layout=layout)
py.iplot(fig)

In [None]:
# One scatter trace per cluster label, drawn in the 2-D PCA plane.
traces = []
for cluster_label in sorted(pca_df.label.unique()):
    members = pca_df[pca_df.label == cluster_label]
    # Marker size grows with the square root of the 'sizes' column.
    marker_sizes = [np.sqrt(voters) / 5 for voters in members['sizes']]
    cluster_trace = go.Scatter(
        x=members[0],
        y=members[1],
        text=members['locations'],
        mode='markers',
        hoverinfo='text',
        marker=dict(size=marker_sizes, opacity=0.3),
    )
    traces.append(cluster_trace)

# Both axes are purely positional, so grid, zero line, axis line and ticks are
# hidden on each of them.
hidden_axis = dict(
    autorange=True,
    showgrid=False,
    zeroline=False,
    showline=False,
    ticks='',
    showticklabels=False,
)

layout = go.Layout(
    width=1100,
    height=900,
    title='Likud voting clusters',
    hovermode='closest',
    xaxis=dict(hidden_axis),
    yaxis=dict(hidden_axis),
)

fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)

In [None]:
# Copy the coordinates from df onto pca_df (pandas aligns them on the index).
for coordinate in ('lat', 'lng'):
    pca_df[coordinate] = df[coordinate]

In [None]:
# Map with one circle per row of pca_df, coloured by cluster label and sized
# by the 'sizes' column.
m = folium.Map(location=[32.13, 34.8], zoom_start=9, tiles="CartoDB dark_matter")

# One colour per cluster label (labels 0..4 index into this list).
colors = ['blue', 'orange', 'green', 'crimson', 'purple']
for _, row in pca_df.iterrows():
    cluster_color = colors[int(row['label'])]
    folium.Circle(
        location=(row['lat'], row['lng']),
        radius=0.03 * row['sizes'],
        # Use the 'locations' column for the popup: `row.name` is the Series'
        # own name (the row's index label), not the locality name.
        popup='<b>' + str(row['locations']) + '</b>',
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
    ).add_to(m)

m

In [161]:
# Write the folium map to an HTML file in the working directory.
m.save('plot_data.html')


In [None]:
# Keep only the 2015 rows of the historical results.
old = old[old.year == 2015]

# Total 'Meretz' value per settlement name. Only that column is selected
# before summing, so text columns are never passed through sum().
m_old = old.groupby('settlement_name_hebrew')['Meretz'].sum().reset_index()

# Attach each row's cluster label to df (aligned on the index).
df['label'] = pca_df['label']

In [None]:
# Pair each settlement's 2015 'Meretz' total with its 2019 'meretz' value by
# matching the 2015 Hebrew settlement name against the 2019 'Name' column.
totals_2015 = m_old[['settlement_name_hebrew', 'Meretz']]
results_2019 = df[['Name', 'meretz', 'label']]
t = totals_2015.merge(results_2019, left_on='settlement_name_hebrew', right_on='Name')

# Keep only the rows assigned to cluster label 3.
# NOTE(review): the label number depends on the KMeans fit above -- confirm it
# still identifies the intended cluster if the model or data changes.
t = t.loc[t['label'] == 3].reset_index()

In [None]:
# Slope chart: for every row of t, a marker at x=0 for the 'Meretz' value, a
# marker at x=1 for the 'meretz' value, and a thin gray line joining the two.

# Hover labels: "<name> <value>" for each side of the chart.
text_before = [
    name + ' ' + str(votes)
    for name, votes in zip(t['settlement_name_hebrew'], t['Meretz'])
]
text_after = [
    name + ' ' + str(votes)
    for name, votes in zip(t['Name'], t['meretz'])
]

# All connector segments live in one trace; a None entry breaks the line
# between consecutive segments.
line_x = []
line_y = []
for before, after in zip(t['Meretz'], t['meretz']):
    line_x += [0, 1, None]
    line_y += [before, after, None]

marker_style = dict(color='green', opacity=0.3, size=10)

traces = [
    go.Scatter(
        x=line_x,
        y=line_y,
        mode='lines',
        line=dict(color='gray', width=0.5),
        # hoverinfo=None would leave the default hover behaviour in place;
        # 'skip' is the value that actually disables hover for the connectors.
        hoverinfo='skip',
    ),
    go.Scatter(
        x=[0] * len(t),
        y=t['Meretz'],
        mode='markers',
        text=text_before,
        hoverinfo='text',
        marker=dict(marker_style),
    ),
    go.Scatter(
        x=[1] * len(t),
        y=t['meretz'],
        mode='markers',
        text=text_after,
        hoverinfo='text',
        marker=dict(marker_style),
    ),
]

layout = go.Layout(
    title='מרצ בישובים הערביים',
    hovermode='closest',
    showlegend=False,
    yaxis=dict(
        autorange=True,
        showgrid=False,
        zeroline=False,
        showline=False,
        ticks='',
        showticklabels=False),
    xaxis=dict(
        # Fixed range leaves empty space on both sides of the two columns.
        range=[-1, 2],
        autorange=False,
        showgrid=False,
        zeroline=False,
        showline=False,
        ticks='',
        showticklabels=False
    ))

fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)

In [None]:
# Sum of the lowercase 'meretz' column (taken from df) over the rows of t.
t['meretz'].sum()

In [None]:
# Sum of the capitalised 'Meretz' column (the 2015 totals) over the rows of t.
t['Meretz'].sum()

In [None]:
# NOTE(review): hard-coded ratio; where the two literals come from is not
# shown in this notebook (presumably a vote count over a total) -- TODO
# confirm and derive both numbers from the data instead of typing them in.
130737.0/4054829

In [None]:
# NOTE(review): hard-coded ratio with the same denominator as the previous
# cell; the source of the literals is not shown in this notebook -- TODO
# confirm and derive both numbers from the data instead of typing them in.
147532/4054829