# **(WORK IN PROGRESS... Round 2 is coming!)**

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import geopandas as gpd
import json
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as pgo
import plotly.io as pio
import plotly.subplots as psp

# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Shared figure layout: fixed width, horizontally centred title,
# see-through legend box and uniform left/right/bottom margins.
_custom_layout = pgo.Layout(
    width=600,
    title=dict(x=.5, xanchor='center', xref='paper'),
    legend=dict(bgcolor='rgba(0,0,0,0)'),  # transparent background
    margin=dict(l=50, r=50, b=50),
)

# Register the layout as a named template and make it the default one.
pio.templates['custom'] = pgo.layout.Template(layout=_custom_layout)
pio.templates.default = 'custom'

# Population table; its 'teryt_code' and 'area_type' columns are used further
# down to tell cities apart from rural provinces.
pop_df = pd.read_csv('/kaggle/input/polish-demographics-20182019/population_2019.csv')

# Province boundaries used by every choropleth map in this notebook.
with open('/kaggle/input/poland-presidential-elections-2020-results/polish_provinces.geojson', 'r', encoding='utf-8') as f:
    geojson = json.loads(f.read())
    # NOTE: the triple-quoted block below is a bare string expression, so it is
    # never executed. It holds a disabled snippet that would prepend two
    # rectangular placeholder features ('statki' and 'zagranica') to
    # geojson['features'].
    """
    geojson['features'] = [
        {
          "type": "Feature",
          "properties": {
              'id': '381',
              'powiat': 'statki',
              'teryt_code': 149800
          },
          "geometry": {
            "type": "Polygon",
            "coordinates": [
              [
                [
                  21.075210571289062,
                  52.153293169484385
                ],
                [
                  21.14593505859375,
                  52.153293169484385
                ],
                [
                  21.14593505859375,
                  52.20129455694109
                ],
                [
                  21.075210571289062,
                  52.20129455694109
                ],
                [
                  21.075210571289062,
                  52.153293169484385
                ]
              ]
            ]
          }
        },
        {
            "type": "Feature",
            "properties": {
              'id': '382',
              'powiat': 'zagranica',
              'teryt_code': 149900
            },
            "geometry": {
            "type": "Polygon",
            "coordinates": [
              [
                [
                  21.150741577148438,
                  52.19498154435874
                ],
                [
                  21.223526000976562,
                  52.19498154435874
                ],
                [
                  21.223526000976562,
                  52.24041522350549
                ],
                [
                  21.150741577148438,
                  52.24041522350549
                ],
                [
                  21.150741577148438,
                  52.19498154435874
                ]
              ]
            ]
            }
        }
    ] + geojson['features']
    """

def bold(txt):
    """Return ``txt`` converted to a string and wrapped in HTML ``<b>`` tags."""
    return ''.join(('<b>', str(txt), '</b>'))

def make_title(txt, **kwargs):
    """Build a title dict whose ``text`` is ``txt`` upper-cased and bolded.

    Any extra keyword arguments are copied into the returned dict unchanged.
    """
    heading = bold(str(txt).upper())
    return dict(text=heading, **kwargs)

def multiline_text(*args):
    """Join the string form of every argument with HTML ``<br>`` line breaks."""
    return '<br>'.join(str(piece) for piece in args)

def make_hover_text(lines):
    """Render ``(label, value)`` pairs as ``<br>``-separated hover lines.

    Each pair becomes ``<b>label</b>: value``; spaces and colons are then
    stripped from both ends of the line, so a pair with an empty value
    leaves no dangling ``': '``.
    """
    rendered = []
    for label, value in lines:
        entry = bold(label) + ': ' + value
        rendered.append(entry.strip(' :'))
    return '<br>'.join(rendered)

def make_simple_colorscale(hash_color):
    """Build a two-stop colorscale running from white to the given color.

    Parameters
    ----------
    hash_color : str
        Hex color written as ``rrggbb``, with or without leading ``#``
        characters. Anything after the sixth hex digit (for example an
        alpha pair) is ignored.

    Returns
    -------
    list
        ``[[0, 'rgb(255, 255, 255)'], [1, 'rgb(r, g, b)']]``.

    Raises
    ------
    ValueError
        If any of the three two-character channels is not valid hex
        (this includes inputs shorter than six digits).
    """
    hex_digits = hash_color.lstrip('#')
    # Read exactly two digits per channel so trailing characters cannot
    # inflate the blue component past 255.
    red, green, blue = (int(hex_digits[i:i + 2], 16) for i in (0, 2, 4))
    return [
        [0, "rgb(255, 255, 255)"],       # lowest value -> white
        [1, f"rgb({red}, {green}, {blue})"],  # highest value -> requested color
    ]

# English names assigned positionally to the columns of the results table:
# every entry but the last names a leading column, the last entry names the
# final column. The grouping below is for readability only.
TRAD_COLS = (
    # identification
    ['electoral_district_id', 'teryt_code', 'powiat', 'voivodeship']
    # voters and ballot papers
    + ['n_received_ballot_papers', 'n_entitled_voters', 'n_unused_voting_cards',
       'n_voters_issued_ballot_papers', 'n_voters_by_proxy',
       'n_voters_with_displacement_certificate', 'n_voters_issued_voting_pack']
    # envelopes
    + ['n_received_envelopes', 'n_envelopes_withouth_vote', 'n_envelopes_unsigned',
       'n_envelopes_withouth_voting_card', 'n_improperly_sealed_envelopes',
       'n_envelopes_put_in_ballot_box']
    # cards
    + ['n_cards_in_ballot_box', 'n_cards_taken_from_envelope',
       'n_invalid_cards', 'n_valid_cards']
    # votes
    + ['n_invalid_votes', 'multiple_candidates_crossed', 'no_candidate_crossed',
       'candidate_name_crossed', 'n_valid_votes']
    # trailing column
    + ['n_counties']
)

# Hex color of each candidate, keyed by lower-cased surname. The values are
# passed as-is to pie `marker_colors` and, via make_simple_colorscale, to the
# map colorscales, so every entry must be a complete '#rrggbb' string.
# Insertion order matters: it drives the order of pies and map subplots.
CANDIDATES_COLORS = {
    'duda': '#df0000',
    'trzaskowski': '#009bdf',
    'hołownia': '#eca900',
    'bosak': '#0d2e87',
    'kosiniak-kamysz': '#07b895',  # was '07b895': missing '#' made it an invalid color
    'biedroń': '#5704f1',
    'żółtek': '#841fc2',
    'jakubiak': '#192966',
    'tanajno': '#ed6ec8',
    'witkowski': '#f15a04',
    'piotrowski': '#f1044d',
}

# Codes that never appear on a row whose area_type is 'rural' are treated as cities.
_rural_codes = set(pop_df.loc[pop_df['area_type'] == 'rural', 'teryt_code'])
CITIES_TERYT_CODES = list(set(pop_df['teryt_code']) - _rural_codes)

In [None]:
# Round-1 results, one row per province (semicolon-separated file).
votes1_df = pd.read_csv('/kaggle/input/poland-presidential-elections-2020-results/results_provinces_round1.csv', sep=';', low_memory=False)

# Rename the columns positionally: the leading columns take TRAD_COLS[:-1], the
# final column takes TRAD_COLS[-1], and every column in between is named after
# the last whitespace-separated word of its original header, lower-cased
# (presumably the candidate's surname; the later code relies on these matching
# the keys of CANDIDATES_COLORS).
votes1_df.columns = TRAD_COLS[:-1] + votes1_df.columns[len(TRAD_COLS)-1:-1].str.split().str[-1].str.lower().tolist() + TRAD_COLS[-1:]
votes1_df['powiat'] = votes1_df['powiat'].str.lower().str.strip()
# NOTE(review): remaps code 146501 to 146500, presumably so this row matches the
# code used by the population table / GeoJSON; confirm against those files.
votes1_df['teryt_code'] = votes1_df['teryt_code'].replace({146501: 146500})
# Must be computed before teryt_code becomes the index on the next line.
votes1_df['is_city'] = votes1_df['teryt_code'].isin(CITIES_TERYT_CODES)
votes1_df = votes1_df.set_index('teryt_code')

# Cards found in the ballot box relative to the number of entitled voters.
votes1_df['turnout'] = votes1_df['n_cards_in_ballot_box'] / votes1_df['n_entitled_voters']

all_candidates = list(CANDIDATES_COLORS.keys())
# Everyone except the two candidates singled out below.
others = [c for c in all_candidates if c not in ['duda', 'trzaskowski']]
votes1_df['others'] = votes1_df[others].sum(axis=1)

# Per-row share of each candidate among the votes cast for the listed candidates.
votes1_df[[f'{c}_perc' for c in all_candidates]] = (
    votes1_df[all_candidates]
    .apply(lambda row: row / row.sum(), axis=1))
votes1_df['others_perc'] = votes1_df[others].sum(axis=1) / votes1_df[all_candidates].sum(axis=1)

# Candidate with the most votes in each row.
votes1_df['winner'] = votes1_df[all_candidates].apply(lambda row: row.idxmax(), axis=1)
# Duda's share of the combined Duda + Trzaskowski votes, centred on 0:
# positive means Duda is ahead of Trzaskowski, negative the opposite.
votes1_df['duda_trzaskowski_diff'] = votes1_df['duda'] / votes1_df[['duda', 'trzaskowski']].sum(axis=1) - 0.5

# NOTE(review): by_proxy_perc is relative to valid votes while by_post_perc is
# relative to entitled voters; confirm the different denominators are intended.
votes1_df['by_proxy_perc'] = votes1_df['n_voters_by_proxy'] / votes1_df['n_valid_votes']
votes1_df['by_post_perc'] = votes1_df['n_voters_issued_voting_pack'] / votes1_df['n_entitled_voters']
# Envelopes received back per voting pack issued.
votes1_df['post_efficiency'] = votes1_df['n_received_envelopes'] / votes1_df['n_voters_issued_voting_pack']

# Same table without the 'statki' and 'zagranica' rows (charted separately
# below as "Ships" and "Abroad"); this is the frame used for the maps.
votes1_df_l = votes1_df[~votes1_df['powiat'].isin(['statki', 'zagranica'])]

# The Context

## The Generals: Meet the Candidates

![](https://cdn.galleries.smcloud.net/t/galleries/gf-4V5b-ddzH-1iFk_wybory-2020-kandydaci-664x442-nocrop.jpg)

These are the 11 presidential candidates:
- **Robert Biedroń** <br>
  the candidate of the Left (*Lewica*). Currently a Member of the European Parliament, former mayor of Słupsk and Poland′s first openly gay mayor. Robert Biedroń is 44 years old, has higher political science education and is an LGBT activist. <br>Key points: minimum pension PLN 1600 net, 7.2% of GDP for healthcare, one million cheap apartments to rent.
  <img src='https://i.iplsc.com/kandydat-lewicy-w-wyborach-prezydenckich-lider-wiosny-robert/000A7DKG210WMF7R-C123-F4.webp' width='400'>

- **Krzysztof Bosak** <br>
  the candidate of the Confederation (*Konfederacja*), the nationalist far-right party. 38 years old, currently a member of the Polish Parliament. Bosak has a secondary education. <br>Key points: "economic freedom" (i.e. entrepreneurs have complete freedom in the selection of associates, contractors, employees and clients), radical simplification of economic law and national ownership of natural resources.
  <img src='https://i.iplsc.com/kandydat-konfederacji-na-prezydenta-rp-krzysztof-bosak/000A7DKQO4GXCXQT-C123-F4.webp' width='400'>

- **Andrzej Duda** <br>
  the current President of the Republic of Poland, technically a non-party candidate, practically a member of the conservative and populist party Law and Justice (*Prawo i Sprawiedliwość*). 48 years old, higher legal education. <br>Key points: maintaining the "500+" program and other social programs, introducing a solidarity allowance for people who have lost their jobs as a result of the Coronavirus pandemic; ban on privatization of health care, investments for the war against smog.
  <img src='https://i.iplsc.com/prezydent-rp-andrzej-duda/000A7DL4NUAUFMVJ-C123-F4.webp' width='400'>

- **Szymon Hołownia** <br>
  a non-party candidate. 43-year-old publicist, secondary education. <br>Key points: Poland without coal by 2050, joining the Green Deal project, preserving the abortion compromise.
  <img src='https://i.iplsc.com/kandydat-na-prezydenta-rp-szymon-holownia/000A7DLC126Q8VR8-C123-F4.webp' width='400'>

- **Marek Jakubiak** <br>
  candidate for the Federation for the Republic. 61 years old, secondary education. <br>Key points: strengthening the role of the president, tax-free amount of 12 minimum salaries, reform of the judiciary system.
  <img src='https://i.iplsc.com/kandydat-na-prezydenta-rp-przedsiebiorca-marek-jakubiak/000A7DLI0QPFXGGX-C123-F4.webp' width='400'>

- **Władysław Kosiniak-Kamysz** <br>
  candidate of the Polish People's Party (*PSL*), a conservative and agrarian party. 38-year-old, higher medical education, currently a member of the Polish Parliament. <br>Key points: president over divisions, 7.2% of GDP for healthcare, 30% higher salaries to employees of medical facilities, 0 percent VAT on healthy food, increasing the tax-free amount to PLN 8,000.
  <img src='https://i.iplsc.com/prezes-polskiego-stronnictwa-ludowego-kandydat-w-wyborach-pr/000A7DLR4QABQQKJ-C123-F4.webp' width='400'>

- **Mirosław Piotrowski** <br>
  candidate of the True Europe Movement (*Ruchu Prawdziwa Europa*). 54 years old, higher historical education, university teacher. <br>Key points: radical tax cuts, reform of the Social Insurance Institution system, protection of human life from conception to natural death.
  <img src='https://i.iplsc.com/kandydat-w-wyborach-prezydenckich-prof-miroslaw-piotrowski/000A7DMB6W0CQMWR-C123-F4.webp' width='400'>

- **Paweł Tanajno** <br>
  a non-party candidate. 45 years old, higher education in management. <br>Key points: improving the image of Poland abroad, ensuring prosperity, low taxes and freedom, returning sovereignty to citizens.
  <img src='https://i.iplsc.com/kandydat-w-wyborach-prezydenckich-pawel-tanajno/000A7DMLNF9087AV-C123-F4.webp' width='400'>

- **Rafał Trzaskowski** <br>
  candidate of the second biggest party in Poland, Civic Coalition (*PO*, *Platforma Obywatelska*), and the current Mayor of Warsaw. 48 years old, higher education in political science, studied abroad in Oxford and Paris and speaks 5 languages. <br>Key points: equal opportunities for men and women, maintaining the "500+" program, introduction of civil unions, investments in clean air.
  <img src='https://i.iplsc.com/prezydent-warszawy-kandydat-w-wyborach-prezydenckich-rafal-t/000A7DMRIGTQ8WY2-C123-F4.webp' width='400'>

- **Waldemar Witkowski** <br>
  66 years old, university education, member of the Labor Union. <br>Key points: a seven-hour working day, environmental protection, privileged cooperation with the European Union, instead of the USA.
  <img src='https://i.iplsc.com/kandydat-na-prezydenta-rp-waldemar-witkowski/000A7DMYMS6GTP4F-C123-F4.webp' width='400'>
  
- **Stanisław Żółtek**<br> 
  candidate of the New Right Congress. 64-year-old, secondary education. Member of the Congress of the New Right and Polexit.<br>Key points: abolition of the election threshold, strengthening of the president's authority (similarly to the USA), liquidation of counties, right to possess weapons.
  <img src='https://i.iplsc.com/kandydat-na-urzad-prezydenta-rp-stanislaw-zoltek/000A7DNCPWFH2WE9-C123-F4.webp' width='400'>

## The Battlefield: Poland

Now that we know who is in for it, let's take a look at where it takes place.

In [None]:
# Two maps side by side: number of registered voters per province (left) and
# the city / countryside classification of each province (right).
fig = psp.make_subplots(
    rows=1, cols=2,
    specs=[[dict(type='choropleth'), dict(type='choropleth')]],
    subplot_titles=list(map(bold, ['Registered Voters per Province'.upper(), 'Provinces Type'.upper()]))
)

fig.add_trace(
    row=1, col=1,
    trace=pgo.Choropleth(
        geojson=geojson,
        locations=votes1_df_l.index,
        featureidkey="properties.teryt_code",
        # `z` and `customdata` are matched to `locations` by position, so all
        # three must come from the same frame. Using the unfiltered votes1_df
        # here would add the ships/abroad rows and break that pairing.
        z=votes1_df_l['n_entitled_voters'],
        customdata=votes1_df_l[['powiat', 'voivodeship']].applymap(str.title),
        name='',
        colorscale='Reds',
        hoverinfo='text',
        hovertemplate=make_hover_text([
            ('Voivodeship', '%{customdata[1]}'),
            ('Powiat',  '%{customdata[0]}'),
            ('', ''),
            ('Entitled Voters', '%{z:.0f}')
        ]),
        marker_line_width=2.,
        colorbar=dict(x=.53, xanchor='right', len=.7, tickprefix='<b>', ticksuffix='</b>')
    )
)

# One flat-colored trace per area type: a constant z with a single-color
# colorscale paints every province of that type in the same color, and the
# trace name provides the legend entry.
for area_name, area_color, area_mask in [
    ('City', '#c6c6c7', votes1_df_l['is_city']),
    ('Countryside', '#59a80c', ~votes1_df_l['is_city']),
]:
    data = votes1_df_l[area_mask]
    fig.add_trace(
        row=1, col=2,
        trace=pgo.Choropleth(
            geojson=geojson,
            locations=data.index,
            featureidkey="properties.teryt_code",
            z=[1] * len(data),
            customdata=data[['powiat', 'voivodeship']].applymap(str.title),
            name=area_name,
            colorscale=[(0, area_color), (1, area_color)],
            hoverinfo='text',
            hovertemplate=make_hover_text([
                ('Voivodeship', '%{customdata[1]}'),
                ('Powiat',  '%{customdata[0]}'),
                ('', ''),
                ('Type', area_name)
            ]),
            marker_line_width=2.,
            showscale=False,
            showlegend=True
        )
    )

# Hide the base map and zoom both geo subplots onto the plotted provinces.
fig.update_layout(
    width=800,
    margin={"r":0,"t":80,"l":0,"b":80},
    legend=dict(x=.9, xanchor='left', y=1),
    geo=dict(showframe=False,
             fitbounds="locations",
             visible=False,
             projection=pgo.layout.geo.Projection(type = 'mercator')),
    geo2=dict(showframe=False,
             fitbounds="locations",
             visible=False,
             projection=pgo.layout.geo.Projection(type = 'mercator')),
)

fig

In [None]:
# Registered voters inside Poland, split into countryside and city.
domestic_voters = votes1_df_l.groupby('is_city')['n_entitled_voters'].sum()
pie_data = domestic_voters.rename(index={False: 'Countryside', True: 'City'})

# The abroad and ships rows are not part of votes1_df_l, so they are read from
# the full table and appended as two extra slices.
for slice_label, powiat_name in (('Abroad', 'zagranica'), ('Ships', 'statki')):
    pie_data[slice_label] = votes1_df.loc[votes1_df['powiat'] == powiat_name, 'n_entitled_voters'].sum()

fig = pgo.Figure()

# Slice text: bold label, then percentage, then absolute value.
slice_text = multiline_text(bold('%{label}'), '%{percent}', '%{value}')
fig.add_trace(
    pgo.Pie(
        labels=pie_data.index,
        values=pie_data.values,
        name='',
        marker_colors=['#59a80c', '#c6c6c7', '#d57d01', '#0189d5'],
        marker_line_width=2,
        texttemplate=slice_text,
    )
)

fig.update_layout(title=make_title('Registered Voters by Area Type'))

fig

Here we can see that Poland is largely made up of rural areas, and that's where the battle has to be won: cities make up only 1/3 of the electorate. Also (to my surprise, I must say), Poles registered abroad number only 370k+ (a record, anyway), a bit more than 1% of the electorate, and so have very little impact.

In [None]:
# Registered voters per city, largest first.
city_rows = votes1_df_l[votes1_df_l['is_city']].reset_index()
city_voters = (
    city_rows
    .groupby(['powiat'])['n_entitled_voters']
    .sum()
    .sort_values()
    .iloc[::-1]
)

# Keep cities with at least 250k voters as their own slice and lump the
# remaining ones into a single 'others' slice placed last.
small_cities_total = city_voters[city_voters < 250_000].sum()
pie_data = pd.concat([
    city_voters[city_voters >= 250_000],
    pd.Series([small_cities_total], index=['others']),
])

fig = pgo.Figure()

slice_text = multiline_text(bold('%{label}'), '%{percent}', '%{value}')
fig.add_trace(
    pgo.Pie(
        labels=pie_data.index.str.title(),
        values=pie_data.values,
        sort=False,  # keep the order built above
        marker_line_width=2,
        texttemplate=slice_text,
        textposition='outside',
        showlegend=False,
    )
)

fig.update_layout(title=make_title('Urban Electorate', yref='container', y=.98))

fig

Moreover, if we look into cities we can see that only 9 cities are above 250k voters and they are sort of outliers, while the rest are very small cities.

At this point we know that 75% of the voters live either in the countryside or in small cities. It is pretty clear that that's where one must harvest most votes. Controlling the countryside really means controlling the whole country.

# Round 1

## Did I Miss Something? Winners and Losers

The first round has already come to an end. Let's now look at the winners and losers.

In [None]:
# Grid of pies: one tall "Overall" pie on the left, then five smaller pies
# staggered over two rows (each spanning two of the narrow columns).
fig = psp.make_subplots(
    rows=2, cols=7,
    specs=[[dict(type='domain', rowspan=2), dict(type='domain', colspan=2), None, dict(type='domain', colspan=2), None, dict(type='domain', colspan=2), None],
           [None, None, dict(type='domain', colspan=2), None, dict(type='domain', colspan=2), None, None]],
    horizontal_spacing=0.005, vertical_spacing=.05,
    column_widths=[.75,1/6,1/6,1/6,1/6,1/6,1/6],
    subplot_titles=list(map(bold, ['Overall', 'Big Cities', 'Cities', 'Countryside', 'Abroad', 'Ships'])))

city_mask = votes1_df['is_city']
electorate = votes1_df['n_entitled_voters']
powiat_names = votes1_df['powiat']

# (grid row, grid column, row filter or None for "all rows", extra Pie options),
# listed in the same order as the subplot titles above.
pie_panels = [
    (1, 1, None, dict()),
    (1, 2, city_mask & electorate.ge(250_000),
     dict(textposition='inside')),
    (1, 4, city_mask & electorate.lt(250_000),
     dict(textposition='inside', domain=dict(row=2, column=1))),
    (1, 6, ~city_mask & ~powiat_names.isin(['zagranica', 'statki']),
     dict(textposition='inside', domain=dict(row=2, column=1))),
    (2, 3, powiat_names.isin(['zagranica']),
     dict(textposition='inside', domain=dict(row=2, column=1))),
    (2, 5, powiat_names.isin(['statki']),
     dict(textposition='inside', domain=dict(row=2, column=1))),
]

for grid_row, grid_col, row_filter, pie_options in pie_panels:
    selected_rows = votes1_df if row_filter is None else votes1_df[row_filter]
    # Total votes per candidate within the selected rows.
    pie_data = selected_rows[all_candidates].sum()
    fig.add_trace(
        row=grid_row, col=grid_col,
        trace=pgo.Pie(
            labels=pie_data.index.str.title(),
            values=pie_data.values,
            marker_line_width=2,
            marker_colors=[CANDIDATES_COLORS.get(c) for c in pie_data.index],
            name='',
            **pie_options
        )
    )

fig.update_layout(
    width=900, height=600,
    title=make_title('Votes Repartition'),
    margin=dict(b=120, t=120)
)

fig

So the winner really is Andrzej Duda with 43.5% of the votes, followed by Trzaskowski with 30.5%. Quite a defeat.

However, what really strikes about these plots is:
- firstly, it seems that Duda's support drops as the size of the province/city increases: he goes from a stunning 49.5% in the countryside, to a fair 37.1% in the smaller cities and to a very meager 28.8% in the larger cities (with more than 250k voters); the very opposite can be said for Trzaskowski, who gets a larger and larger share of the votes as we move from the countryside to the city.
- secondly, although Trzaskowski crushed Duda in the bigger cities, abroad and on the ships, took a draw in the cities and only succumbed in the countryside, he still lost overall by almost 15%. This is a really brutal reminder that to win the war you certainly must win the battle in the countryside, given the size of it.
- while Trzaskowski is the man of the cities and Duda is the man of the countryside, the third and the fourth major candidates, Szymon Hołownia and Krzysztof Bosak, are everyman's men, keeping a steady 14-17% and 6-8% in all the areas, respectively.
- also, the behavior of the Poles abroad (300k people) is really in line with the behavior of a big city. We could really look at it as one big city outside Poland's borders. On the other hand, ships have a totally unique behavior and really look as if they live on another... surface.

More in general, we can check if and how candidates' support is related to the province size.

## Two Very Different Battlefields: Cities vs Countryside

In [None]:
fig = pgo.Figure()

# Candidate ordering; not referenced anywhere else in this cell.
order = ['duda', 'bosak', 'kosiniak-kamysz', 'jakubiak', 'piotrowski',
         'trzaskowski', 'hołownia', 'biedroń', 'tanajno', 'witkowski',  'żółtek']

# Correlation of each candidate's vote share with the number of entitled
# voters: keep only the 'n_entitled_voters' column, drop its last row (the
# variable against itself) and sort by the correlation value.
share_columns = [f'{c}_perc' for c in all_candidates]
size_correlations = votes1_df_l[share_columns + ['n_entitled_voters']].corr()
heatmap_df = (
    size_correlations
    .loc[:, ['n_entitled_voters']]
    .iloc[:-1, :]
    .sort_values('n_entitled_voters')
)

size_colorbar = dict(
    outlinewidth=2, tickprefix='<b>', ticksuffix='</b>',
    tickmode='array', tickvals=[1, 0, -1],
    ticktext=list(map(bold, ['1 - Big Cities', '0 - Both Types', '-1 - Rural Areas'])),
    tickangle=0)

fig.add_trace(
    pgo.Heatmap(
        z=heatmap_df.values,
        zmin=-1, zmax=1,
        x=[''],
        # Strip the '_perc' suffix to get the candidate name back.
        y=heatmap_df.index.str[:-5].str.title(),
        colorscale='RdYlGn',
        colorbar=size_colorbar,
        hovertemplate=multiline_text(
            bold('%{y}') + ': %{z:.3f}'
        ),
        xgap=3,ygap=3,
        name='',
    )
)

# One numeric label per heatmap cell; white text on strongly colored cells.
cell_labels = [
    dict(
        x=col_idx,
        y=row_idx,
        text=round(value,2),
        showarrow=False,
        font_color='#ffffff' if abs(value) > .5 else '#000000',
        font_size=10
    )
    for row_idx, row_values in enumerate(heatmap_df.values)
    for col_idx, value in enumerate(row_values)
]
# Subtitle placed above the plotting area.
subtitle = dict(
    x=.5, y=1.1, xref='paper', yref='paper', showarrow=False,
    text=multiline_text(
        'How candidates\' results correlate to the province size?',
        'Does a candidate have more success in the cities or in the rural areas?'
    ),
    font_size=15)

fig.update_layout(
    width=700,
    height=700,
    autosize=True,
    margin=dict(l=150, r=300, b=100),
    title=make_title('VOTES SHARE vs PROVINCE SIZE'),
    annotations=cell_labels + [subtitle]
)

fig

Checking the correlation between candidates' success and the size of the province (by number of registered voters) we confirm our feelings:
- Duda has a negative correlation, meaning he gets better results in smaller, rural areas; not surprisingly Kosiniak-Kamysz has the same fate, being also the leader of an agrarian party (PSL);
- Trzaskowski, on the other hand, has the same but positive correlation with the province size, suggesting he gets better results in largely populated areas;
- interestingly, Biedroń has more success in larger centers, too: a possible reason for this is his sexual orientation. Being openly gay, it seems he finds more support in bigger centers that have a more open mind and less prejudice, while in the countryside (where many people still believe that homosexuality is an illness) he faces more resistance;
- similarly to Biedroń, Witkowski too, due to his very progressive views (e.g. adoption for LGBT couples), finds more support in larger areas with possibly more open minds;
- as we've seen earlier, since Hołownia and Bosak seem to have steady support across all types of areas, they have a correlation very close to 0.

Still, it must be noted that having almost the same support in all area types does not mean having the same support throughout the whole country. A candidate can still get 15% of all the votes in the cities, but maybe thanks to a big win in only a couple of cities while taking almost no votes in the rest. Supporters can be mostly located in specific or strategic regions and provinces, and the distribution across provinces can still be very skewed. So, let's now have a look at how candidates' success is actually spread across Poland.

## Another Battle Inside the Battle: North-West vs South-East

In [None]:
# Grid of maps, one per candidate, filled row by row.
ncols = 4
nrows = int(np.ceil(len(CANDIDATES_COLORS) / ncols))

fig = psp.make_subplots(
    rows=nrows, cols=ncols,
    specs=[[dict(type='choropleth') for _ in range(ncols)] for _ in range(nrows)],
    horizontal_spacing=0, vertical_spacing=0)

# Title-cased names shown in the hover box; identical for every map.
hover_names = votes1_df_l[['powiat', 'voivodeship']].applymap(str.title)
share_hover = make_hover_text([
    ('Voivodeship', '%{customdata[1]}'),
    ('Powiat',  '%{customdata[0]}'),
    ('', ''),
    ('Voters', '%{z:.1f}%')
])

grid_rows, grid_cols, candidate_maps = [], [], []
for i, (candidate, color) in enumerate(CANDIDATES_COLORS.items()):
    # 0-based grid cell of the i-th candidate.
    cell_row, cell_col = i // ncols, i % ncols
    grid_rows.append(cell_row + 1)
    grid_cols.append(cell_col + 1)
    candidate_maps.append(
        pgo.Choropleth(
            geojson=geojson,
            locations=votes1_df_l.index,
            featureidkey="properties.teryt_code",
            z=votes1_df_l[f'{candidate}_perc'] * 100,
            customdata=hover_names,
            name='',
            colorscale=make_simple_colorscale(color),
            # Position each colorbar inside its own grid cell (paper coordinates).
            colorbar=dict(
                x=1/ncols*.85 + 1/ncols * cell_col,
                y=1/nrows * (nrows - cell_row) - 1/(2*nrows),
                yanchor='middle',
                len=1/nrows*.75,
                thickness=20,
                outlinewidth=2, tickprefix='<b>', ticksuffix='%</b>'),
            hoverinfo='text',
            hovertemplate=share_hover,
            marker_line_width=1.5
        )
    )

fig.add_traces(rows=grid_rows, cols=grid_cols, data=candidate_maps)

fig.update_geos(
    fitbounds="locations",
    visible=False,)

# Overall figure size derived from a fixed total width and a 5:6 cell ratio.
ratio = 5/6
subplot_width = 1300 // ncols
subplot_height = int(subplot_width*ratio)

geo_layouts = {
    f'geo{i}': dict(showframe=False, projection=pgo.layout.geo.Projection(type = 'mercator'))
    for i in range(1, len(CANDIDATES_COLORS) +1)
}
fig.update_layout(
    width=subplot_width * ncols,
    height=subplot_height * nrows,
    margin={"r":0,"t":50,"l":0,"b":0},
    **geo_layouts)

# Candidate name in the top part of each cell, plus a subtitle above the grid.
name_labels = [
    dict(x=1/ncols*.80 + 1/ncols * (i % ncols),
         y=1/nrows * (nrows - (i // ncols)),
         xanchor='right',
         yanchor='top',
         text=f'<b>{candidate.title()}</b>',
         font_size=18,
         showarrow=False)
    for i, candidate in enumerate(CANDIDATES_COLORS.keys())
]
subtitle = dict(
    x=.5, y=1.035, xref='paper', yref='paper',
    showarrow=False,
    text='How is candidates\' success distributed over the territory?',
    font_size=15
)
fig.update_layout(
    title=make_title('Candidates\' Votes Map'),
    annotations=name_labels + [subtitle])

fig

What emerges from these maps is a real clash of clans. It is evident that there is a pretty strong contrast between two sets of candidates that dominate two different and very clear areas:
- on one side we have Duda and Bosak, together with Kosiniak-Kamysz, Żółtek, Jakubiak and Piotrowski, mainly representatives of right and far-right, populist parties in Poland, getting most of their support from the cities and the provinces in the East and South-East of the country;
- on the other side we have Trzaskowski, Hołownia and Biedroń, together with Tanajno and Witkowski, representing the progressive right, center and left wings, respectively, taking the West and North-West areas, plus the cities in the East (it is pretty evident in Warsaw).

To confirm our feelings we can also check the correlation between candidates' results. Does the success of a candidate come together with the success of another candidate?

In [None]:
fig = pgo.Figure()

# Row/column order of the matrix: the two blocs one after the other.
order = ['duda', 'bosak', 'kosiniak-kamysz', 'żółtek', 'jakubiak', 'piotrowski',
         'trzaskowski', 'hołownia', 'biedroń', 'tanajno', 'witkowski']
ordered_share_columns = [f'{p}_perc' for p in order]

# Pairwise correlation of the candidates' vote shares across provinces,
# re-ordered as above and with the rows flipped.
share_correlations = votes1_df_l[[f'{p}_perc' for p in ['duda', 'trzaskowski'] + others]].corr()
heatmap_df = (
    share_correlations
    .loc[ordered_share_columns, ordered_share_columns]
    .iloc[::-1]
)

agreement_colorbar = dict(
    outlinewidth=2, tickprefix='<b>', ticksuffix='</b>',
    tickmode='array', tickvals=[1, 0, -1],
    tickangle=90,
    ticktext=['                  <b>Agreement</b>',
              '<b>Indifference</b>',
              '<b>Contrast</b>             '])

fig.add_trace(
    pgo.Heatmap(
        z=heatmap_df.values,
        zmin=-1, zmax=1,
        # Strip the '_perc' suffix to get the candidate names back.
        x=heatmap_df.columns.str[:-5].str.title(),
        y=heatmap_df.index.str[:-5].str.title(),
        colorscale='RdYlGn',
        colorbar=agreement_colorbar,
        hovertemplate=multiline_text(
            bold('%{x}') + ' vs ' + bold('%{y}'),
            bold('Correlation') + ': %{z}'
        ),
        xgap=3,ygap=3,
        name='',
    )
)

# One numeric label per heatmap cell; white text on strongly colored cells.
cell_labels = [
    dict(
        x=col_idx,
        y=row_idx,
        text=round(value,2),
        showarrow=False,
        font_color='#ffffff' if abs(value) > .5 else '#000000',
        font_size=10
    )
    for row_idx, row_values in enumerate(heatmap_df.values)
    for col_idx, value in enumerate(row_values)
]
# Subtitle placed above the plotting area.
subtitle = dict(
    x=.5, y=1.1, xref='paper', yref='paper', showarrow=False,
    text=multiline_text(
        'How candidates\'s results in the different powiats relate to one another?',
        'Does the success of a candidate imply the defeat of another one?'
    ),
    font_size=15)

fig.update_layout(
    width=700,
    height=700,
    autosize=True,
    margin=dict(l=150, b=100),
    title=make_title('Candidates Results Correlation'),
    annotations=cell_labels + [subtitle]
)

fig

Our guess was pretty accurate: the correlation matrix highlights even more the presence of a strong regional duality between two areas in the country: the progressive West and the conservative East. Interestingly, the only candidate who, according to the numbers, does not seem to have strong regional support is Żółtek.

## The War is not Over Yet: What's Next?

So what's next? Well, none of the candidates got more than 50% of the votes, so a second round between the two most-voted candidates is required.
At this point it seems we can make a guess about how people will vote in the second round: we have Duda & Co. vs Trzaskowski & Co., plus the enigmatic Mr. Żółtek. Let's see what comes out.

In [None]:
# Hypothetical round-2 blocs; the first name of each list labels the bloc.
group_a = ['duda', 'bosak', 'kosiniak-kamysz', 'jakubiak', 'piotrowski']
group_b = ['trzaskowski', 'hołownia', 'biedroń', 'tanajno', 'witkowski']
group_c = ['żółtek']
groups = [group_a, group_b, group_c]

group_labels = [f'{g[0]} & Co.'.title() for g in groups]
members = [c for g in groups for c in g]

# Sunburst nodes: the three blocs first (no parent), then every candidate
# attached to the label of the bloc it belongs to.
labels = group_labels + [c.title() for c in members]
parents = ["", "", ""] + [
    group_label
    for group_label, g in zip(group_labels, groups)
    for _ in g
]
values = (
    [votes1_df[g].sum().sum() for g in groups]
    + [votes1_df[c].sum().sum() for c in members]
)
# Each bloc takes the color of its first candidate.
node_colors = [CANDIDATES_COLORS.get(c) for c in [g[0] for g in groups] + members]

fig = pgo.Figure()

fig.add_trace(
    pgo.Sunburst(
        labels=labels,
        parents=parents,
        values=values,
        branchvalues="total",
        marker_colors=node_colors,
        texttemplate='<b>%{label}</b><br>%{percentEntry}',
        marker_line_width=3, marker_line_color='#444',
        leaf_opacity=1
    )
)

fig.update_layout(title=make_title('Round 2 Results Guess'))

fig

Here we have it. It seems Duda's (theoretical) coalition is stronger than Trzaskowski's and will win again in Round 2. Even treating Żółtek as a third, random variable, it is clear that he has no influence whatsoever on the result.