# Processing of Party Text Programs to get Word Usage Count

## Import the relevant libraries

In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict

In [2]:
#Partidos = ["BE", "PAN", "PCP", "L", "CH", "PS", "IL", "PEV", "AD"]

In [3]:
ToExclude = pd.read_csv('2Exclude.txt')
ToExclude

Unnamed: 0,2Exclude
0,bloco
1,pp
2,vii
3,três
4,seis
...,...
172,t r
173,v capítulo
174,xi
175,projecto lei


## Picks-up data from Phase 1

In [4]:
ProcessedPast = pd.read_csv('Processed AD+BE+CH+IL+L+PAN+PCP+PS.txt')
FullWordList = ProcessedPast
ProcessedPast

Unnamed: 0.2,Unnamed: 0,Word,Partido,Q_Words,Count,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Unnamed: 0.1.1.1.1,Unnamed: 0.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1.1
0,0,cuidar,PAN,1,1,,,,,,,,,
1,1,pessoas,PAN,1,1,,,,,,,,,
2,2,cuidar pessoas,PAN,2,1,,,,,,,,,
3,3,defender,PAN,1,1,,,,,,,,,
4,4,animais,PAN,1,1,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
721307,671571,mundo mais justo,PCP,3,1,598265.0,602116.0,538404.0,534553.0,538459.0,349403.0,273043.0,164802.0,164802.0
721308,671572,mais justo pacífico,PCP,3,1,598266.0,602117.0,538405.0,534554.0,538460.0,349404.0,273044.0,164803.0,164803.0
721309,671573,justo pacífico desenvolvido,PCP,3,1,598267.0,602118.0,538406.0,534555.0,538461.0,349405.0,273045.0,164804.0,164804.0
721310,671574,pacífico desenvolvido sustentável,PCP,3,1,598268.0,602119.0,538407.0,534556.0,538462.0,349406.0,273046.0,164805.0,164805.0


In [5]:
# Code to concatenate previous processing (as that is very slow execution pre-processing may by done in chunks)
#ProcessedPast = pd.read_csv('Processed AD+BE+CH+IL+L+PCP.txt')
#FullWordList = pd.concat([ProcessedInput,ProcessedPast])
#FullWordList.to_csv('Processed AD+BE+CH+IL+L+PAN+PCP+PS.txt', index=True, mode='w', header=True, encoding='utf-8-sig')
#FullWordList


## Calculate Party Program word size

In [6]:
PartidoStats = FullWordList[FullWordList.Q_Words==1.0].groupby(['Partido']).count()
PartidoStats.rename(columns={"Q_Words": "Count_Words"}, inplace=True)
PartidoStats.drop('Word', axis = 1, inplace=True)
PartidoStats.drop('Count', axis = 1, inplace=True)
PartidoStats

Unnamed: 0_level_0,Unnamed: 0,Count_Words,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Unnamed: 0.1.1.1.1,Unnamed: 0.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1.1
Partido,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AD,22923,22923,22923,22923,22923,0,0,0,0,0,0
BE,36826,36826,36826,36826,36826,36826,36826,36826,36826,36826,36826
CH,32017,32017,32017,32017,32017,32017,32017,32017,0,0,0
IL,38357,38357,38357,38357,38357,38357,38357,38357,38357,38357,0
L,33815,33815,33815,33815,33815,33815,33815,33815,0,0,0
PAN,17323,17323,0,0,0,0,0,0,0,0,0
PCP,22598,22598,22598,22598,22598,22598,22598,22598,22598,22598,22598
PS,25760,25760,25760,0,0,0,0,0,0,0,0


## Synonyms are use to consolidate words with same "meaning"

In [7]:
SynonymsCustom = pd.read_csv('Synonyms Custom.txt')
SynonymsDictionary = pd.read_csv('Synonyms Dictionary.txt')

SynonymsDictionaryFiltered = SynonymsDictionary.loc[SynonymsDictionary['Word2']!=""]
SynonymsDictionaryFiltered = SynonymsDictionary.loc[SynonymsDictionaryFiltered['Word2'].notnull()]
SynonymsDictionaryFiltered = SynonymsDictionaryFiltered.loc[SynonymsDictionaryFiltered['Word2']!=SynonymsDictionaryFiltered["Word"]]
Synonyms = SynonymsCustom.append(SynonymsDictionaryFiltered, ignore_index=True)
Synonyms

Unnamed: 0,Word,Word2
0,€,euros
1,união europeia,ue
2,acção social,ação social
3,através do,através de
4,aumento da,aumento de
...,...,...
9564,vossa,vosso
9565,votou,votar
9566,xenófobos,xenófobo
9567,áridas,árido


In [8]:
WordList = FullWordList.merge(Synonyms, on=['Word'], how='left', indicator=True)
WordList

Unnamed: 0.2,Unnamed: 0,Word,Partido,Q_Words,Count,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Unnamed: 0.1.1.1.1,Unnamed: 0.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1.1,Word2,_merge
0,0,cuidar,PAN,1,1,,,,,,,,,,,left_only
1,1,pessoas,PAN,1,1,,,,,,,,,,pessoa,both
2,2,cuidar pessoas,PAN,2,1,,,,,,,,,,,left_only
3,3,defender,PAN,1,1,,,,,,,,,,,left_only
4,4,animais,PAN,1,1,,,,,,,,,,animal,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
721334,671571,mundo mais justo,PCP,3,1,598265.0,602116.0,538404.0,534553.0,538459.0,349403.0,273043.0,164802.0,164802.0,,left_only
721335,671572,mais justo pacífico,PCP,3,1,598266.0,602117.0,538405.0,534554.0,538460.0,349404.0,273044.0,164803.0,164803.0,,left_only
721336,671573,justo pacífico desenvolvido,PCP,3,1,598267.0,602118.0,538406.0,534555.0,538461.0,349405.0,273045.0,164804.0,164804.0,,left_only
721337,671574,pacífico desenvolvido sustentável,PCP,3,1,598268.0,602119.0,538407.0,534556.0,538462.0,349406.0,273046.0,164805.0,164805.0,,left_only


In [9]:
def GetSynonym(a, b):
    if pd.isna(b):
        return a
    else:
        return b

In [10]:
WordList['Word'] = WordList.apply(lambda x: GetSynonym(x['Word'], x['Word2']), axis = 1)
WordList.drop('_merge', axis = 1, inplace=True)
WordList.drop('Word2', axis = 1, inplace=True)
WordList.head(20)

Unnamed: 0.2,Unnamed: 0,Word,Partido,Q_Words,Count,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Unnamed: 0.1.1.1.1,Unnamed: 0.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1.1
0,0,cuidar,PAN,1,1,,,,,,,,,
1,1,pessoa,PAN,1,1,,,,,,,,,
2,2,cuidar pessoas,PAN,2,1,,,,,,,,,
3,3,defender,PAN,1,1,,,,,,,,,
4,4,animal,PAN,1,1,,,,,,,,,
5,5,defender animais,PAN,2,1,,,,,,,,,
6,6,proteger,PAN,1,1,,,,,,,,,
7,7,natureza,PAN,1,1,,,,,,,,,
8,8,proteger natureza,PAN,2,1,,,,,,,,,
9,9,índice,PAN,1,1,,,,,,,,,


In [11]:
WordListCount = WordList.groupby(['Partido', 'Word'])['Count'].sum().reset_index(name="Count")
WordListMin = WordList.groupby(['Partido', 'Word'])['Q_Words'].min().reset_index(name="Q_Words")
WordListCount = WordListCount.merge(WordListMin, on=['Partido', 'Word'], how='left', indicator=True)
WordListCount.drop('_merge', axis = 1, inplace=True)
WordListCount

Unnamed: 0,Partido,Word,Count,Q_Words
0,AD,PRR,113,3
1,AD,#NAME?,9,1
2,AD,% pib,1,2
3,AD,% pib alargamento,1,3
4,AD,% produto,1,2
...,...,...,...,...
488374,PS,€ saldo positivo,1,3
488375,PS,€ sede,1,2
488376,PS,€ sede irs,1,3
488377,PS,€ serviço,1,2


## Criteria for word down-selection - density and min count

In [12]:
#Data = [[1.0,int(4000), int(225), 1],[2.0,int(1650), int(45), 2],[3.0,int(1250), int(25),4],[4.0,int(900), int(15),7]]
Data = [[1.0,int(4000), int(10), 1],[2.0,int(525), int(10), 2],[3.0,int(30), int(8),7],[4.0,int(100), int(5),5]]
WordCriteria = pd.DataFrame(Data, columns = ['Q_Words', 'Min_Density', 'Min_Count', 'Weight'])
WordCriteria

Unnamed: 0,Q_Words,Min_Density,Min_Count,Weight
0,1.0,4000,10,1
1,2.0,525,10,2
2,3.0,30,8,7
3,4.0,100,5,5


In [13]:
WordListDensity = WordListCount.merge(PartidoStats, on=['Partido'], how='left', indicator=True)
WordListDensity = WordListDensity.merge(WordCriteria, on=['Q_Words'], how='left')
WordListDensity['Density'] = WordListDensity.apply(lambda x: x['Count']*1000000/x['Count_Words'], axis = 1)
#WordListDensity

In [14]:
SelectedWordsPartido = WordListDensity[(WordListDensity['Density'] >  WordListDensity['Min_Density']) & (WordListDensity['Count'] >  WordListDensity['Min_Count'])]
SelectedWordsPartido.sort_values(by=['Density'],ascending=False)

Unnamed: 0.2,Partido,Word,Count,Q_Words,Unnamed: 0,Count_Words,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Unnamed: 0.1.1.1.1,Unnamed: 0.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1.1,_merge,Min_Density,Min_Count,Weight,Density
457,AD,R,338,1,22923,22923,22923,22923,22923,0,0,0,0,0,0,both,4000,10,1,14745.015923
238419,IL,mais,543,1,38357,38357,38357,38357,38357,38357,38357,38357,38357,38357,0,both,4000,10,1,14156.477305
426575,PCP,público,287,1,22598,22598,22598,22598,22598,22598,22598,22598,22598,22598,22598,both,4000,10,1,12700.238959
284712,IL,é,477,1,38357,38357,38357,38357,38357,38357,38357,38357,38357,38357,0,both,4000,10,1,12435.800506
270756,IL,ser,454,1,38357,38357,38357,38357,38357,38357,38357,38357,38357,38357,0,both,4000,10,1,11836.170712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415002,PCP,micro pequenas médias empresas,6,4,22598,22598,22598,22598,22598,22598,22598,22598,22598,22598,22598,both,100,5,5,265.510222
258974,IL,profissionais liberais trabalhadores independe...,10,4,38357,38357,38357,38357,38357,38357,38357,38357,38357,38357,0,both,100,5,5,260.708606
83577,BE,inquérito condições vida,9,3,36826,36826,36826,36826,36826,36826,36826,36826,36826,36826,36826,both,30,8,7,244.392549
233078,IL,instituições ensino superior,9,3,38357,38357,38357,38357,38357,38357,38357,38357,38357,38357,0,both,30,8,7,234.637745


In [15]:
ToExport = SelectedWordsPartido.copy()
#ToExport.drop('Q_Words', axis = 1, inplace=True)
ToExport['Weight_Count'] = ToExport['Count'] * ToExport['Weight']
ToExport['Weight_Density'] = ToExport['Density'] * ToExport['Weight']
#ToExport.drop('Count_Words', axis = 1, inplace=True)
ToExport.drop('_merge', axis = 1, inplace=True)
ToExport.drop('Unnamed: 0', axis = 1, inplace=True)
ToExport.drop('Unnamed: 0.1', axis = 1, inplace=True)
ToExport.drop('Min_Density', axis = 1, inplace=True)
ToExport.drop('Min_Count', axis = 1, inplace=True)
ToExport.sort_values(by=['Density'],ascending=False)

Unnamed: 0,Partido,Word,Count,Q_Words,Count_Words,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Unnamed: 0.1.1.1.1,Unnamed: 0.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1.1,Weight,Density,Weight_Count,Weight_Density
457,AD,R,338,1,22923,22923,22923,0,0,0,0,0,0,1,14745.015923,338,14745.015923
238419,IL,mais,543,1,38357,38357,38357,38357,38357,38357,38357,38357,0,1,14156.477305,543,14156.477305
426575,PCP,público,287,1,22598,22598,22598,22598,22598,22598,22598,22598,22598,1,12700.238959,287,12700.238959
284712,IL,é,477,1,38357,38357,38357,38357,38357,38357,38357,38357,0,1,12435.800506,477,12435.800506
270756,IL,ser,454,1,38357,38357,38357,38357,38357,38357,38357,38357,0,1,11836.170712,454,11836.170712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415002,PCP,micro pequenas médias empresas,6,4,22598,22598,22598,22598,22598,22598,22598,22598,22598,5,265.510222,30,1327.551111
258974,IL,profissionais liberais trabalhadores independe...,10,4,38357,38357,38357,38357,38357,38357,38357,38357,0,5,260.708606,50,1303.543030
83577,BE,inquérito condições vida,9,3,36826,36826,36826,36826,36826,36826,36826,36826,36826,7,244.392549,63,1710.747841
233078,IL,instituições ensino superior,9,3,38357,38357,38357,38357,38357,38357,38357,38357,0,7,234.637745,63,1642.464218


Basic count to check if parties are reasonable represented on selected words

In [16]:
ToExport.groupby(['Partido','Q_Words'])['Word'].count()

Partido  Q_Words
AD       1          16
         2          20
         3           4
BE       1          14
         2          10
         3           4
         4           1
CH       1          16
         2          32
         3          17
IL       1          15
         2          16
         3           5
         4           1
L        1          16
         2          22
         3           6
PAN      1          19
         2          15
         3           3
PCP      1          22
         2          28
         3           2
         4           2
PS       1          16
         2          19
         3           2
Name: Word, dtype: int64

In [17]:
ToExport.to_csv('Word Density by Partido.txt', index=True, mode='w', header=True, encoding='utf-8-sig')

Another Nice Place to Restart - last minute update to word exclusion

In [18]:
ToExclude = pd.read_csv('2Exclude.txt')
ToExport = pd.read_csv('Word Density by Partido.txt')

In [19]:
SelectedWords = ToExport.groupby(['Word'])['Word'].count().reset_index(name="Count")
SelectedWords.drop('Count', axis = 1, inplace=True)
SelectedWords = SelectedWords[~SelectedWords['Word'].isin(ToExclude['2Exclude'])]
SelectedWords

Unnamed: 0,Word
0,PRR
7,administração pública
9,alterações climáticas
10,animais companhia
11,animal
...,...
189,é
190,é fundamental
191,é necessário
192,é preciso


In [20]:
ToExport2 = SelectedWords.merge(WordListDensity, on=['Word'], how='left', indicator=False)
PartidoCount = ToExport2.groupby(['Partido'])['Partido'].count().reset_index(name="Count_Different")
ToExport2 = PartidoCount.merge(ToExport2, on=['Partido'], how='left', indicator=False)
ToExport2

Unnamed: 0.2,Partido,Count_Different,Word,Count,Q_Words,Unnamed: 0,Count_Words,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,...,Unnamed: 0.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1,Unnamed: 0.1.1.1.1.1.1.1.1.1,_merge,Min_Density,Min_Count,Weight,Density
0,AD,120,PRR,113,3,22923,22923,22923,22923,22923,...,0,0,0,0,0,both,30,8,7,4929.546743
1,AD,120,administração pública,17,2,22923,22923,22923,22923,22923,...,0,0,0,0,0,both,525,10,2,741.613227
2,AD,120,alterações climáticas,5,2,22923,22923,22923,22923,22923,...,0,0,0,0,0,both,525,10,2,218.121537
3,AD,120,animais companhia,1,2,22923,22923,22923,22923,22923,...,0,0,0,0,0,both,525,10,2,43.624307
4,AD,120,animal,8,1,22923,22923,22923,22923,22923,...,0,0,0,0,0,both,4000,10,1,348.994460
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
997,PS,124,violência doméstica,7,2,25760,25760,25760,0,0,...,0,0,0,0,0,both,525,10,2,271.739130
998,PS,124,é,150,1,25760,25760,25760,0,0,...,0,0,0,0,0,both,4000,10,1,5822.981366
999,PS,124,é fundamental,6,2,25760,25760,25760,0,0,...,0,0,0,0,0,both,525,10,2,232.919255
1000,PS,124,é necessário,9,2,25760,25760,25760,0,0,...,0,0,0,0,0,both,525,10,2,349.378882


## Cross Pivot so that words are columns (dimensions)

In [21]:
DensityPivot = ToExport2.pivot(index=['Partido', 'Count_Words', 'Count_Different'],columns='Word', values='Density')
DensityPivot = DensityPivot.fillna(0)
DensityPivot.to_csv('Data for Clustering - Density.txt', index=True, mode='w', header=True, encoding='utf-8-sig')
DensityPivot

Unnamed: 0_level_0,Unnamed: 1_level_0,Word,PRR,administração pública,alterações climáticas,animais companhia,animal,apoio,apoios sociais,assegurar,assembleia república,associações proteção animal,...,trabalho,ue,vez mais,vida,violência doméstica,é,é fundamental,é necessário,é preciso,é preciso mudar
Partido,Count_Words,Count_Different,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
AD,22923,120,4929.546743,741.613227,218.121537,43.624307,348.99446,3751.690442,697.988919,1657.723684,174.49723,0.0,...,3402.695982,697.988919,218.121537,1526.850761,392.618767,6325.524582,392.618767,610.740304,697.988919,567.115997
BE,36826,133,0.0,896.106012,407.320915,27.154728,1004.724923,2851.246402,135.773638,624.558736,162.928366,27.154728,...,4181.828056,271.547276,353.011459,1547.819475,271.547276,9558.464129,135.773638,651.713463,678.868191,0.0
CH,32017,126,0.0,530.967923,0.0,374.800887,1374.269919,2842.24006,249.867258,3997.876128,312.334073,0.0,...,2248.805322,687.13496,562.201331,1592.90377,187.400444,8026.985664,281.100665,93.700222,93.700222,0.0
IL,38357,127,0.0,1486.039054,208.566885,26.070861,312.850327,1147.117866,52.141721,1486.039054,808.196679,0.0,...,2528.873478,1199.259588,808.196679,1772.818521,130.354303,12435.800506,625.700654,677.842376,312.850327,0.0
L,33815,131,118.290699,650.598847,621.026172,207.008724,1271.625018,4613.337276,236.581399,3312.139583,236.581399,29.572675,...,3755.729706,591.453497,147.863374,2158.805264,266.154074,3844.44773,29.572675,295.726748,118.290699,0.0
PAN,17323,117,0.0,404.087052,1096.807712,1327.714599,9755.815967,4502.684293,57.726722,6580.846274,865.900825,577.267217,...,2944.062807,1269.987877,230.906887,1385.441321,1154.534434,3001.789528,115.453443,0.0,57.726722,0.0
PCP,22598,124,44.251704,796.530666,88.503407,0.0,88.503407,4026.905036,44.251704,4026.905036,398.265333,0.0,...,7567.041331,929.285778,309.761926,4159.660147,132.755111,5354.456147,88.503407,177.006815,442.517037,0.0
PS,25760,124,0.0,1009.31677,543.478261,77.639752,465.838509,4619.565217,155.279503,4309.006211,310.559006,0.0,...,2795.031056,1086.956522,388.198758,1940.993789,271.73913,5822.981366,232.919255,349.378882,310.559006,0.0


In [22]:
CountPivot = ToExport2.pivot(index='Partido',columns='Word', values='Count')
CountPivot = CountPivot.fillna(0)
CountPivot.to_csv('Data for Clustering - Count.txt', index=True, mode='w', header=True, encoding='utf-8-sig')
CountPivot

Word,PRR,administração pública,alterações climáticas,animais companhia,animal,apoio,apoios sociais,assegurar,assembleia república,associações proteção animal,...,trabalho,ue,vez mais,vida,violência doméstica,é,é fundamental,é necessário,é preciso,é preciso mudar
Partido,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AD,113.0,17.0,5.0,1.0,8.0,86.0,16.0,38.0,4.0,0.0,...,78.0,16.0,5.0,35.0,9.0,145.0,9.0,14.0,16.0,13.0
BE,0.0,33.0,15.0,1.0,37.0,105.0,5.0,23.0,6.0,1.0,...,154.0,10.0,13.0,57.0,10.0,352.0,5.0,24.0,25.0,0.0
CH,0.0,17.0,0.0,12.0,44.0,91.0,8.0,128.0,10.0,0.0,...,72.0,22.0,18.0,51.0,6.0,257.0,9.0,3.0,3.0,0.0
IL,0.0,57.0,8.0,1.0,12.0,44.0,2.0,57.0,31.0,0.0,...,97.0,46.0,31.0,68.0,5.0,477.0,24.0,26.0,12.0,0.0
L,4.0,22.0,21.0,7.0,43.0,156.0,8.0,112.0,8.0,1.0,...,127.0,20.0,5.0,73.0,9.0,130.0,1.0,10.0,4.0,0.0
PAN,0.0,7.0,19.0,23.0,169.0,78.0,1.0,114.0,15.0,10.0,...,51.0,22.0,4.0,24.0,20.0,52.0,2.0,0.0,1.0,0.0
PCP,1.0,18.0,2.0,0.0,2.0,91.0,1.0,91.0,9.0,0.0,...,171.0,21.0,7.0,94.0,3.0,121.0,2.0,4.0,10.0,0.0
PS,0.0,26.0,14.0,2.0,12.0,119.0,4.0,111.0,8.0,0.0,...,72.0,28.0,10.0,50.0,7.0,150.0,6.0,9.0,8.0,0.0


In [23]:
FinalPivot = DensityPivot.merge(CountPivot, on=['Partido'], how='left', indicator=True)
FinalPivot.drop('_merge', axis = 1, inplace=True)
FinalPivot = FinalPivot.fillna(0)
FinalPivot

Word,PRR_x,administração pública_x,alterações climáticas_x,animais companhia_x,animal_x,apoio_x,apoios sociais_x,assegurar_x,assembleia república_x,associações proteção animal_x,...,trabalho_y,ue_y,vez mais_y,vida_y,violência doméstica_y,é_y,é fundamental_y,é necessário_y,é preciso_y,é preciso mudar_y
Partido,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AD,4929.546743,741.613227,218.121537,43.624307,348.99446,3751.690442,697.988919,1657.723684,174.49723,0.0,...,78.0,16.0,5.0,35.0,9.0,145.0,9.0,14.0,16.0,13.0
BE,0.0,896.106012,407.320915,27.154728,1004.724923,2851.246402,135.773638,624.558736,162.928366,27.154728,...,154.0,10.0,13.0,57.0,10.0,352.0,5.0,24.0,25.0,0.0
CH,0.0,530.967923,0.0,374.800887,1374.269919,2842.24006,249.867258,3997.876128,312.334073,0.0,...,72.0,22.0,18.0,51.0,6.0,257.0,9.0,3.0,3.0,0.0
IL,0.0,1486.039054,208.566885,26.070861,312.850327,1147.117866,52.141721,1486.039054,808.196679,0.0,...,97.0,46.0,31.0,68.0,5.0,477.0,24.0,26.0,12.0,0.0
L,118.290699,650.598847,621.026172,207.008724,1271.625018,4613.337276,236.581399,3312.139583,236.581399,29.572675,...,127.0,20.0,5.0,73.0,9.0,130.0,1.0,10.0,4.0,0.0
PAN,0.0,404.087052,1096.807712,1327.714599,9755.815967,4502.684293,57.726722,6580.846274,865.900825,577.267217,...,51.0,22.0,4.0,24.0,20.0,52.0,2.0,0.0,1.0,0.0
PCP,44.251704,796.530666,88.503407,0.0,88.503407,4026.905036,44.251704,4026.905036,398.265333,0.0,...,171.0,21.0,7.0,94.0,3.0,121.0,2.0,4.0,10.0,0.0
PS,0.0,1009.31677,543.478261,77.639752,465.838509,4619.565217,155.279503,4309.006211,310.559006,0.0,...,72.0,28.0,10.0,50.0,7.0,150.0,6.0,9.0,8.0,0.0


In [24]:
FinalPivot.to_csv('Data for Clustering.txt', index=True, mode='w', header=True, encoding='utf-8-sig')