# Processing of Party Text Programs to get Word Usage Count

## Import the relevant libraries

In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict

In [2]:
Partidos = np.empty(10, dtype=object)
Partidos[0]="IL"
Partidos[1]="PSD"
Partidos[2]="PCP"
Partidos[3]="Livre"
Partidos[4]="CH"
Partidos[5]="PS"
Partidos[6]="PAN"
Partidos[7]="PEV"
Partidos[8]="BE"
Partidos[9]="CDS"

In [7]:
ToExclude = pd.read_csv('2Exclude.txt')
ToExclude

Unnamed: 0,2Exclude
0,vii
1,três
2,seis
3,/
4,0.5
...,...
120,qual
121,quer
122,seja
123,tal


## Go through text and count word usage paragraph by paragraph

In [5]:
def CountSequences(words,n):
#Given a list, returns a dictionary mapping each n-element sequence tuple to its number of occurrences in the list.
# Initialize all counts implicitly to 0.
    countDict = defaultdict(int)

    for i in range(len(words)-n+1):
        key = tuple(words[i:i+n])
        countDict[key] = countDict[key] + 1
    return countDict

In [6]:
FullWordList = pd.DataFrame(columns = ['Word', 'Partido'])
ProcessedInput = pd.DataFrame(columns = ['Word', 'Partido', 'Q_Words', 'Count'])            

for Partido in range (1):
    print(Partidos[Partido]+'.txt')
    
    with open(Partidos[Partido]+'.txt', encoding='utf-8-sig') as fp:
        
        line = fp.readline()

        while line:
            
            line = line.lower()
            
            for ToExcludeWord in ToExclude['2Exclude']:
                line = line.replace(" "+ToExcludeWord+" ", " ")
                if line[0:len(ToExcludeWord)+1] == ToExcludeWord+" ":
                    line = line[len(ToExcludeWord)+1:len(line)]
                if line[len(line)-len(ToExcludeWord)-2:len(line)-1] == " "+ToExcludeWord:
                    line = line[0:len(line)-len(ToExcludeWord)-2]+"\n"
            
            line = line.strip()
            Words = line.split()

            for Group in range (3):
                ToExport = CountSequences(Words,Group+1)
                ToExportKeys = ToExport.keys()
                for i in ToExportKeys:
                    ProcessedInput = ProcessedInput.append({'Word': '  '.join(i), 
                                                            'Partido' : Partidos[Partido], 
                                                            'Q_Words' : Group+1, 
                                                            'Count': ToExport[i]},
                                                          ignore_index = True)
                    
            line = fp.readline()
        
    fp.close()
    
ProcessedInput

IL.txt


Unnamed: 0,Word,Partido,Q_Words,Count
0,portugal,IL,1,1
1,precisa,IL,1,1
2,portugal precisa,IL,2,1
3,crescer,IL,1,1
4,duas,IL,1,1
...,...,...,...,...
282837,conscientemente otimização recolha,IL,3,1
282838,otimização recolha tratamento,IL,3,1
282839,recolha tratamento reee,IL,3,1
282840,tratamento reee proteger,IL,3,1


## Save as previous process is very slow

In [7]:
ProcessedInput.to_csv('Processed IL.txt', index=True, mode='w', header=True, encoding='utf-8-sig')

## Nice place to restart run if data was saved

In [11]:
ProcessedPast = pd.read_csv('Processed CH+BE+PCP+Livre+PSD+PS+IL.txt')
FullWordList = ProcessedPast

In [12]:
# Code to concatenate previous processing (as that is very slow execution my by done in chunks)
#FullWordList = pd.concat([ProcessedInput,ProcessedPast])
#FullWordList.to_csv('Processed CH+BE+PCP+Livre+PSD+PS+IL.txt', index=True, mode='w', header=True, encoding='utf-8-sig')
#FullWordList = ProcessedPast


## Nice place to restart run if data was saved

## Calculate Party Program word size

In [13]:
PartidoStats = FullWordList[FullWordList.Q_Words==1.0].groupby(['Partido']).count()
PartidoStats.rename(columns={"Q_Words": "Count_Words"}, inplace=True)
PartidoStats.drop('Word', axis = 1, inplace=True)
PartidoStats.drop('Count', axis = 1, inplace=True)
PartidoStats

Unnamed: 0_level_0,Unnamed: 0,Count_Words,Unnamed: 0.1,Unnamed: 0.1.1
Partido,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BE,38763,38763,38763,38763
CH,2309,2309,2309,2309
IL,102905,102905,0,0
Livre,20294,20294,20294,20294
PCP,28417,28417,28417,28417
PS,32349,32349,32349,0
PSD,38165,38165,38165,0


## Synonyms are use to consolidate words with same "meaning"

In [14]:
Synonyms = pd.read_csv('Synonyms.txt')
Synonyms

Unnamed: 0,Word,Word2
0,direita direita,direita
1,familiares,família
2,familiar,família
3,devem ser,deve ser
4,últimos,último
...,...,...
157,têm,tem
158,todas,todo
159,todos,todo
160,trabalho,trabalhar


In [15]:
WordList = FullWordList.merge(Synonyms, on=['Word'], how='left', indicator=True)
WordList

Unnamed: 0.2,Unnamed: 0,Word,Partido,Q_Words,Count,Unnamed: 0.1,Unnamed: 0.1.1,Word2,_merge
0,0,portugal,IL,1,1,,,,left_only
1,1,precisa,IL,1,1,,,,left_only
2,2,portugal precisa,IL,2,1,,,,left_only
3,3,crescer,IL,1,1,,,,left_only
4,4,duas,IL,1,1,,,,left_only
...,...,...,...,...,...,...,...,...,...
729750,445928,resposta situações temporárias,Livre,3,1,247585.0,247585.0,,left_only
729751,445929,voltar,Livre,1,1,247586.0,247586.0,,left_only
729752,445930,descarregar,Livre,1,1,247587.0,247587.0,,left_only
729753,445931,programa,Livre,1,1,247588.0,247588.0,,left_only


In [16]:
def GetSynonym(a, b):
    if pd.isna(b):
        return a
    else:
        return b

In [17]:
WordList['Word'] = WordList.apply(lambda x: GetSynonym(x['Word'], x['Word2']), axis = 1)
WordList.drop('_merge', axis = 1, inplace=True)
WordList.drop('Word2', axis = 1, inplace=True)
WordList.head(20)

Unnamed: 0.2,Unnamed: 0,Word,Partido,Q_Words,Count,Unnamed: 0.1,Unnamed: 0.1.1
0,0,portugal,IL,1,1,,
1,1,precisa,IL,1,1,,
2,2,portugal precisa,IL,2,1,,
3,3,crescer,IL,1,1,,
4,4,duas,IL,1,1,,
5,5,décadas,IL,1,1,,
6,6,estagnação,IL,1,1,,
7,7,economia,IL,1,1,,
8,7,economia,IL,1,1,,
9,8,marasmo,IL,1,1,,


In [18]:
WordListCount = WordList.groupby(['Partido', 'Word'])['Count'].sum().reset_index(name="Count")
WordListMin = WordList.groupby(['Partido', 'Word'])['Q_Words'].min().reset_index(name="Q_Words")
WordListCount = WordListCount.merge(WordListMin, on=['Partido', 'Word'], how='left', indicator=True)
WordListCount.drop('_merge', axis = 1, inplace=True)
WordListCount

Unnamed: 0,Partido,Word,Count,Q_Words
0,BE,%,15,1
1,BE,% 272,1,2
2,BE,% despesas,1,2
3,BE,% despesas consumo,1,3
4,BE,% pib,1,2
...,...,...,...,...
443693,PSD,€ 50 m€,2,3
443694,PSD,€ combate,1,2
443695,PSD,€ combate à,1,3
443696,PSD,€ empresas,2,2


## Criteria for word down-selection - density and min count

In [19]:
#Data = [[1.0,int(2200), int(75), 1],[2.0,int(1200), int(15), 2],[3.0,int(800), int(6),4]]
Data = [[1.0,int(3000), int(150), 1],[2.0,int(2000), int(50), 2],[3.0,int(1500), int(25),4]]
WordCriteria = pd.DataFrame(Data, columns = ['Q_Words', 'Min_Density', 'Min_Count', 'Weight'])
WordCriteria

Unnamed: 0,Q_Words,Min_Density,Min_Count,Weight
0,1.0,3000,150,1
1,2.0,2000,50,2
2,3.0,1500,25,4


In [20]:
WordListDensity = WordListCount.merge(PartidoStats, on=['Partido'], how='left', indicator=True)
WordListDensity = WordListDensity.merge(WordCriteria, on=['Q_Words'], how='left')
WordListDensity['Density'] = WordListDensity.apply(lambda x: x['Count']*1000000/x['Count_Words'], axis = 1)
WordListDensity

Unnamed: 0.2,Partido,Word,Count,Q_Words,Unnamed: 0,Count_Words,Unnamed: 0.1,Unnamed: 0.1.1,_merge,Min_Density,Min_Count,Weight,Density
0,BE,%,15,1,38763,38763,38763,38763,both,3000,150,1,386.966953
1,BE,% 272,1,2,38763,38763,38763,38763,both,2000,50,2,25.797797
2,BE,% despesas,1,2,38763,38763,38763,38763,both,2000,50,2,25.797797
3,BE,% despesas consumo,1,3,38763,38763,38763,38763,both,1500,25,4,25.797797
4,BE,% pib,1,2,38763,38763,38763,38763,both,2000,50,2,25.797797
...,...,...,...,...,...,...,...,...,...,...,...,...,...
443693,PSD,€ 50 m€,2,3,38165,38165,38165,0,both,1500,25,4,52.404035
443694,PSD,€ combate,1,2,38165,38165,38165,0,both,2000,50,2,26.202018
443695,PSD,€ combate à,1,3,38165,38165,38165,0,both,1500,25,4,26.202018
443696,PSD,€ empresas,2,2,38165,38165,38165,0,both,2000,50,2,52.404035


In [21]:
SelectedWordsPartido = WordListDensity[(WordListDensity['Density'] >  WordListDensity['Min_Density']) | (WordListDensity['Count'] >  WordListDensity['Min_Count'])]
SelectedWordsPartido.sort_values(by=['Density'],ascending=False)

Unnamed: 0.2,Partido,Word,Count,Q_Words,Unnamed: 0,Count_Words,Unnamed: 0.1,Unnamed: 0.1.1,_merge,Min_Density,Min_Count,Weight,Density
257700,Livre,proposta,345,1,20294,20294,20294,20294,both,3000,150,1,17000.098551
72768,CH,social,37,1,2309,2309,2309,2309,both,3000,150,1,16024.252923
69414,CH,direita,36,1,2309,2309,2309,2309,both,3000,150,1,15591.165006
295382,PCP,mais,430,1,28417,28417,28417,28417,both,3000,150,1,15131.787310
69510,CH,economia,30,1,2309,2309,2309,2309,both,3000,150,1,12992.637505
...,...,...,...,...,...,...,...,...,...,...,...,...,...
75634,IL,abandono escolar,54,2,102905,102905,0,0,both,2000,50,2,524.755843
187154,IL,profissionais saúde,52,2,102905,102905,0,0,both,2000,50,2,505.320441
122127,IL,ensino técnico profissional,40,3,102905,102905,0,0,both,1500,25,4,388.708032
207021,IL,serviço nacional saúde,26,3,102905,102905,0,0,both,1500,25,4,252.660221


In [22]:
ToExport = SelectedWordsPartido.copy()
#ToExport.drop('Q_Words', axis = 1, inplace=True)
ToExport['Weight_Count'] = ToExport['Count'] * ToExport['Weight']
ToExport['Weight_Density'] = ToExport['Density'] * ToExport['Weight']
#ToExport.drop('Count_Words', axis = 1, inplace=True)
ToExport.drop('_merge', axis = 1, inplace=True)
ToExport.drop('Unnamed: 0', axis = 1, inplace=True)
ToExport.drop('Unnamed: 0.1', axis = 1, inplace=True)
ToExport.drop('Min_Density', axis = 1, inplace=True)
ToExport.drop('Min_Count', axis = 1, inplace=True)
ToExport.sort_values(by=['Density'],ascending=False)

Unnamed: 0,Partido,Word,Count,Q_Words,Count_Words,Unnamed: 0.1.1,Weight,Density,Weight_Count,Weight_Density
257700,Livre,proposta,345,1,20294,20294,1,17000.098551,345,17000.098551
72768,CH,social,37,1,2309,2309,1,16024.252923,37,16024.252923
69414,CH,direita,36,1,2309,2309,1,15591.165006,36,15591.165006
295382,PCP,mais,430,1,28417,28417,1,15131.787310,430,15131.787310
69510,CH,economia,30,1,2309,2309,1,12992.637505,30,12992.637505
...,...,...,...,...,...,...,...,...,...,...
75634,IL,abandono escolar,54,2,102905,0,2,524.755843,108,1049.511686
187154,IL,profissionais saúde,52,2,102905,0,2,505.320441,104,1010.640882
122127,IL,ensino técnico profissional,40,3,102905,0,4,388.708032,160,1554.832127
207021,IL,serviço nacional saúde,26,3,102905,0,4,252.660221,104,1010.640882


Basic count to check if parties are reasonable represented on selected words

In [23]:
ToExport.groupby(['Partido','Q_Words'])['Word'].count()

Partido  Q_Words
BE       1          30
         2           1
CH       1          45
         2           2
IL       1          86
         2          15
         3           3
Livre    1          37
         3           2
PCP      1          28
PS       1          35
         2           3
PSD      1          35
Name: Word, dtype: int64

In [24]:
ToExport.to_csv('Word Density by Partido.txt', index=True, mode='w', header=True, encoding='utf-8-sig')

In [25]:
ToExclude = pd.read_csv('2Exclude.txt')
ToExport = pd.read_csv('Word Density by Partido.txt')
ToExport

Unnamed: 0.1,Unnamed: 0,Partido,Word,Count,Q_Words,Count_Words,Unnamed: 0.1.1,Weight,Density,Weight_Count,Weight_Density
0,9278,BE,bloco,156,1,38763,38763,1,4024.456311,156,4024.456311
1,15099,BE,criar,164,1,38763,38763,1,4230.838686,164,4230.838686
2,18804,BE,deve,127,1,38763,38763,1,3276.320202,127,3276.320202
3,19554,BE,direito,235,1,38763,38763,1,6062.482264,235,6062.482264
4,21030,BE,economia,129,1,38763,38763,1,3327.915796,129,3327.915796
...,...,...,...,...,...,...,...,...,...,...,...
317,438018,PSD,tem,181,1,38165,0,1,4742.565178,181,4742.565178
318,439066,PSD,todo,149,1,38165,0,1,3904.100616,149,3904.100616
319,442155,PSD,à,332,1,38165,0,1,8699.069828,332,8699.069828
320,442712,PSD,área,119,1,38165,0,1,3118.040089,119,3118.040089


In [26]:
SelectedWords = ToExport.groupby(['Word'])['Word'].count().reset_index(name="Count")
SelectedWords.drop('Count', axis = 1, inplace=True)
SelectedWords = SelectedWords[~SelectedWords['Word'].isin(ToExclude['2Exclude'])]
SelectedWords

Unnamed: 0,Word
3,abandono escolar
4,acesso
5,administração
6,administração pública
7,ambiente
...,...
137,vida
139,área
140,é
141,é essencial


In [40]:
ToExport2 = SelectedWords.merge(WordListDensity, on=['Word'], how='left', indicator=False)
PartidoCount = ToExport2.groupby(['Partido'])['Partido'].count().reset_index(name="Count_Different")
ToExport2 = PartidoCount.merge(ToExport2, on=['Partido'], how='left', indicator=False)
ToExport2

Unnamed: 0.2,Partido,Count_Different,Word,Count,Q_Words,Unnamed: 0,Count_Words,Unnamed: 0.1,Unnamed: 0.1.1,_merge,Min_Density,Min_Count,Weight,Density
0,BE,128,abandono escolar,4,2,38763,38763,38763,38763,both,2000,50,2,103.191187
1,BE,128,acesso,101,1,38763,38763,38763,38763,both,3000,150,1,2605.577484
2,BE,128,administração,32,1,38763,38763,38763,38763,both,3000,150,1,825.529500
3,BE,128,administração pública,14,2,38763,38763,38763,38763,both,2000,50,2,361.169156
4,BE,128,ambiente,26,1,38763,38763,38763,38763,both,3000,150,1,670.742719
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
878,PSD,130,vida,80,1,38165,38165,38165,0,both,3000,150,1,2096.161404
879,PSD,130,área,119,1,38165,38165,38165,0,both,3000,150,1,3118.040089
880,PSD,130,é,394,1,38165,38165,38165,0,both,3000,150,1,10323.594917
881,PSD,130,é essencial,6,2,38165,38165,38165,0,both,2000,50,2,157.212105


## Cross Pivot so that words are columns (dimensions)

In [43]:
DensityPivot = ToExport2.pivot(index=['Partido', 'Count_Words', 'Count_Different'],columns='Word', values='Density')
DensityPivot = DensityPivot.fillna(0)
DensityPivot.to_csv('Data for Clustering - Density.txt', index=True, mode='w', header=True, encoding='utf-8-sig')
DensityPivot

Unnamed: 0_level_0,Unnamed: 1_level_0,Word,abandono escolar,acesso,administração,administração pública,ambiente,apenas,apoio,assegurar,assim,através,...,tem,todo,trabalhador,trabalhar,valor,vida,área,é,é essencial,é necessário
Partido,Count_Words,Count_Different,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
BE,38763,128,103.191187,2605.577484,825.5295,361.169156,670.742719,1547.867812,2863.555452,335.371359,1315.68764,1754.250187,...,3921.265124,4127.647499,2786.162062,3998.658515,2657.173077,1160.900859,2321.801718,8719.655341,51.595594,438.562547
CH,2309,106,0.0,2165.439584,433.087917,0.0,0.0,866.175834,1299.263751,866.175834,0.0,2165.439584,...,433.087917,2598.527501,866.175834,1299.263751,866.175834,3031.615418,2165.439584,5630.142919,0.0,0.0
IL,102905,133,524.755843,3167.970458,2186.482678,1098.100189,621.932851,1797.774647,1613.138331,1797.774647,1953.257859,1826.927749,...,4888.003498,3955.104222,1535.396725,1992.128662,1642.291434,1156.406394,1914.387056,11282.25062,553.908945,719.109859
Livre,20294,129,0.0,3449.295358,1084.064255,788.410368,1527.545087,985.512959,3252.192766,1330.442495,1281.166847,5814.526461,...,1823.198975,7687.001084,2020.301567,4730.462206,1626.096383,2118.852863,2168.128511,3991.327486,147.826944,591.307776
PCP,28417,131,70.380406,2287.363198,1724.319949,1126.086498,1829.890558,668.613858,3167.118274,844.564873,809.37467,3624.590914,...,3976.492944,3413.449696,774.184467,2041.031777,1618.74934,2076.22198,2392.933807,9958.827462,175.951015,527.853046
PS,32349,126,30.912857,3184.024236,2689.418529,1916.597113,710.995703,556.43142,5564.314198,1854.771399,2349.377106,4853.318495,...,1143.775696,4915.144208,1762.032829,3462.239946,1421.991406,1854.771399,3338.588519,4204.148505,340.041423,247.302853
PSD,38165,130,26.202018,1807.939211,1703.131141,1100.484737,3484.868335,707.454474,3118.040089,1048.080702,1074.28272,3196.646142,...,4742.565178,3904.100616,602.646404,1912.747282,1991.353334,2096.161404,3118.040089,10323.594917,157.212105,733.656492


In [45]:
CountPivot = ToExport2.pivot(index='Partido',columns='Word', values='Count')
CountPivot = CountPivot.fillna(0)
CountPivot.to_csv('Data for Clustering - Count.txt', index=True, mode='w', header=True, encoding='utf-8-sig')
CountPivot

Word,abandono escolar,acesso,administração,administração pública,ambiente,apenas,apoio,assegurar,assim,através,...,tem,todo,trabalhador,trabalhar,valor,vida,área,é,é essencial,é necessário
Partido,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BE,4.0,101.0,32.0,14.0,26.0,60.0,111.0,13.0,51.0,68.0,...,152.0,160.0,108.0,155.0,103.0,45.0,90.0,338.0,2.0,17.0
CH,0.0,5.0,1.0,0.0,0.0,2.0,3.0,2.0,0.0,5.0,...,1.0,6.0,2.0,3.0,2.0,7.0,5.0,13.0,0.0,0.0
IL,54.0,326.0,225.0,113.0,64.0,185.0,166.0,185.0,201.0,188.0,...,503.0,407.0,158.0,205.0,169.0,119.0,197.0,1161.0,57.0,74.0
Livre,0.0,70.0,22.0,16.0,31.0,20.0,66.0,27.0,26.0,118.0,...,37.0,156.0,41.0,96.0,33.0,43.0,44.0,81.0,3.0,12.0
PCP,2.0,65.0,49.0,32.0,52.0,19.0,90.0,24.0,23.0,103.0,...,113.0,97.0,22.0,58.0,46.0,59.0,68.0,283.0,5.0,15.0
PS,1.0,103.0,87.0,62.0,23.0,18.0,180.0,60.0,76.0,157.0,...,37.0,159.0,57.0,112.0,46.0,60.0,108.0,136.0,11.0,8.0
PSD,1.0,69.0,65.0,42.0,133.0,27.0,119.0,40.0,41.0,122.0,...,181.0,149.0,23.0,73.0,76.0,80.0,119.0,394.0,6.0,28.0


In [51]:
FinalPivot = DensityPivot.merge(CountPivot, on=['Partido'], how='left', indicator=True)
FinalPivot.drop('_merge', axis = 1, inplace=True)
FinalPivot = FinalPivot.fillna(0)
FinalPivot

Word,abandono escolar_x,acesso_x,administração_x,administração pública_x,ambiente_x,apenas_x,apoio_x,assegurar_x,assim_x,através_x,...,tem_y,todo_y,trabalhador_y,trabalhar_y,valor_y,vida_y,área_y,é_y,é essencial_y,é necessário_y
Partido,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BE,103.191187,2605.577484,825.5295,361.169156,670.742719,1547.867812,2863.555452,335.371359,1315.68764,1754.250187,...,152.0,160.0,108.0,155.0,103.0,45.0,90.0,338.0,2.0,17.0
CH,0.0,2165.439584,433.087917,0.0,0.0,866.175834,1299.263751,866.175834,0.0,2165.439584,...,1.0,6.0,2.0,3.0,2.0,7.0,5.0,13.0,0.0,0.0
IL,524.755843,3167.970458,2186.482678,1098.100189,621.932851,1797.774647,1613.138331,1797.774647,1953.257859,1826.927749,...,503.0,407.0,158.0,205.0,169.0,119.0,197.0,1161.0,57.0,74.0
Livre,0.0,3449.295358,1084.064255,788.410368,1527.545087,985.512959,3252.192766,1330.442495,1281.166847,5814.526461,...,37.0,156.0,41.0,96.0,33.0,43.0,44.0,81.0,3.0,12.0
PCP,70.380406,2287.363198,1724.319949,1126.086498,1829.890558,668.613858,3167.118274,844.564873,809.37467,3624.590914,...,113.0,97.0,22.0,58.0,46.0,59.0,68.0,283.0,5.0,15.0
PS,30.912857,3184.024236,2689.418529,1916.597113,710.995703,556.43142,5564.314198,1854.771399,2349.377106,4853.318495,...,37.0,159.0,57.0,112.0,46.0,60.0,108.0,136.0,11.0,8.0
PSD,26.202018,1807.939211,1703.131141,1100.484737,3484.868335,707.454474,3118.040089,1048.080702,1074.28272,3196.646142,...,181.0,149.0,23.0,73.0,76.0,80.0,119.0,394.0,6.0,28.0


In [52]:
FinalPivot.to_csv('Data for Clustering.txt', index=True, mode='w', header=True, encoding='utf-8-sig')