# Creating a dataframe with MEPS names and political parties affiliations


Download data here: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/V2FJEF

Call the data folder "MEPS_data"

--- MEPS

    --- MEPS_data
    
    --- MEPS.ipynb
    
    --- MEPS_encodings.txt

Decision if the party is far-right: https://docs.google.com/spreadsheets/d/1pExrE7I-cM1izkW5dMGDq0EHA2GQAxA2FPhjlNjJJ6c/edit#gid=0

In [1]:
import pandas as pd
import os
# ! pip install openpyxl

In [2]:
# we need only the files for the period 1996-2011

import os
path = os.getcwd()
file_names = [os.path.join(path,'MEPS_data/5/Dataset MEPs EP4 1994-1999.xlsx'),
            os.path.join(path,'MEPS_data/6/Dataset MEPs EP5 1999-2004.xlsx'),
            os.path.join(path, 'MEPS_data/7/Dataset MEPs EP 6 2004-2009.xlsx'),
            os.path.join(path, 'MEPS_data/8/Dataset MEPs 7EP 2009-2014.xlsx')
         ]
file_names

['/home/elena/code/konratp/Detecting-Far-Right-Talking-Points/MEPS/MEPS_data/5/Dataset MEPs EP4 1994-1999.xlsx',
 '/home/elena/code/konratp/Detecting-Far-Right-Talking-Points/MEPS/MEPS_data/6/Dataset MEPs EP5 1999-2004.xlsx',
 '/home/elena/code/konratp/Detecting-Far-Right-Talking-Points/MEPS/MEPS_data/7/Dataset MEPs EP 6 2004-2009.xlsx',
 '/home/elena/code/konratp/Detecting-Far-Right-Talking-Points/MEPS/MEPS_data/8/Dataset MEPs 7EP 2009-2014.xlsx']

## Dataframes 1994 - 1999, 1999 -2004
NOTE: These processing is applicable to:
* MEPS_data/5/Dataset MEPs EP4 1994-1999.xlsx
* MEPS_data/6/Dataset MEPs EP5 1999-2004.xlsx

### Extract encodings

In [3]:
# info from the first file: MEPS_data/5/Dataset MEPs EP4 1994-1999.xlsx
# not all files contain this info

political_parties_df = pd.read_excel(file_names[0], sheet_name = 2)
political_parties_df

Unnamed: 0,EP Group,Abbeviation,Code
0,Independents for a European of Nations,I-EDN,A
1,Europe of Democracies and Diversities,EDD,A
2,European Democrats,ED,C
3,European People's Party,PPE,E
4,European People's Party-European Democrats,PPE-DE,E
5,Forza Europa,FE,F
6,Progressive European Democrats,DEP,G
7,European Democratic Alliance,RDE,G
8,Union for Europe,UPE,G
9,Union for a Europe of Nations,UEN,G


In [4]:
political_party_grouped = political_parties_df.groupby(political_parties_df['Code'], as_index = False).agg(lambda x: list(x))
political_party_dic = dict(zip(political_party_grouped['Code'], political_party_grouped['EP Group']))
# political_party_dic

In [5]:
national_parties_df = pd.read_excel(file_names[0], sheet_name = 1)
# national_parties_df

In [6]:
national_party_name_dic = dict(zip(national_parties_df['Code'], national_parties_df['National Party']))
# national_party_name_dic

In [7]:
national_party_family_dic = dict(zip(national_parties_df['Code'], national_parties_df['Party Family']))
national_party_family_dic

{1001: 'Right',
 1002: 'Grn',
 1003: 'Lib',
 1004: 'CDem',
 1005: 'Soc',
 1101: 'Grn',
 1102: 'CDem',
 1103: 'CDem',
 1104: 'Grn',
 1105: 'Reg',
 1106: 'Right',
 1107: 'Lib',
 1108: 'Soc',
 1109: 'CDem',
 1110: 'Lib',
 1111: 'Soc',
 1112: 'Right',
 1113: 'Reg',
 1201: 'CDem',
 1202: 'CDem',
 1203: 'Lib',
 1204: 'Grn',
 1205: 'Left',
 1206: 'Right',
 1207: 'Soc',
 1301: 'Con',
 1302: 'Reg',
 1303: 'Lib',
 1304: 'Lib',
 1305: 'Reg',
 1306: 'Reg',
 1307: 'Reg',
 1308: 'Reg',
 1309: 'Reg',
 1310: 'Lib',
 1311: 'Left',
 1312: 'Reg',
 1313: 'Reg',
 1314: 'Soc',
 1315: 'Soc',
 1316: 'Con',
 1317: 'CDem',
 1318: 'CDem',
 1319: 'Reg',
 1320: 'Lib',
 1401: 'CDem',
 1402: 'Con',
 1403: 'Anti-EU',
 1404: 'Con',
 1405: 'Soc',
 1406: 'Right',
 1407: 'Anti-EU',
 1408: 'Left',
 1409: 'Left',
 1410: 'Con',
 1411: 'Soc',
 1412: 'Soc',
 1413: 'Lib',
 1414: 'Con',
 1415: 'Reg',
 1416: 'Grn',
 1417: 'Lib',
 1418: 'Ind',
 1419: 'Left',
 1420: 'CDem',
 1421: 'Lib',
 1422: 'CDem',
 1423: 'Left',
 1424: 'Con',

### Encode far-right vs non far-right
* 1 - far-right
* 0 - non far-right

In [8]:
far_right_dic = dict(zip(political_parties_df['Code'].unique(), [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1]))
far_right_dic

{'A': 1,
 'C': 0,
 'E': 0,
 'F': 0,
 'G': 1,
 'L': 0,
 'M': 0,
 'O': 0,
 'N': 1,
 'R': 0,
 'S': 0,
 'V': 0,
 'X': 1}

### Create new dataframes

In [9]:
def process_table_1(df, file_name):
    new_table = df[['full_name', 'country_name', 'epg_name', 'national party_id']]
    new_table['surname'] = [x.split(" ")[0].lower() for x in new_table['full_name']] # contains diacritics
    new_table['far_right'] = [far_right_dic[x] for x in new_table['epg_name']]
    new_table['epg_names'] = [political_party_dic[x] for x in new_table['epg_name']]
    new_table['national_party_name'] = [national_party_name_dic[x] for x in new_table['national party_id']]
    new_table['national_party_family'] = [national_party_family_dic[x] for x in new_table['national party_id']]
    new_table['years'] = file_name[-14:-5]
    new_table.rename(columns = {'epg_name':'epg_code',
                               'national party_id':'national_party_id',
                               }, inplace = True)
    new_table = new_table[['full_name', 'surname', 'far_right', 'years', 'country_name', 'epg_code', 'epg_names', 'national_party_id', 'national_party_name', 'national_party_family']]
    
    return new_table

In [10]:
# 'MEPS_data/5/Dataset MEPs EP4 1994-1999.xlsx'

df_1994 = pd.read_excel(file_names[0], sheet_name = 0)

# Dagmar Reichenbach
df_1994.loc[568, 'epg_name'] = 'S'  # '\xa0\xa0\xa0\xa0\xa0S&D' -> S
df_1994.loc[568, 'national party_id'] = 2407 #'\xa0\xa0\xa0\xa0\xa0Social Democratic Party' -> 2407
# Wagenknecht Sahra
df_1994.loc[719, 'epg_name'] = 'O'  # 'EUL/NGL' -> O
df_1994.loc[719, 'national party_id'] = 1205 # 31 -> 1205

df_1994 = process_table_1(df_1994, file_names[0])
df_1994

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_table['surname'] = [x.split(" ")[0].lower() for x in new_table['full_name']] # contains diacritics
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_table['far_right'] = [far_right_dic[x] for x in new_table['epg_name']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_table['epg_names'] = [p

Unnamed: 0,full_name,surname,far_right,years,country_name,epg_code,epg_names,national_party_id,national_party_name,national_party_family
0,ADAM Gordon J.,adam,0,1994-1999,U.K.,S,"[Socialist Group, Party of European Socialists]",2404,Labour Party,Soc
1,AELVOET Magda G.H.,aelvoet,0,1994-1999,Belgium,V,"[Green Group, Greens/European Free Alliance]",1101,"Anders gaan arbeiden, leven en vrijen",Grn
2,AGLIETTA Maria Adelaide,aglietta,0,1994-1999,Italy,V,"[Green Group, Greens/European Free Alliance]",1609,Verdi Arcobaleno / Federazione dei Verdi / Ver...,Grn
3,AHERN Nuala,ahern,0,1994-1999,Ireland,V,"[Green Group, Greens/European Free Alliance]",2203,Green Party,Grn
4,AHLQVIST Birgitta,ahlqvist,0,1994-1999,Sweden,S,"[Socialist Group, Party of European Socialists]",2306,Socialdemokratiska arbetarepartiet,Soc
...,...,...,...,...,...,...,...,...,...,...
737,WURTH-POLFER Lydie,wurth-polfer,0,1994-1999,Luxembourg,L,"[Liberal and Democratic Group, Liberal Democra...",1802,Parti démcratique,Lib
738,WURTZ Francis,wurtz,0,1994-1999,France,M,"[Communist Group, European United Left/Nordic ...",1409,Parti communiste française / Gauche unitaire /...,Left
739,WYNN Terence,wynn,0,1994-1999,U.K.,S,"[Socialist Group, Party of European Socialists]",2404,Labour Party,Soc
740,ZIMMERMANN Wilmya,zimmermann,0,1994-1999,Germany,S,"[Socialist Group, Party of European Socialists]",1207,Sozialdemokratische Partei Deutschlands,Soc


In [11]:
# 'MEPS_data/6/Dataset MEPs EP5 1999-2004.xlsx'

df_1999 = pd.read_excel(file_names[1], sheet_name = 0)
df_1999 = process_table_1(df_1999, file_names[1])
df_1999

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_table['surname'] = [x.split(" ")[0].lower() for x in new_table['full_name']] # contains diacritics
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_table['far_right'] = [far_right_dic[x] for x in new_table['epg_name']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_table['epg_names'] = [p

Unnamed: 0,full_name,surname,far_right,years,country_name,epg_code,epg_names,national_party_id,national_party_name,national_party_family
0,BERGER Maria,berger,0,1999-2004,Austria,S,"[Socialist Group, Party of European Socialists]",1005,Sozialdemokratische Partei Österreichs,Soc
1,BÖSCH Herbert,bösch,0,1999-2004,Austria,S,"[Socialist Group, Party of European Socialists]",1005,Sozialdemokratische Partei Österreichs,Soc
2,ECHERER Raina A. Mercedes,echerer,0,1999-2004,Austria,V,"[Green Group, Greens/European Free Alliance]",1002,Die Grünen – Die Grüne Alternative,Grn
3,ETTL Harald,ettl,0,1999-2004,Austria,S,"[Socialist Group, Party of European Socialists]",1005,Sozialdemokratische Partei Österreichs,Soc
4,FLEMMING Marialiese,flemming,0,1999-2004,Austria,E,"[European People's Party, European People's Pa...",1004,Österreichische Volkspartei,CDem
...,...,...,...,...,...,...,...,...,...,...
687,WYNN Terence,wynn,0,1999-2004,U.K.,S,"[Socialist Group, Party of European Socialists]",2404,Labour Party,Soc
688,DONNELLY Alan John,donnelly,0,1999-2004,U.K.,S,"[Socialist Group, Party of European Socialists]",2404,Labour Party,Soc
689,GREEN Pauline,green,0,1999-2004,U.K.,S,"[Socialist Group, Party of European Socialists]",2404,Labour Party,Soc
690,BOOTH Graham H.,booth,1,1999-2004,U.K.,A,"[Independents for a European of Nations, Europ...",2409,United Kingdom Independence Party,Anti-EU


## Dataframe 2004-2009

### Extract encodings

In [12]:
national_party_2004 = pd.read_excel(file_names[2], sheet_name = 1)
national_party_dic_2004 = dict(zip(national_party_2004['ID'], national_party_2004['Name']))
national_party_dic_2004

{1: "Alleanza Popolare - Unione Democratici per l'Europa",
 2: 'Alleanza nazionale',
 3: 'Alternativa sociale: Lista Mussolini',
 4: 'Democratici di Sinistra',
 5: 'Federazione dei Verdi',
 6: 'Forza Italia',
 7: 'Forza Nuova',
 8: 'Indipendente',
 9: 'Italia dei Valori',
 10: 'La Destra - Alleanza Siciliana',
 11: 'La Margherita',
 12: "Lega Nord per l'indipendenza della Padania",
 13: 'Lista Emma Bonino',
 14: 'Movimento Repubblicani Europei',
 15: 'Movimento Sociale Fiamma tricolore',
 16: 'Partito Democratico',
 17: 'Partito Pensionati',
 18: 'Partito Socialista',
 19: 'Partito dei Comunisti Italiani',
 20: 'Partito del Sud',
 21: 'Partito della Rifondazione Comunista - Sinistra Europea',
 22: 'Partito della Rifondazione Comunista - Sinistra Europea (Indipendente)',
 23: 'Sinistra Democratica',
 24: 'Socialisti democratici italiani',
 25: 'Südtiroler Volkspartei (Partito popolare sudtirolese)',
 26: 'Unione dei Democratici cristiani e dei Democratici di Centro',
 27: "Uniti nell'Ul

### Encode far-right vs non far-right

In [13]:
# 'MEPS_data/7/Dataset MEPs EP 6 2004-2009.xlsx'
df_2004 = pd.read_excel(file_names[2], sheet_name = 0)
df_2004

Unnamed: 0,nr,full_name,country_name,epg_name,id,national party_id,Newcomer/Amateur,Rielected in the EP,Rielected in the EP after interval,Number of past legislatures in the EP (not conisdering the 2004-2009),No political experience/amateur,National level,Code for previous national political roles,Local level,Code for previous local political roles,Gender,Role in the EP,Role in the party group,notes,Report
0,,"BERGER, Maria",Austria,SOC,2285.0,206,n,y,,2.0,,,,,,f,,,,5
1,,"BÖSCH, Herbert",Austria,SOC,2048.0,206,n,y,,2.0,,MP,1.0,,,m,committee chair,,,5
2,,"ETTL, Harald",Austria,SOC,2286.0,206,n,y,,2.0,,senior and/or junior minister,3.0,,,m,,,,6
3,,"KARAS, Othmar",Austria,EPP-ED,4246.0,207,n,y,,1.0,,member of national party leadership,4.0,,,m,,,,6
4,,"LEICHTFRIED, Jörg",Austria,SOC,28251.0,206,y,,,,,member of national party leadership,,,10.0,m,,,,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
908,,"WILLMOTT, Glenis",UK,SOC,35743.0,57,y,,,,y,,,,,f,,,,0
909,,"SIMPSON, Brian",UK,SOC,1309.0,57,n,y,,3.0,,,,,,m,,,,0
910,,"COLMAN, Trevor",UK,IND/DEM,94283.0,66,y,,,,,member of national party leadership,4.0,,,m,,,,0
911,,"VILLIERS, Theresa",UK,EPP-ED,4520.0,54,n,y,,1.0,,,,,,f,,,,0


In [14]:
df_2004['epg_name'].unique()
df_2004.loc[307,'epg_name'] = 'S&D'   # '\xa0\xa0\xa0\xa0\xa0S&D' ==> 'SOC'
df_2004.loc[535,'epg_name'] = 'na'    # nan -> 'na'
df_2004.loc[806,'epg_name'] = 'na'    # nan -> 'na'

far_right_dic_2004 = { 'SOC' : 0,
                      'S&D' : 0,
                      'EPP-ED' : 0,
                      'na' : 0,
                      'ALDE' : 0,
                      'G/EFA' : 0,
                      'EUL/NGL' : 0,
                      'IND/DEM' : 1,
                      'UEN' : 1}


In [15]:
party_names_dic_2004 = {'SOC' : "SOC",  ## ??
                      'S&D' : "The Progressive Alliance of Socialists and Democrats",
                      'EPP-ED' : "European People's Party Group and European Democrats",
                      'na' : 'NA',
                      'ALDE' : "Alliance of Liberals and Democrats for Europe Party",
                      'G/EFA' : "The Greens/European Free Alliance",
                      'EUL/NGL' : "The Left in the European Parliament",
                      'IND/DEM' : "Independence/Democracy",
                      'UEN' : "Union for Europe of the Nations"}

### Create a new dataframe

In [16]:
df_2004 = df_2004[['full_name', 'country_name', 'epg_name', 'national party_id']]
df_2004['surname'] = [x.split(",")[0].lower() for x in df_2004['full_name']] # contains diacritics
df_2004['far_right'] = [far_right_dic_2004[x] for x in df_2004['epg_name']]
df_2004['years'] = file_names[2][-14:-5]
df_2004.loc[307,'national party_id'] = 151    #Dagmar Reichenbach 
# "\xa0\xa0\xa0\xa0\xa0Social Democratic Party" -> 'Socialdemokratiet'
df_2004['national_party_name'] = [national_party_dic_2004[x] for x in df_2004['national party_id']]
df_2004['epg_names'] = [party_names_dic_2004[x] for x in df_2004['epg_name']]
df_2004.rename(columns = {'epg_name':'epg_code',
                          'national party_id' : 'national_party_id'              
                         }, inplace = True)
df_2004

#full_name	surname	far_right	years	country_name	epg_code	epg_names	national_party_id	national_party_name	national_party_family

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2004['surname'] = [x.split(",")[0].lower() for x in df_2004['full_name']] # contains diacritics
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2004['far_right'] = [far_right_dic_2004[x] for x in df_2004['epg_name']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2004['years'] = file_names[

Unnamed: 0,full_name,country_name,epg_code,national_party_id,surname,far_right,years,national_party_name,epg_names
0,"BERGER, Maria",Austria,SOC,206,berger,0,2004-2009,Sozialdemokratische Partei Österreichs,SOC
1,"BÖSCH, Herbert",Austria,SOC,206,bösch,0,2004-2009,Sozialdemokratische Partei Österreichs,SOC
2,"ETTL, Harald",Austria,SOC,206,ettl,0,2004-2009,Sozialdemokratische Partei Österreichs,SOC
3,"KARAS, Othmar",Austria,EPP-ED,207,karas,0,2004-2009,Österreichische Volkspartei - Liste Ursula Ste...,European People's Party Group and European Dem...
4,"LEICHTFRIED, Jörg",Austria,SOC,206,leichtfried,0,2004-2009,Sozialdemokratische Partei Österreichs,SOC
...,...,...,...,...,...,...,...,...,...
908,"WILLMOTT, Glenis",UK,SOC,57,willmott,0,2004-2009,Labour Party,SOC
909,"SIMPSON, Brian",UK,SOC,57,simpson,0,2004-2009,Labour Party,SOC
910,"COLMAN, Trevor",UK,IND/DEM,66,colman,1,2004-2009,UK Independence Party,Independence/Democracy
911,"VILLIERS, Theresa",UK,EPP-ED,54,villiers,0,2004-2009,Conservative and Unionist Party,European People's Party Group and European Dem...


##  Dataframe 2009-2014
'MEPS_data/8/Dataset MEPs 7EP 2009-2014.xlsx'

### Encode far-right vs non far-right

In [17]:
'/home/elena/code/konratp/Detecting-Far-Right-Talking-Points/MEPS_data/8/Dataset MEPs 7EP 2009-2014.xlsx'
df_2009 = pd.read_excel(file_names[3], sheet_name = 0)
df_2009

Unnamed: 0,fullName,country,politicalGroup,national Party,Newcomer in the EP,Rielected in the EP,Rielected in the EP after interval,Number of past legislatures in the EP (not conisdering the 2009-2014),No political experience/Amateur,National level,Code for previous national political roles,Local level,ode for previous local politcal roles,gender,Role in the EP,Role in the party group,Reports,Notes
0,Martin Ehrenhauser,Austria,NI,Hans-Peter Martin's List,y,,,,y,,,,,m,,,2.0,
1,Karin Kadenbach,Austria,S&D,Social Democratic Party,y,,,,,,,Member of regional parliament,5.0,f,,,1.0,
2,Othmar Karas,Austria,EPP,People's Party,n,y,,2.0,,member of national party leadership,4.0,,,m,vice president,vice chair,5.0,
3,Elisabeth Köstinger,Austria,EPP,People's Party,y,,,,,,,member of regional party leadership,,f,,member of the bureau,2.0,
4,Jörg Leichtfried,Austria,S&D,Social Democratic Party,N,Y,,1.0,,,,member of regional party leadership,10.0,m,,vice chair,8.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
811,Jean Roatta,France,EPP,Union for a Popular Movement,y,,,,,MP,1.0,,,m,,,0.0,
812,Yves Cochet,France,G–EFA,Europe Ecology,y,,,,,MP,1.0,,,m,,,0.0,
813,Nils TORVALDS,Finland,ALDE,Svenska folkpartiet,Y,,,,,member of national party leadership,,council member,9.0,M,,,1.0,
814,Isabelle THOMAS,France,S&D,Parti socialiste,Y,,,,,member of national party leadership,4.0,Member of regional parliament,5.0,F,,,2.0,


In [18]:
df_2009['politicalGroup'].unique()

far_right_dic_2009 = {
    'NI' : 0,
    'S&D' : 0, 
    'EPP' : 0,
    'G–EFA' : 0,
    'ECR' : 0,
    'ALDE' : 0,
    'NI/EFD' : 1,
    'EUL–NGL' : 0,
    'EFD' : 1,
    'EUL-NGL' : 0,
    'EUL/NGL' : 0,
    'G-EFA' : 0,
    'EPP-ED' : 0}

In [19]:
political_party_names_2009 = {
    'NI' : "Non-Inscrits",
    'S&D' : "The Progressive Alliance of Socialists and Democrats", 
    'EPP' : "European People's Party Group",
    'G–EFA' : "The Greens/European Free Alliance",
    'ECR' : "The European Conservatives and Reformists Group",
    'ALDE' : "Alliance of Liberals and Democrats for Europe Party",
    'NI/EFD' : "Non-Inscrits / Europe of Freedom and Democracy",   ## Frank Vanhecke
    'EUL–NGL' : "The Left in the European Parliament",
    'EFD' : "Europe of Freedom and Democracy",
    'EUL-NGL' : "The Left in the European Parliament",
    'EUL/NGL' : "The Left in the European Parliament",
    'G-EFA' : "The Greens/European Free Alliance",
    'EPP-ED' : "European People's Party Group / European Democrats"}

### Create a new dataframe

In [20]:
df_2009 = df_2009[['fullName', 'country', 'politicalGroup', 'national Party']]
df_2009['surname'] = [x.split(" ")[1].lower() for x in df_2009['fullName']] # contains diacritics
df_2009['far_right'] = [far_right_dic_2009[x] for x in df_2009['politicalGroup']]
df_2009['years'] = file_names[3][-14:-5]
df_2009['epg_names'] = [political_party_names_2009[x] for x in df_2009['politicalGroup']]
df_2009.rename(columns = {'country':'country_name',
                          'fullName' : 'full_name',
                          'politicalGroup': 'epg_code',
                          'national Party' : 'national_party_name'                    
                         }, inplace = True)
df_2009

#full_name	surname	far_right	years	country_name	epg_code	epg_names	national_party_id	national_party_name	national_party_family

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2009['surname'] = [x.split(" ")[1].lower() for x in df_2009['fullName']] # contains diacritics
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2009['far_right'] = [far_right_dic_2009[x] for x in df_2009['politicalGroup']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2009['years'] = file_n

Unnamed: 0,full_name,country_name,epg_code,national_party_name,surname,far_right,years,epg_names
0,Martin Ehrenhauser,Austria,NI,Hans-Peter Martin's List,ehrenhauser,0,2009-2014,Non-Inscrits
1,Karin Kadenbach,Austria,S&D,Social Democratic Party,kadenbach,0,2009-2014,The Progressive Alliance of Socialists and Dem...
2,Othmar Karas,Austria,EPP,People's Party,karas,0,2009-2014,European People's Party Group
3,Elisabeth Köstinger,Austria,EPP,People's Party,köstinger,0,2009-2014,European People's Party Group
4,Jörg Leichtfried,Austria,S&D,Social Democratic Party,leichtfried,0,2009-2014,The Progressive Alliance of Socialists and Dem...
...,...,...,...,...,...,...,...,...
811,Jean Roatta,France,EPP,Union for a Popular Movement,roatta,0,2009-2014,European People's Party Group
812,Yves Cochet,France,G–EFA,Europe Ecology,cochet,0,2009-2014,The Greens/European Free Alliance
813,Nils TORVALDS,Finland,ALDE,Svenska folkpartiet,torvalds,0,2009-2014,Alliance of Liberals and Democrats for Europe ...
814,Isabelle THOMAS,France,S&D,Parti socialiste,thomas,0,2009-2014,The Progressive Alliance of Socialists and Dem...


In [21]:
df_2009[df_2009['far_right'] == 1]

Unnamed: 0,full_name,country_name,epg_code,national_party_name,surname,far_right,years,epg_names
43,Frank Vanhecke,Belgium,NI/EFD,Flemish Interest,vanhecke,1,2009-2014,Non-Inscrits / Europe of Freedom and Democracy
110,Morten Messerschmidt,Denmark,EFD,People's Party,messerschmidt,1,2009-2014,Europe of Freedom and Democracy
112,Anna Rosbach Andersen,Denmark,EFD,People's Party,rosbach,1,2009-2014,Europe of Freedom and Democracy
134,Timo Soini,Finland,EFD,True Finns,soini,1,2009-2014,Europe of Freedom and Democracy
206,Philippe de Villiers,France,EFD,Libertas (MPF),de,1,2009-2014,Europe of Freedom and Democracy
327,Thanos Plevris,Greece,EFD,Popular Orthodox Rally,plevris,1,2009-2014,Europe of Freedom and Democracy
328,Nikolaos Salavrakos,Greece,EFD,Popular Orthodox Rally,salavrakos,1,2009-2014,Europe of Freedom and Democracy
335,Niki Tzavela,Greece,EFD,Popular Orthodox Rally,tzavela,1,2009-2014,Europe of Freedom and Democracy
425,Mara Bizzotto,Italy,EFD,Lega Nord,bizzotto,1,2009-2014,Europe of Freedom and Democracy
428,Mario Borghezio,Italy,EFD,Lega Nord,borghezio,1,2009-2014,Europe of Freedom and Democracy


## Merge dataframes

In [22]:
concatenated = pd.concat([df_1994, df_1999, df_2004, df_2009])
concatenated.reset_index(drop = True, inplace = True)
concatenated

Unnamed: 0,full_name,surname,far_right,years,country_name,epg_code,epg_names,national_party_id,national_party_name,national_party_family
0,ADAM Gordon J.,adam,0,1994-1999,U.K.,S,"[Socialist Group, Party of European Socialists]",2404,Labour Party,Soc
1,AELVOET Magda G.H.,aelvoet,0,1994-1999,Belgium,V,"[Green Group, Greens/European Free Alliance]",1101,"Anders gaan arbeiden, leven en vrijen",Grn
2,AGLIETTA Maria Adelaide,aglietta,0,1994-1999,Italy,V,"[Green Group, Greens/European Free Alliance]",1609,Verdi Arcobaleno / Federazione dei Verdi / Ver...,Grn
3,AHERN Nuala,ahern,0,1994-1999,Ireland,V,"[Green Group, Greens/European Free Alliance]",2203,Green Party,Grn
4,AHLQVIST Birgitta,ahlqvist,0,1994-1999,Sweden,S,"[Socialist Group, Party of European Socialists]",2306,Socialdemokratiska arbetarepartiet,Soc
...,...,...,...,...,...,...,...,...,...,...
3158,Jean Roatta,roatta,0,2009-2014,France,EPP,European People's Party Group,,Union for a Popular Movement,
3159,Yves Cochet,cochet,0,2009-2014,France,G–EFA,The Greens/European Free Alliance,,Europe Ecology,
3160,Nils TORVALDS,torvalds,0,2009-2014,Finland,ALDE,Alliance of Liberals and Democrats for Europe ...,,Svenska folkpartiet,
3161,Isabelle THOMAS,thomas,0,2009-2014,France,S&D,The Progressive Alliance of Socialists and Dem...,,Parti socialiste,


In [23]:
concatenated['full_name'] = concatenated['full_name'].apply(lambda x: x.replace(',', ''))
concatenated

Unnamed: 0,full_name,surname,far_right,years,country_name,epg_code,epg_names,national_party_id,national_party_name,national_party_family
0,ADAM Gordon J.,adam,0,1994-1999,U.K.,S,"[Socialist Group, Party of European Socialists]",2404,Labour Party,Soc
1,AELVOET Magda G.H.,aelvoet,0,1994-1999,Belgium,V,"[Green Group, Greens/European Free Alliance]",1101,"Anders gaan arbeiden, leven en vrijen",Grn
2,AGLIETTA Maria Adelaide,aglietta,0,1994-1999,Italy,V,"[Green Group, Greens/European Free Alliance]",1609,Verdi Arcobaleno / Federazione dei Verdi / Ver...,Grn
3,AHERN Nuala,ahern,0,1994-1999,Ireland,V,"[Green Group, Greens/European Free Alliance]",2203,Green Party,Grn
4,AHLQVIST Birgitta,ahlqvist,0,1994-1999,Sweden,S,"[Socialist Group, Party of European Socialists]",2306,Socialdemokratiska arbetarepartiet,Soc
...,...,...,...,...,...,...,...,...,...,...
3158,Jean Roatta,roatta,0,2009-2014,France,EPP,European People's Party Group,,Union for a Popular Movement,
3159,Yves Cochet,cochet,0,2009-2014,France,G–EFA,The Greens/European Free Alliance,,Europe Ecology,
3160,Nils TORVALDS,torvalds,0,2009-2014,Finland,ALDE,Alliance of Liberals and Democrats for Europe ...,,Svenska folkpartiet,
3161,Isabelle THOMAS,thomas,0,2009-2014,France,S&D,The Progressive Alliance of Socialists and Dem...,,Parti socialiste,


In [24]:
concatenated[concatenated['far_right'] == 1]

Unnamed: 0,full_name,surname,far_right,years,country_name,epg_code,epg_names,national_party_id,national_party_name,national_party_family
8,ALDO Blaise,aldo,1,1994-1999,France,G,"[Progressive European Democrats, European Demo...",1404,Rassemblement pour la République / Défence des...,Con
9,AMADEO Amedeo,amadeo,1,1994-1999,Italy,N,[Technical Coordination of Democrats and Indep...,1601,Movimento soziale italiano / Alleanza nazionale,Right
15,ANDREWS Niall,andrews,1,1994-1999,Ireland,G,"[Progressive European Democrats, European Demo...",2201,Fianna Fáil,Con
16,ANGELILLI Roberta,angelilli,1,1994-1999,Italy,N,[Technical Coordination of Democrats and Indep...,1601,Movimento soziale italiano / Alleanza nazionale,Right
18,ANTONY Bernard,antony,1,1994-1999,France,N,[Technical Coordination of Democrats and Indep...,1406,Front national,Right
...,...,...,...,...,...,...,...,...,...,...
3098,Derek Clark,clark,1,2009-2014,UK,EFD,Europe of Freedom and Democracy,,UK Independence Party,
3100,The Earl of Dartmouth,earl,1,2009-2014,UK,EFD,Europe of Freedom and Democracy,,UK Independence Party,
3107,Nigel Farage,farage,1,2009-2014,UK,EFD,Europe of Freedom and Democracy,,UK Independence Party,
3116,Roger Helmer,helmer,1,2009-2014,UK,EFD,Europe of Freedom and Democracy,,UK Independence Party,


In [25]:
concatenated.to_csv(path_or_buf = os.path.join(path, 'MEP_dataframe.csv'))