# Data wrangling

#### Importing libraries and reading files

In [2]:
import pandas as pd

In [4]:
# Load the two raw CSV inputs (judging by the file names: the November 2019
# general-election results and the 2017 family-income distribution).
results = pd.read_csv(
    filepath_or_buffer="../your-project/data/2019_novembre_eleccions_generals_sc (2).csv"
)
family_income = pd.read_csv(
    filepath_or_buffer="../your-project/data/2017_distribucio_territorial_renda_familiar (1).csv"
)

### Wrangling the election results dataset

In [5]:
# Reshape the long-format results to wide format: one row per combination of
# the index keys below, one column per distinct "Nombre" value, filled with the
# matching "Camp" figure. pivot_table applies its default aggregation (mean) if
# a key/column pair occurs more than once. reset_index() then turns the index
# keys back into ordinary columns.
results1 = pd.pivot_table(
    results,
    index=[
        "Seccio_censal",
        "Codi_districte",
        "Nom_districte",
        "Codi_barri",
        "Nom_barri",
        "Any",
    ],
    columns="Nombre",
    values="Camp",
).reset_index()

In [6]:
# Discard the columns that the rest of the notebook does not use: the year,
# the census-section key, the listed party columns and the valid/blank/null
# vote tallies.
results1 = results1.drop(
    columns=[
        "Any",
        "I.Fem",
        "IZQP",
        "MÁS PAÍS",
        "PACMA",
        'PCPC',
        'PCTC',
        "PUM+J",
        "RECORTES CERO-GV",
        'Vots vàlids',
        'Blancs',
        "Nuls",
        'Seccio_censal',
    ]
)

In [7]:
# Replace the whole header with English names.
# The assignment is positional: the list must contain exactly one entry per
# existing column, in the current column order.
col_names = [
    "district_code",
    "district_name",
    "neighbourhood_code",
    'neighbourhood_name',
    'CUP-PR',
    'Cs',
    'ECP-GUANYEM',
    'ERC-SOBIRANISTES',
    'electors',
    'JxCAT-JUNTS',
    'PP',
    'PSC-PSOE',
    'VOX',
    'total_votes',
]

results1.columns = col_names

In [8]:
# Reorder the columns: identifiers first, then the electorate and turnout
# totals, then the per-party vote counts.
results1 = results1.loc[
    :,
    [
        'district_code',
        "district_name",
        "neighbourhood_code",
        'neighbourhood_name',
        'electors',
        'total_votes',
        'CUP-PR',
        'Cs',
        'ECP-GUANYEM',
        'ERC-SOBIRANISTES',
        'JxCAT-JUNTS',
        'PP',
        'PSC-PSOE',
        'VOX',
    ],
]

In [9]:
# Aggregate the rows up to district level by summing the count columns
# (electors, total_votes and the per-party votes).
# The neighbourhood identifiers are removed before aggregating: adding up
# neighbourhood codes yields a meaningless number, and what sum() does with the
# text column of neighbourhood names depends on the pandas version.
# The result is indexed by (district_code, district_name).
results2 = (
    results1
    .drop(columns=["neighbourhood_code", "neighbourhood_name"])
    .groupby(['district_code', "district_name"])
    .sum()
)

In [10]:
# Turnout per district as a percentage: total_votes / electors * 100,
# rounded to two decimals and stored as a new "participation" column.
part = results2["total_votes"].div(results2["electors"]).mul(100)
participation = part.round(2)
results2["participation"] = participation

### Wrangling family income index dataframe

In [11]:
# Remove the year, neighbourhood, population and district-code columns from
# the income table; the following cells only use what remains.
family_income = family_income.drop(
    columns=[
        'Any',
        'Nom_Barri',
        'Codi_Barri',
        'Població',
        "Codi_Districte",
    ]
)

In [12]:
# Rename the two remaining columns to English. The assignment is positional,
# so the frame must have exactly two columns at this point.
col_names1 = [
    'district_name',
    'index_family_income',
]
family_income.columns = col_names1

In [13]:
# Average the income index over all rows of each district (a plain,
# unweighted mean) and round to two decimals. The result is indexed by
# district_name.
family_income_grouped = (
    family_income
    .groupby('district_name')
    .mean()
    .round(2)
)

### Merging table with family income index dataset

In [14]:
# Attach the district-level income index to the election results.
# "district_name" is matched on both sides; the inner join keeps only the
# districts that appear in both frames.
final_db = pd.merge(
    results2,
    family_income_grouped,
    how="inner",
    on="district_name",
)

In [15]:
# Move the index back into regular columns, then keep the columns of the final
# table in presentation order: district, totals, income index, turnout, and
# finally the per-party vote counts.
final_db = final_db.reset_index().loc[
    :,
    [
        'district_name',
        'electors',
        'total_votes',
        'index_family_income',
        "participation",
        'CUP-PR',
        'Cs',
        'ECP-GUANYEM',
        'ERC-SOBIRANISTES',
        'JxCAT-JUNTS',
        'PP',
        'PSC-PSOE',
        'VOX',
    ],
]

In [16]:
# Build a companion table in which each party's raw vote count is replaced by
# its share of total_votes, expressed as a percentage rounded to two decimals.
# .copy() makes percentage_db an independent frame: adding columns to a bare
# column selection of final_db triggers pandas' SettingWithCopyWarning.
percentage_db = final_db[['district_name', 'electors', 'total_votes', 'index_family_income', "participation",
                          'CUP-PR', 'Cs', 'ECP-GUANYEM', 'ERC-SOBIRANISTES', 'JxCAT-JUNTS', 'PP', 'PSC-PSOE',
                          'VOX']].copy()

# Raw vote column -> percentage column. The insertion order determines the
# order in which the percentage columns are appended to the table.
party_percentage_columns = {
    'CUP-PR': 'percentage_CUP',
    'Cs': 'percentage_Cs',
    'PP': 'percentage_PP',
    'ECP-GUANYEM': 'percentage_ECP',
    'ERC-SOBIRANISTES': 'percentage_ERC',
    'JxCAT-JUNTS': 'percentage_JxCAT',
    'PSC-PSOE': 'percentage_PSC',
    'VOX': 'percentage_VOX',
}
for votes_column, percentage_column in party_percentage_columns.items():
    percentage_db[percentage_column] = round(
        (percentage_db[votes_column] / percentage_db['total_votes']) * 100, 2
    )

# The raw counts are dropped so that only the percentage columns remain
# alongside the district-level totals.
percentage_db = percentage_db.drop(columns=list(party_percentage_columns))

### Exporting files for analysis

In [18]:
# Write the absolute-count table to disk. index=True (the pandas default) is
# spelled out: the row index is written as the first, unnamed CSV column.
final_db.to_csv(
    "../your-project/data/final_db.csv",
    index=True,
)

In [19]:
# Write the percentage table to disk. index=True (the pandas default) is
# spelled out: the row index is written as the first, unnamed CSV column.
percentage_db.to_csv(
    "../your-project/data/final_db_percentages.csv",
    index=True,
)