In [1]:
#Libraries
import pandas as pd
import numpy as np

In [2]:
#Import the data
ages = {}
for i in range(2015,2020):
    ages["ages" + str(i)] = pd.read_csv("data/ages" + str(i) + ".csv")
    

In [3]:
#columns name change
population = pd.concat(ages)
population = population.drop(["Codi_Districte","Codi_Barri","Nom_Barri"], axis=1)
population_columns = ['Year', 'District','Gender','Age','Number']
population.columns = population_columns

population.reset_index(drop=True, inplace=True) 

In [4]:
#functions to be applied

def growth_rate(present_year, prior_year):
    growth = (present_year - prior_year)/prior_year
    return round(growth*100,2)

def clean_age(ages):
    cleaned_age = []
    for age in ages['Age']:
        clean = age.split(' ')[0]
        cleaned_age.append(clean)
    return cleaned_age

def new_column(row):
    if row['Age'] < 10:
        return '0-9'
    elif 10 <= row['Age'] < 20:
        return '10-19'
    elif 20 <= row['Age'] < 30:
        return '20-29'
    elif 30 <= row['Age'] < 40:
        return '30-39'
    elif 40 <= row['Age'] < 50:
        return '40-49'
    elif 50 <= row['Age'] < 60:
        return '50-59'
    elif 60 <= row['Age'] < 70:
        return '60-69'
    elif 70 <= row['Age'] < 80:
        return '70-79'
    elif 80 <= row['Age'] < 90:
        return '80-89'
    elif 90 <= row['Age']:
        return '90+'


In [5]:
#clean the Age Data

population['Gender'] = population['Gender'].str.replace('Home','Male').str.replace('Dona','Female')

population['Age'] = clean_age(population)

population['Age']=population['Age'].astype(int)

population


Unnamed: 0,Year,District,Gender,Age,Number
0,2015,Ciutat Vella,Male,0,227
1,2015,Ciutat Vella,Male,0,34
2,2015,Ciutat Vella,Male,0,40
3,2015,Ciutat Vella,Male,0,92
4,2015,Eixample,Male,0,127
...,...,...,...,...,...
71243,2019,Sant Martí,Male,99,1
71244,2019,Sant Martí,Male,99,1
71245,2019,Sant Martí,Male,99,0
71246,2019,Sant Martí,Male,99,2


In [6]:
#Population by Gender

group_by_gender = population.drop(columns = "Age").groupby(['Year','District','Gender']).sum().swaplevel(0,1).unstack('Year').unstack('Gender')

years = [i for i in [i for i in [b for i in group_by_gender.columns for b in i] if i != "Number"] if type(i) == int]
gender = [i for i in [i for i in [b for i in group_by_gender.columns for b in i] if i != "Number"] if type(i) == str]

lst = []
for i in range(len(gender)):
    lst.append(gender[i] + "_" + str(years[i]))
    
group_by_gender.columns  = lst

group_by_gender['% of women'] = (group_by_gender['Female_2019'])/(group_by_gender['Female_2019']+group_by_gender['Male_2019'])

group_by_gender

Unnamed: 0_level_0,Female_2015,Male_2015,Female_2016,Male_2016,Female_2017,Male_2017,Female_2018,Male_2018,Female_2019,Male_2019,% of women
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Ciutat Vella,47579,52536,47633,52437,48467,52920,48095,52619,48626,54803,0.470139
Eixample,141341,122217,141539,122766,142554,123862,141556,123318,141840,124070,0.533414
Gràcia,65260,55141,65517,55401,65793,55554,65616,55389,65889,55909,0.540969
Horta-Guinardó,88335,78224,88622,78646,89485,79266,89978,79542,91052,80443,0.530931
Les Corts,43559,37971,43567,38075,43820,38213,43829,37926,43919,38055,0.535767
Nou Barris,87012,77636,87220,77661,88348,78231,89231,78637,90636,80033,0.531063
Sant Andreu,77091,69403,77133,69598,77571,70023,77894,70338,78733,71088,0.525514
Sant Martí,121162,112497,121379,112549,122263,113250,122561,113399,123722,114593,0.519153
Sants-Montjuïc,94194,86563,94461,86516,95006,86904,94817,86486,95950,88141,0.52121
Sarrià-Sant Gervasi,79502,67332,80188,67838,80877,68402,80804,68308,80776,68484,0.541176


In [7]:
#Population Growth

population_by_year = population.groupby(["Year", "District"])["Number"].sum()
df_population_by_year = population_by_year.to_frame().reset_index()
growth = df_population_by_year.pivot(columns = "Year", values="Number", index="District").reset_index()

In [8]:
#Detailed Growth
for year in range(2015,2019):
    name_column = 'gr_'+str(year)+'_'+str(year+1)
    growth[name_column] = ((growth[year+1] - growth[year])/growth[year])*100

growth['total_growth'] = ((growth[2019] - growth[2015])/growth[2015])*100

growth

Year,District,2015,2016,2017,2018,2019,gr_2015_2016,gr_2016_2017,gr_2017_2018,gr_2018_2019,total_growth
0,Ciutat Vella,100115,100070,101387,100714,103429,-0.044948,1.316079,-0.663793,2.695752,3.310193
1,Eixample,263558,264305,266416,264874,265910,0.283429,0.798698,-0.578794,0.391129,0.892403
2,Gràcia,120401,120918,121347,121005,121798,0.429398,0.354786,-0.281836,0.655345,1.160289
3,Horta-Guinardó,166559,167268,168751,169520,171495,0.425675,0.886601,0.455701,1.165054,2.963514
4,Les Corts,81530,81642,82033,81755,81974,0.137373,0.47892,-0.338888,0.267874,0.544585
5,Nou Barris,164648,164881,166579,167868,170669,0.141514,1.029834,0.773807,1.668573,3.656892
6,Sant Andreu,146494,146731,147594,148232,149821,0.161781,0.588151,0.432267,1.071968,2.271083
7,Sant Martí,233659,233928,235513,235960,238315,0.115125,0.677559,0.189798,0.998051,1.992647
8,Sants-Montjuïc,180757,180977,181910,181303,184091,0.12171,0.515535,-0.333681,1.537757,1.844465
9,Sarrià-Sant Gervasi,146834,148026,149279,149112,149260,0.811801,0.846473,-0.111871,0.099254,1.652206


In [9]:
#Global Population Growth
growth_population = pd.DataFrame(population.groupby(["Year"])["Number"].sum()).T.reset_index().drop(columns = "index")
for year in range(2015,2019):
    name_column = 'gr_'+str(year)+'_'+str(year+1)
    growth_population[name_column] = ((growth_population[year+1] - growth_population[year])/growth_population[year])*100

growth_population['total_growth'] = ((growth_population[2019] - growth_population[2015])/growth_population[2015])*100
growth_population

Year,2015,2016,2017,2018,2019,gr_2015_2016,gr_2016_2017,gr_2017_2018,gr_2018_2019,total_growth
0,1604555,1608746,1620809,1620343,1636762,0.261194,0.749839,-0.028751,1.013304,2.007223


In [10]:
#aging

population['multiplication'] =population['Age']*population['Number']
population['Gender'].value_counts()

population_by_year = population.groupby(['Year','District']).sum().drop(columns='Age')

population_by_year['avg_age']=(population_by_year['multiplication']/population_by_year['Number']).round(2)

average_age = population_by_year.reset_index()
average_age_table = average_age.pivot(columns="Year", values='avg_age', index='District').rename_axis(columns='')

average_age_table['Growth rate %'] = growth_rate(average_age_table[2019],average_age_table[2015])
average_age_table

Unnamed: 0_level_0,2015,2016,2017,2018,2019,Growth rate %
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ciutat Vella,40.37,40.26,39.99,39.9,39.67,-1.73
Eixample,44.86,44.84,44.73,44.79,44.74,-0.27
Gràcia,44.25,44.22,44.18,44.21,44.09,-0.36
Horta-Guinardó,45.22,45.31,45.24,45.26,45.15,-0.15
Les Corts,45.48,45.62,45.61,45.74,45.8,0.7
Nou Barris,44.62,44.73,44.64,44.55,44.39,-0.52
Sant Andreu,43.9,44.04,44.11,44.19,44.22,0.73
Sant Martí,43.33,43.45,43.5,43.58,43.63,0.69
Sants-Montjuïc,43.71,43.81,43.79,43.88,43.77,0.14
Sarrià-Sant Gervasi,42.9,42.91,42.82,42.92,43.07,0.4


In [11]:
#Agregation of ages

population_by_year = population.groupby(["Year","Age", "District"])["Number"].sum()
age_district = population_by_year.to_frame().reset_index()


age_district['Age'] = age_district['Age'].astype(int)
age_district['Range_age'] = age_district.apply(new_column, axis=1)
age_district_by_range = age_district.groupby(['Year', 'District', 'Range_age']).sum().reset_index()

districts = list(age_district['District'].unique())

list_df = []
for year in range(2015,2020):
    for district in districts:
        df_sliced = age_district_by_range[(age_district_by_range['Year']==year) & (age_district_by_range['District']==district)]
        df_sliced['Percentage_age'] = ((df_sliced['Number']/df_sliced['Number'].sum())*100).round(2)
        list_df.append(df_sliced)
        

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [12]:
#Cleaned Table with percentages/merge 'range' with 'ages'

age_district_totals = age_district_by_range.groupby(['Year','District']).sum().reset_index().drop(columns='Age')
age_district_merged = pd.merge(age_district_by_range,age_district_totals,on=['Year', 'District'])
age_district_merged['Percentage'] = ((age_district_merged['Number_x']/age_district_merged['Number_y'])*100).round(2)
age_district_merged = age_district_merged.drop(columns=['Number_x','Number_y','Age'])
age_district_merged

Unnamed: 0,Year,District,Range_age,Percentage
0,2015,Ciutat Vella,0-9,7.62
1,2015,Ciutat Vella,10-19,6.53
2,2015,Ciutat Vella,20-29,16.26
3,2015,Ciutat Vella,30-39,23.59
4,2015,Ciutat Vella,40-49,16.60
...,...,...,...,...
495,2019,Sarrià-Sant Gervasi,50-59,13.74
496,2019,Sarrià-Sant Gervasi,60-69,10.58
497,2019,Sarrià-Sant Gervasi,70-79,8.76
498,2019,Sarrià-Sant Gervasi,80-89,5.88


In [13]:
age_range = pd.pivot_table(age_district_merged, index = ['District','Range_age','Year'], values="Percentage").unstack('Range_age')

age_range

Unnamed: 0_level_0,Unnamed: 1_level_0,Percentage,Percentage,Percentage,Percentage,Percentage,Percentage,Percentage,Percentage,Percentage,Percentage
Unnamed: 0_level_1,Range_age,0-9,10-19,20-29,30-39,40-49,50-59,60-69,70-79,80-89,90+
District,Year,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Ciutat Vella,2015,7.62,6.53,16.26,23.59,16.6,11.05,7.33,5.61,4.37,1.05
Ciutat Vella,2016,7.55,6.64,16.75,22.89,16.74,11.35,7.28,5.51,4.22,1.07
Ciutat Vella,2017,7.5,6.82,17.49,22.57,16.52,11.38,7.33,5.33,4.02,1.05
Ciutat Vella,2018,7.42,6.95,17.92,22.05,16.68,11.41,7.44,5.17,3.91,1.05
Ciutat Vella,2019,7.1,7.01,18.71,22.17,16.56,11.39,7.35,5.05,3.64,1.01
Eixample,2015,7.47,7.06,11.9,17.56,14.58,13.34,11.3,8.3,6.66,1.83
Eixample,2016,7.5,7.07,12.08,17.21,14.62,13.41,11.32,8.32,6.59,1.87
Eixample,2017,7.47,7.09,12.48,17.04,14.65,13.32,11.29,8.27,6.47,1.93
Eixample,2018,7.39,7.15,12.73,16.65,14.68,13.25,11.37,8.44,6.34,1.99
Eixample,2019,7.27,7.14,13.12,16.52,14.74,13.04,11.36,8.73,6.08,2.0


In [14]:
age_range_2019 = age_district_merged[age_district_merged['Year'] == 2019]

age_range2019 = pd.pivot_table(age_range_2019, index = ['District','Range_age','Year'], values="Percentage").unstack('Range_age')

age_range2019

Unnamed: 0_level_0,Unnamed: 1_level_0,Percentage,Percentage,Percentage,Percentage,Percentage,Percentage,Percentage,Percentage,Percentage,Percentage
Unnamed: 0_level_1,Range_age,0-9,10-19,20-29,30-39,40-49,50-59,60-69,70-79,80-89,90+
District,Year,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Ciutat Vella,2019,7.1,7.01,18.71,22.17,16.56,11.39,7.35,5.05,3.64,1.01
Eixample,2019,7.27,7.14,13.12,16.52,14.74,13.04,11.36,8.73,6.08,2.0
Gràcia,2019,8.15,7.64,11.68,17.14,15.77,12.44,10.8,8.76,5.85,1.78
Horta-Guinardó,2019,8.01,8.55,10.7,14.1,15.73,13.81,10.78,9.78,6.86,1.68
Les Corts,2019,8.35,8.45,10.93,13.16,13.86,12.65,13.06,11.27,6.55,1.73
Nou Barris,2019,8.65,9.16,11.02,13.61,15.88,13.53,10.19,9.59,6.96,1.41
Sant Andreu,2019,8.86,8.85,10.32,13.8,16.49,13.73,11.7,9.11,5.75,1.39
Sant Martí,2019,8.8,8.82,10.66,14.97,16.52,13.95,10.91,8.41,5.62,1.33
Sants-Montjuïc,2019,7.72,7.88,12.26,16.74,15.7,13.63,10.98,8.07,5.67,1.34
Sarrià-Sant Gervasi,2019,9.81,11.52,11.5,11.66,14.57,13.74,10.58,8.76,5.88,1.99
