In [1]:
import pandas as pd
import numpy as np

In [2]:
age = pd.read_csv('Percentage_age_immigrant_population_year.csv')
age.rename(columns={'percentage_immigrants_per_gender': 'percentage'}, inplace=True)
age

Unnamed: 0,year,age,percentage
0,2017,0-4,4.0331
1,2017,5-9,2.8474
2,2017,10-14,2.8062
3,2017,15-19,4.6465
4,2017,20-24,13.1189
...,...,...,...
58,2015,80-84,0.6570
59,2015,85-89,0.5521
60,2015,90-94,0.2661
61,2015,95-99,0.0637


In [3]:
age_district = pd.read_csv('Percentage_age_immigrant_population_district_year.csv')
age_district.rename(columns={'immigrants_percentage': 'percentage'}, inplace=True)
age_district

Unnamed: 0,year,district_name,age,percentage
0,2017,Ciutat Vella,>=100,0.0000
1,2017,Ciutat Vella,0-4,2.5391
2,2017,Ciutat Vella,10-14,1.5641
3,2017,Ciutat Vella,15-19,3.9712
4,2017,Ciutat Vella,20-24,17.2557
...,...,...,...,...
688,2015,Sarrià-Sant Gervasi,75-79,1.0234
689,2015,Sarrià-Sant Gervasi,80-84,0.8333
690,2015,Sarrià-Sant Gervasi,85-89,0.6871
691,2015,Sarrià-Sant Gervasi,90-94,0.3801


### The average immigrant in terms of age

Based on 2017 data - the most common ages are (rounded by 2 decimals):

In [4]:
age_distribution_2017 = age[(age['year'] == 2017)]
decimals = 2    
age_distribution_2017['percentage'] = age_distribution_2017['percentage'].apply(lambda x: round(x, decimals))
age_distribution_2017

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,year,age,percentage
0,2017,0-4,4.03
1,2017,5-9,2.85
2,2017,10-14,2.81
3,2017,15-19,4.65
4,2017,20-24,13.12
5,2017,25-29,21.52
6,2017,30-34,15.92
7,2017,35-39,10.57
8,2017,40-44,7.14
9,2017,45-49,4.79


### Check trend in immigration ages evolution from 2015 - 2017

Minor changes in terms of evolution. About 2% increase for ranges 20-24 and 25-29. Slightly decrease (1%) for range 35-39. Quite stable (also quite a short period for observing trends). We discard to pursue a year comparison by district.

In [5]:
age_distribution_years = age
decimals = 2    
age_distribution_years['percentage'] = age_distribution_years['percentage'].apply(lambda x: round(x, decimals))
age_distribution_years
age_years = age_distribution_years.pivot(index="age", columns="year", values="percentage")
age_years

year,2015,2016,2017
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0-4,4.07,4.5,4.03
10-14,3.02,3.26,2.81
15-19,4.18,4.92,4.65
20-24,11.36,12.62,13.12
25-29,19.43,20.33,21.52
30-34,16.18,15.65,15.92
35-39,11.58,10.83,10.57
40-44,7.83,7.15,7.14
45-49,5.44,4.81,4.79
5-9,3.13,3.22,2.85


### Check age distribution per district

Age range distribution per district. Based on the latest available data. Objective: identify possible trends in age ranges depending on the different neighbourhoods.

In [6]:
age_district_2017 = age_district[(age_district['year'] == 2017) & (age_district['district_name'] != 'No consta')]
decimals = 2    
age_district_2017['percentage'] = age_district_2017['percentage'].apply(lambda x: round(x, decimals))
age_district_2017


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,year,district_name,age,percentage
0,2017,Ciutat Vella,>=100,0.00
1,2017,Ciutat Vella,0-4,2.54
2,2017,Ciutat Vella,10-14,1.56
3,2017,Ciutat Vella,15-19,3.97
4,2017,Ciutat Vella,20-24,17.26
...,...,...,...,...
226,2017,Sarrià-Sant Gervasi,75-79,1.09
227,2017,Sarrià-Sant Gervasi,80-84,0.92
228,2017,Sarrià-Sant Gervasi,85-89,0.68
229,2017,Sarrià-Sant Gervasi,90-94,0.40


In [7]:
age_per_district_2017 = age_district_2017.pivot(index="age", columns="district_name", values="percentage")
age_per_district_2017


district_name,Ciutat Vella,Eixample,Gràcia,Horta-Guinardó,Les Corts,Nou Barris,Sant Andreu,Sant Martí,Sants-Montjuïc,Sarrià-Sant Gervasi
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0-4,2.54,3.15,3.37,4.09,5.03,5.29,4.01,5.01,3.3,6.15
10-14,1.56,1.85,2.0,3.18,3.79,3.87,3.35,2.86,2.57,4.84
15-19,3.97,3.76,4.3,4.94,5.17,5.98,4.88,3.99,5.2,5.5
20-24,17.26,14.31,13.23,12.26,11.7,12.79,11.52,11.57,13.66,9.79
25-29,26.79,25.69,24.78,19.59,19.66,16.24,17.85,20.07,22.43,15.87
30-34,17.51,17.07,19.64,15.5,13.99,13.16,14.7,16.22,16.44,12.97
35-39,10.64,9.98,10.6,11.87,9.81,10.2,10.99,11.15,10.79,9.55
40-44,6.12,5.92,6.08,7.05,8.3,7.8,8.62,8.02,6.91,8.44
45-49,4.09,4.08,3.57,4.6,4.89,6.09,5.37,5.02,4.55,6.39
5-9,2.08,2.24,2.18,2.77,3.54,4.06,3.38,2.82,2.56,4.05
