In [1]:
# Libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup


## Import safety evaluation 2015 - 2019

Based on the 'Encuesta de victimización en Barcelona', a survey carried out by the Ajuntament de Barcelona.

Parse content on the website: https://www.bcn.cat/estadistica/castella/dades/anuari/cap08/C0805050.htm

In [2]:
# Download the 2015-2019 safety-perception page and parse it with BeautifulSoup.
url = 'https://www.bcn.cat/estadistica/castella/dades/anuari/cap08/C0805050.htm'
# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
html = response.content
soup = BeautifulSoup(html, "lxml")
soup

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>Percepción de la seguridad en la ciudad y el barrio por distritos. 2015-2019</title>
<script language="JavaScript" src="funcions.js"></script>
<script language="JavaScript" src="estructura.js"></script>
<script language="Javascript">whpreload()</script>
</head>
<body leftmargin="0" marginheight="0" marginwidth="0" topmargin="0">
<script language="JavaScript">document.write(makeheader());</script>
<table border="0" cellpadding="0" cellspacing="2" width="1">
<tr><td class="WhadsTitVar1" colspan="13" nowrap="" valign="bottom">5. Encuesta de victimización de Barcelona</td></tr>
<tr><td bgcolor="#000000" colspan="13"><script language="javascript">document.write('<img src="' + whpath + 'images/cpbk.gif" border="0" width="100%" height="2">')</script></td></tr>
<tr><td class="WhadsTitVar2" colspan="13" nowrap="" valign="bottom">5.5. Percepción de la seguridad en la ciudad

In [3]:
# Collect every <td class="WhadsColVar1"> cell; positions 1-5 are kept as the
# year labels (position 0 is skipped).
table = soup.find_all('td', attrs={'class': 'WhadsColVar1'})
# Strip non-breaking spaces and the ' (1)' footnote marker from each label.
year1 = [
    cell.text.replace('\xa0', '').replace(' (1)', '')
    for cell in table[1:6]
]

In [4]:
# Collect every <td class="WhadsDades"> cell (the numeric data cells).
table = soup.find_all('td', attrs={'class': 'WhadsDades'})
# Drop non-breaking spaces and switch the decimal comma to a decimal point.
cleaned = (cell.text.replace('\xa0', '').replace(',', '.') for cell in table)
# Discard cells that are empty after cleaning.
points = [value for value in cleaned if value]
# First five values -> city score, next five -> neighbourhood score.
points_city1, points_neighbourhood1 = points[:5], points[5:10]

In [5]:
# Sanity check: show the scraped years and both score lists, one list per line.
print(year1, points_city1, points_neighbourhood1, sep='\n')

['2015', '2016', '2017', '2018', '2019']
['6.1', '6.2', '6.3', '6.2', '5.2']
['6.4', '6.3', '6.5', '6.4', '5.9']


## Import safety evaluation 2010 - 2014

Based on the 'Encuesta de victimización en Barcelona', a survey carried out by the Ajuntament de Barcelona.

Parse content on the website: https://www.bcn.cat/estadistica/castella/dades/anuaris/anuari14/cap08/C0804060.htm

In [6]:
# Download the 2010-2014 safety-level page and parse it with BeautifulSoup.
url = 'https://www.bcn.cat/estadistica/castella/dades/anuaris/anuari14/cap08/C0804060.htm'
# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
html = response.content
soup = BeautifulSoup(html, "lxml")
soup

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>Nivel de seguridad durante el último año por distritos. 2010-2014</title>
<script language="JavaScript" src="funcions.js"></script>
<script language="JavaScript" src="estructura.js"></script>
<script language="Javascript">whpreload()</script>
</head>
<body leftmargin="0" marginheight="0" marginwidth="0" topmargin="0">
<script language="JavaScript">document.write(makeheader());</script>
<table border="0" cellpadding="0" cellspacing="2" width="1">
<tr><td class="WhadsTitVar1" colspan="13" nowrap="" valign="bottom">4. Encuesta de victimización en Barcelona</td></tr>
<tr><td bgcolor="#000000" colspan="13"><script language="javascript">document.write('<img src="' + whpath + 'images/cpbk.gif" border="0" width="100%" height="2">')</script></td></tr>
<tr><td class="WhadsTitVar2" colspan="13" nowrap="" valign="bottom">4.6. Nivel de seguridad durante el último año por distr

In [7]:
# Collect every <td class="WhadsColVar1"> cell; positions 1-5 are kept as the
# year labels (position 0 is skipped).
table = soup.find_all('td', attrs={'class': 'WhadsColVar1'})
# Strip non-breaking spaces and the ' (1)' footnote marker from each label.
year2 = [
    cell.text.replace('\xa0', '').replace(' (1)', '')
    for cell in table[1:6]
]

In [8]:
# Collect every <td class="WhadsDades"> cell (the numeric data cells).
table = soup.find_all('td', attrs={'class': 'WhadsDades'})
# Drop non-breaking spaces and switch the decimal comma to a decimal point.
cleaned = (cell.text.replace('\xa0', '').replace(',', '.') for cell in table)
# Discard cells that are empty after cleaning.
points = [value for value in cleaned if value]
# First five values -> city score, next five -> neighbourhood score.
points_city2, points_neighbourhood2 = points[:5], points[5:10]

In [9]:
# Sanity check: show the scraped years and both score lists, one list per line.
print(year2, points_city2, points_neighbourhood2, sep='\n')

['2010', '2011', '2012', '2013', '2014']
['6.1', '6.2', '6.3', '6.2', '6.4']
['5.6', '5.4', '5.6', '5.7', '6.0']


## Create dataframe

Combine the data from both periods into a single DataFrame and make sure it is clean and correctly typed.

In [10]:
# Join both periods, putting the 2010-2014 lists first so the rows end up in
# chronological order.
year = [*year2, *year1]
points_city = [*points_city2, *points_city1]
points_neighbourhood = [*points_neighbourhood2, *points_neighbourhood1]

In [11]:
# Build the DataFrame: one row per year with its city and neighbourhood scores.
columns = ['year', 'points_city', 'points_neighbourhood']
rows = list(zip(year, points_city, points_neighbourhood))
safety_perception = pd.DataFrame(rows, columns=columns)
safety_perception

Unnamed: 0,year,points_city,points_neighbourhood
0,2010,6.1,5.6
1,2011,6.2,5.4
2,2012,6.3,5.6
3,2013,6.2,5.7
4,2014,6.4,6.0
5,2015,6.1,6.4
6,2016,6.2,6.3
7,2017,6.3,6.5
8,2018,6.2,6.4
9,2019,5.2,5.9


In [12]:
# Inspect the column dtypes and non-null counts of the newly built DataFrame.
safety_perception.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 3 columns):
year                    10 non-null object
points_city             10 non-null object
points_neighbourhood    10 non-null object
dtypes: object(3)
memory usage: 368.0+ bytes


In [13]:
# Cast both score columns to float (they are reported as object dtype before this cell).
# NOTE(review): 'year' is left untouched here and therefore stays object dtype - confirm this is intended.
for column in ('points_city', 'points_neighbourhood'):
    safety_perception[column] = safety_perception[column].astype('float')

In [14]:
# Re-inspect the dtypes to confirm the score columns are now float.
safety_perception.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 3 columns):
year                    10 non-null object
points_city             10 non-null float64
points_neighbourhood    10 non-null float64
dtypes: float64(2), object(1)
memory usage: 368.0+ bytes


## Create CSV for analysis


In [15]:
# Export the table as CSV into the Analysis folder (path is relative to this notebook).
# No `index` argument is passed, so pandas' default applies and the DataFrame
# index is written as an unnamed first column.
safety_perception.to_csv('../../Analysis/safety_perception.csv')

In [None]:
# Disabled: alternative export to the Visualization folder using ';' as the separator.
#safety_perception.to_csv('../../Visualization/safety_perception.csv', sep=';')