# Extracting Education Services Status (Public, Private or Charter)

In [40]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Prototype: scrape a single listing page (private primary schools) and print
# the school name plus the text of its 'post-category' element for every
# listing whose locality is exactly 'Barcelona'.
url = 'https://escoles.barcelona/es/colegios/privados-educacion-primaria'

# Issue the HTTP request; the timeout stops the cell from hanging forever
# if the server never answers.
response = requests.get(url, timeout=30)

# Only parse the page when the request succeeded
if response.status_code == 200:
    # Parse the HTML content of the page with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Every listing lives in a div with class 'jitem-body-content'
    divs = soup.find_all('div', class_='jitem-body-content')

    # Rows collected for the DataFrame
    data = []

    for div in divs:
        title_div = div.find('div', class_='jitem-title')
        # The description div holds both the address block and the category
        description_div = div.find('div', class_='jitem-desc')
        if title_div is None or description_div is None:
            # find() returns None for a missing tag; skip malformed listings
            # instead of crashing the whole scrape with an AttributeError.
            continue

        company_address_div = description_div.find('div', class_='company-address')
        if company_address_div is None:
            continue

        locality_span = company_address_div.find('span', itemprop='addressLocality')
        if locality_span is None:
            continue

        # Keep only listings located in Barcelona itself
        if locality_span.get_text(strip=True) != 'Barcelona':
            continue

        category_div = description_div.find('div', class_='post-category')
        if category_div is None:
            continue

        data.append({
            'Name': title_div.get_text(strip=True),
            'Neighborhood Name': category_div.get_text(strip=True),
        })

    # Build the DataFrame; explicit columns keep the schema even with no rows
    df = pd.DataFrame(data, columns=['Name', 'Neighborhood Name'])

    # Show the DataFrame
    print(df)
else:
    print(f"Failed to retrieve webpage: {url} (status {response.status_code})")

                                     Name    Neighborhood Name
0  Benjamin Franklin Internacional School  Sarrià-Sant Gervasi
1                      Escola Casa Nostra  Sarrià-Sant Gervasi
2                     Escola Perez Iborra             Eixample
3        Santa Clara International School  Sarrià-Sant Gervasi
4            St. Peter's School Barcelona            Pedralbes
5                Princess Margaret School       Horta-Guinardó
6                 Zürich Schule Barcelona            Pedralbes


In [41]:
#https://escoles.barcelona/es/parvularios/privados-parvularios?controller=search&orderBy=packageOrder%20desc&typeSearch=13&radius=100&resetSearch=1&start=20
#https://escoles.barcelona/es/parvularios/privados-parvularios?controller=search&orderBy=packageOrder%20desc&typeSearch=13&radius=100&resetSearch=1&start=0
# Pagination: only the trailing `start=` value changes between pages (0, 20, ...)

## FUNCTION DEFINITION

In [42]:
#IMPORTS
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [43]:
def read_webpage(url, status, num_pages, page_size=20, timeout=30):
  """Scrape paginated school listings and tag every row with ``status``.

  Parameters
  ----------
  url : str
      Listing URL. When ``num_pages`` is not 1, the page offset
      (0, ``page_size``, 2 * ``page_size``, ...) is appended to it, so it is
      expected to end with ``start=``. When ``num_pages`` is exactly 1 the URL
      is requested unchanged.
  status : str
      Value written to the ``Status`` column of every returned row.
  num_pages : int
      Number of result pages to request.
  page_size : int, optional
      Offset step between consecutive pages (default 20).
  timeout : float, optional
      Seconds passed to ``requests.get`` as its timeout (default 30), so a
      stalled server cannot hang the notebook indefinitely.

  Returns
  -------
  pandas.DataFrame
      Columns ``Name``, ``Neighborhood Name`` and ``Status``; one row per
      listing whose locality text is exactly ``'Barcelona'``. The columns are
      present even when no row was collected.

  Notes
  -----
  Pages that do not answer with HTTP 200 are reported on stdout and skipped.
  Listings missing any of the expected tags are skipped as well.
  """
  columns = ['Name', 'Neighborhood Name', 'Status']
  data = []

  for page in range(num_pages):
    # A single-page scrape uses the URL as given; otherwise append the offset
    page_url = url if num_pages == 1 else url + str(page * page_size)

    response = requests.get(page_url, timeout=timeout)
    if response.status_code != 200:
      # Name the failing page so partial results can be diagnosed
      print(f"Failed to retrieve webpage: {page_url} (status {response.status_code})")
      continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Every listing lives in a div with class 'jitem-body-content'
    for item in soup.find_all('div', class_='jitem-body-content'):
      title_div = item.find('div', class_='jitem-title')
      # The description div holds both the address block and the category
      description_div = item.find('div', class_='jitem-desc')
      if title_div is None or description_div is None:
        # find() returns None for a missing tag; skip malformed listings
        # instead of aborting the whole scrape with an AttributeError.
        continue

      address_div = description_div.find('div', class_='company-address')
      if address_div is None:
        continue

      locality_span = address_div.find('span', itemprop='addressLocality')
      if locality_span is None:
        continue

      # Keep only listings located in Barcelona itself
      if locality_span.get_text(strip=True) != 'Barcelona':
        continue

      category_div = description_div.find('div', class_='post-category')
      if category_div is None:
        continue

      data.append({
        'Name': title_div.get_text(strip=True),
        'Neighborhood Name': category_div.get_text(strip=True),
        'Status': status,
      })

  # Explicit columns keep the schema stable even when nothing was scraped
  return pd.DataFrame(data, columns=columns)

## 1. Educación Primaria en Barcelona


### 1.1. Público Educación Primaria

In [46]:
# Public primary schools: read_webpage appends the page offset after 'start='.
# NOTE(review): the path segment reads 'privados-educacion-primaria' although
# the rows are labelled 'public' — presumably the typeSearch query parameter
# selects the listing; confirm.
url_public_primary = 'https://escoles.barcelona/es/colegios/privados-educacion-primaria?controller=search&categoryId=0&orderBy=packageOrder%20desc&citySearch=barcelona&typeSearch=7&radius=100&resetSearch=1&start='
num_pages_public_primary = 9
status_public = 'public'  # reused by the later public sections

public_primary_df = read_webpage(
    url=url_public_primary,
    status=status_public,
    num_pages=num_pages_public_primary,
)
public_primary_df.head(7)

Unnamed: 0,Name,Neighborhood Name,Status
0,Escola Poeta Foix,Sarrià-Sant Gervasi,public
1,Escola Arc Iris,Horta-Guinardó,public
2,Escola Baixeras,Ciutat Vella,public
3,Escola Parc de la Ciutadella,Ciutat Vella,public
4,Escola Mossèn Jacint Verdaguer,Sants-Montjuïc,public
5,Escola De Bosc de Montjuïc,Sants-Montjuïc,public
6,Escola Ramon Llull,Eixample,public


### 1.2 Concertado Educación Primaria

In [47]:
# Charter ("concertado") primary schools.
# NOTE(review): the path segment reads 'privados-educacion-primaria' although
# the rows are labelled 'charter' — presumably the typeSearch query parameter
# selects the listing; confirm.
url_charter_primary = 'https://escoles.barcelona/es/colegios/privados-educacion-primaria?controller=search&categoryId=0&orderBy=packageOrder%20desc&citySearch=barcelona&typeSearch=9&radius=100&resetSearch=1&start='
num_pages_charter_primary = 9
status_charter = 'charter'  # reused by the later charter sections

charter_primary_df = read_webpage(
    url=url_charter_primary,
    status=status_charter,
    num_pages=num_pages_charter_primary,
)
charter_primary_df.head(7)

Unnamed: 0,Name,Neighborhood Name,Status
0,Anna Ravell,Sants-Montjuïc,charter
1,Lys,Sarrià-Sant Gervasi,charter
2,Mireia,Eixample,charter
3,Sagrado Corazón,Eixample,charter
4,Sant Pere Claver,Sants-Montjuïc,charter
5,Sant Francesc Xavier,Sants-Montjuïc,charter
6,Sant Josep-Teresianes,Gràcia,charter


### 1.3 Privado Educación Primaria

In [49]:
# Private primary schools. The URL carries no citySearch parameter;
# read_webpage itself keeps only listings whose locality is 'Barcelona'.
url_private_primary = 'https://escoles.barcelona/es/colegios/privados-educacion-primaria?controller=search&orderBy=packageOrder%20desc&typeSearch=14&radius=100&resetSearch=1&start='
num_pages_private_primary = 2
status_private = 'private'  # reused by the later private sections

private_primary_df = read_webpage(
    url=url_private_primary,
    status=status_private,
    num_pages=num_pages_private_primary,
)
display(private_primary_df)

Unnamed: 0,Name,Neighborhood Name,Status
0,Benjamin Franklin Internacional School,Sarrià-Sant Gervasi,private
1,Escola Casa Nostra,Sarrià-Sant Gervasi,private
2,Escola Perez Iborra,Eixample,private
3,Santa Clara International School,Sarrià-Sant Gervasi,private
4,St. Peter's School Barcelona,Pedralbes,private
5,Princess Margaret School,Horta-Guinardó,private
6,Zürich Schule Barcelona,Pedralbes,private
7,English Academy Santa Claus,Sarrià-Sant Gervasi,private
8,Kensington School Barcelona,Pedralbes,private
9,École Française Ferdinand de Lesseps,Eixample,private


## 2. Educación Secundaria en Barcelona

### 2.1 Público Educación Secundaria

In [50]:
# Public secondary schools; status_public comes from section 1.1.
# NOTE(review): typeSearch=7 is the same value used by the public primary
# URL, and the first rows displayed here are identical to section 1.1 —
# this query may be returning primary schools; confirm the correct
# typeSearch value for public secondary.
url_public_secondary = 'https://escoles.barcelona/es/colegios/publicos-educacion-secundaria?controller=search&orderBy=packageOrder%20desc&citySearch=Barcelona&typeSearch=7&radius=100&resetSearch=1&start='
num_pages_public_secondary = 9

public_secondary_df = read_webpage(
    url=url_public_secondary,
    status=status_public,
    num_pages=num_pages_public_secondary,
)
public_secondary_df.head(7)

Unnamed: 0,Name,Neighborhood Name,Status
0,Escola Poeta Foix,Sarrià-Sant Gervasi,public
1,Escola Arc Iris,Horta-Guinardó,public
2,Escola Baixeras,Ciutat Vella,public
3,Escola Parc de la Ciutadella,Ciutat Vella,public
4,Escola Mossèn Jacint Verdaguer,Sants-Montjuïc,public
5,Escola De Bosc de Montjuïc,Sants-Montjuïc,public
6,Escola Ramon Llull,Eixample,public


### 2.2 Concertado Educación Secundaria

In [51]:
# Charter ("concertado") secondary schools; status_charter comes from section 1.2.
url_charter_secondary = 'https://escoles.barcelona/es/colegios/concertados-educacion-secundaria?controller=search&orderBy=packageOrder%20desc&citySearch=Barcelona&typeSearch=11&radius=100&resetSearch=1&start='
num_pages_charter_secondary = 7

charter_secondary_df = read_webpage(
    url=url_charter_secondary,
    status=status_charter,
    num_pages=num_pages_charter_secondary,
)
charter_secondary_df.head(7)

Unnamed: 0,Name,Neighborhood Name,Status
0,Mireia,Eixample,charter
1,Sagrado Corazón,Eixample,charter
2,Anna Ravell,Sants-Montjuïc,charter
3,Sant Francesc Xavier,Sants-Montjuïc,charter
4,Sant Josep-Teresianes,Gràcia,charter
5,Claret,Gràcia,charter
6,Acis-Artur Martorell,Horta-Guinardó,charter


### 2.3 Privado Educación Secundaria

In [52]:
# Private secondary schools; status_private comes from section 1.3.
url_private_secondary = 'https://escoles.barcelona/es/colegios/privados-educacion-secundaria?controller=search&orderBy=packageOrder%20desc&typeSearch=12&radius=100&resetSearch=1&start='
num_pages_private_secondary = 2

private_secondary_df = read_webpage(
    url=url_private_secondary,
    status=status_private,
    num_pages=num_pages_private_secondary,
)
display(private_secondary_df)

Unnamed: 0,Name,Neighborhood Name,Status
0,Aula Escola Europea,Sarrià-Sant Gervasi,private
1,Benjamin Franklin Internacional School,Sarrià-Sant Gervasi,private
2,Escola Casa Nostra,Sarrià-Sant Gervasi,private
3,Escola Perez Iborra,Eixample,private
4,Santa Clara International School,Sarrià-Sant Gervasi,private
5,St. Peter's School Barcelona,Pedralbes,private
6,Princess Margaret School,Horta-Guinardó,private
7,Zürich Schule Barcelona,Pedralbes,private
8,Granés Batxillerat,Eixample,private
9,CIC Escola Batxillerats,Sant Gervasi - la Bonanova,private


## 3. Guarderías/Educación Infantil

### 3.1 Público Educación Infantil

In [54]:
# Public early-childhood ("parvularios") schools; status_public comes from section 1.1.
url_public_childhood = 'https://escoles.barcelona/es/parvularios/publicos-parvularios?controller=search&orderBy=packageOrder%20desc&citySearch=Barcelona&typeSearch=8&radius=100&resetSearch=1&start='
num_pages_public_childhood = 9

public_childhood_df = read_webpage(
    url=url_public_childhood,
    status=status_public,
    num_pages=num_pages_public_childhood,
)
public_childhood_df.head(7)

Unnamed: 0,Name,Neighborhood Name,Status
0,Escola Rius i Taulet,Gràcia,public
1,Escola Baixeras,Ciutat Vella,public
2,Escola Parc de la Ciutadella,Ciutat Vella,public
3,Escola Mossèn Jacint Verdaguer,Sants-Montjuïc,public
4,Escola De Bosc de Montjuïc,Sants-Montjuïc,public
5,Escola Poeta Foix,Sarrià-Sant Gervasi,public
6,Escola Ramon Llull,Eixample,public


### 3.2 Concertado Educación Infantil

In [55]:
# Charter early-childhood ("parvularios") schools; status_charter comes from section 1.2.
url_charter_childhood = 'https://escoles.barcelona/es/parvularios/concertados-parvularios?controller=search&orderBy=packageOrder%20desc&citySearch=Barcelona&typeSearch=10&radius=100&resetSearch=1&start='
num_pages_charter_childhood = 8

charter_childhood_df = read_webpage(
    url=url_charter_childhood,
    status=status_charter,
    num_pages=num_pages_charter_childhood,
)
charter_childhood_df.head(7)

Unnamed: 0,Name,Neighborhood Name,Status
0,Sagrat Cor-Sarrià,Sarrià-Sant Gervasi,charter
1,Anna Ravell,Sants-Montjuïc,charter
2,Lys,Sarrià-Sant Gervasi,charter
3,Mireia,Eixample,charter
4,Sagrado Corazón,Eixample,charter
5,Jesús Maria,Sants-Montjuïc,charter
6,Sant Pere Claver,Sants-Montjuïc,charter


### 3.3 Privado Educación Infantil


In [56]:
# Private early-childhood ("parvularios") schools; status_private comes from section 1.3.
url_private_childhood = 'https://escoles.barcelona/es/parvularios/privados-parvularios?controller=search&orderBy=packageOrder%20desc&typeSearch=13&radius=100&resetSearch=1&start='
num_pages_private_childhood = 2

private_childhood_df = read_webpage(
    url=url_private_childhood,
    status=status_private,
    num_pages=num_pages_private_childhood,
)
display(private_childhood_df)

Unnamed: 0,Name,Neighborhood Name,Status
0,Aula Escola Europea,Sarrià-Sant Gervasi,private
1,Benjamin Franklin Internacional School,Sarrià-Sant Gervasi,private
2,Escola Casa Nostra,Sarrià-Sant Gervasi,private
3,Escola Perez Iborra,Eixample,private
4,Santa Clara International School,Sarrià-Sant Gervasi,private
5,St. Peter's School Barcelona,Pedralbes,private
6,Princess Margaret School,Horta-Guinardó,private
7,Zürich Schule Barcelona,Pedralbes,private
8,English Academy Santa Claus,Sarrià-Sant Gervasi,private
9,Kensington School Barcelona,Pedralbes,private


### 3.4 Público Guardería

In [57]:
# Public nurseries ("guarderías"); status_public comes from section 1.1.
url_public_kindergarten = 'https://escoles.barcelona/es/guarderias/publicos-guarderias?controller=search&orderBy=packageOrder%20desc&citySearch=Barcelona&typeSearch=15&radius=100&resetSearch=1&start='
num_pages_public_kindergarten = 6

public_kindergarten_df = read_webpage(
    url=url_public_kindergarten,
    status=status_public,
    num_pages=num_pages_public_kindergarten,
)
public_kindergarten_df.head(7)

Unnamed: 0,Name,Neighborhood Name,Status
0,Escola Bressol Municipal El Tren de Fort Pienc,Eixample,public
1,Escola Bressol Municipal Cobi,el Poblenou,public
2,Escola Bressol Municipal La Mar Xica,el Poblenou,public
3,Escola Bressol Municipal El Xalet de la Paperera,el Poblenou,public
4,Escola Bressol Municipal L'Aliança,el Poblenou,public
5,Escola Bressol Municipal Júpiter,el Poblenou,public
6,Escola Bressol Municipal Diagonal Mar,Diagonal Mar i el Front Marítim del Poblenou,public


### 3.5 Concertado Guardería

In [58]:
# Disabled cell: per the note that follows it, the charter nursery listing page
# was not found, so no charter_kindergarten_df is built and the merge step does
# not include one.
# NOTE(review): the URL kept below is identical to the charter "parvularios"
# URL used in section 3.2 (typeSearch=10), not a "guarderias" URL — replace it
# with the correct listing before re-enabling this cell.
#status_charter = 'charter'
#num_pages_charter_kindergarten = 8
#url_charter_kindergarten = 'https://escoles.barcelona/es/parvularios/concertados-parvularios?controller=search&orderBy=packageOrder%20desc&citySearch=Barcelona&typeSearch=10&radius=100&resetSearch=1&start='
#charter_kindergarten_df = read_webpage(url_charter_kindergarten, status_charter, num_pages_charter_kindergarten)
#charter_kindergarten_df.head(7)

# Error on Page (not found)

### 3.6 Privado Guardería

In [59]:
# Private nurseries ("guarderías"); status_private comes from section 1.3.
url_private_kindergarten = 'https://escoles.barcelona/es/guarderias/privados-guarderias?controller=search&orderBy=packageOrder%20desc&typeSearch=16&radius=100&resetSearch=1&start='
num_pages_private_kindergarten = 5

private_kindergarten_df = read_webpage(
    url=url_private_kindergarten,
    status=status_private,
    num_pages=num_pages_private_kindergarten,
)
display(private_kindergarten_df)

Unnamed: 0,Name,Neighborhood Name,Status
0,Escola Infantil Ninets,Eixample,private
1,Escola Bressol Petits,Eixample,private
2,Guardería La Gavina - Laforja,Gràcia,private
3,Guardería La Gavina - Marc Aureli,Sarrià-Sant Gervasi,private
4,Escuela Infantil Zürich Schule,Pedralbes,private
...,...,...,...
80,Llar d'infants Guinyolet,la Sagrera,private
81,Llar d'infants La Pau - Gran Via,Sants-Montjuïc,private
82,Llar d'infants Àgora,Eixample,private
83,Escola Infantil Pas A Pas,Horta-Guinardó,private


## Merge All Data

- Concatenate Datasets

- Reset Indices

- Drop duplicate rows

In [61]:
# Frames to merge, in section order: primary, secondary, early childhood,
# then nurseries (there is no charter nursery frame).
frames_to_merge = [
    public_primary_df,
    charter_primary_df,
    private_primary_df,
    public_secondary_df,
    charter_secondary_df,
    private_secondary_df,
    public_childhood_df,
    charter_childhood_df,
    private_childhood_df,
    public_kindergarten_df,
    private_kindergarten_df,
]

# Stack the frames, collapse rows that are identical across all columns
# (the first occurrence is kept), then renumber the index from 0.
status_df = (
    pd.concat(frames_to_merge)
    .drop_duplicates()
    .reset_index(drop=True)
)
display(status_df)

Unnamed: 0,Name,Neighborhood Name,Status
0,Escola Poeta Foix,Sarrià-Sant Gervasi,public
1,Escola Arc Iris,Horta-Guinardó,public
2,Escola Baixeras,Ciutat Vella,public
3,Escola Parc de la Ciutadella,Ciutat Vella,public
4,Escola Mossèn Jacint Verdaguer,Sants-Montjuïc,public
...,...,...,...
549,Llar d'infants Guinyolet,la Sagrera,private
550,Llar d'infants La Pau - Gran Via,Sants-Montjuïc,private
551,Llar d'infants Àgora,Eixample,private
552,Escola Infantil Pas A Pas,Horta-Guinardó,private


In [62]:
# Write the merged table to 'status_education.csv' (relative path). No `index`
# argument is passed, so pandas' default applies and the row index is written
# as the first, unnamed column of the file.
status_df.to_csv('status_education.csv')