In [162]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [155]:
# Download the team overview page and parse its HTML with BeautifulSoup.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error response (4xx/5xx) into an exception
# instead of letting an error page be parsed as if it were the team list.
req = requests.get(
    "https://www.nulandia.nl/287/team-informatie/?sznid=4",
    timeout=10,  # seconds
)
req.raise_for_status()
html = req.text
soup = BeautifulSoup(html, "lxml")

In [156]:
# Select the raw page elements that hold the player data with CSS selectors.
# Each call returns a list of matching elements from the parsed overview page:
#   images - every <img> carrying the "img-thumbnail" class
#   links  - every <a> inside an element with classes well, well-sm and clearfix
#   names  - every <h4> nested inside one of those <a> elements
images = soup.select("img.img-thumbnail")
links = soup.select(".well.well-sm.clearfix a")
names = soup.select(".well.well-sm.clearfix a h4")

In [157]:
# Reduce the selected elements to three plain lists: player names, profile
# ('detail' page) URLs and image URLs.
# Some light cleaning is done along the way: repeated profile URLs are dropped
# and the site's default picture is replaced by the marker "no_profile_picture".

# The image URL that is treated as "this player has no picture of their own".
DEFAULT_IMAGE_URL = "https://cache.voetbalassist.nl/Data/Nulandia/Modules/ClubManagement/Front/Persoon/Standaard/Thumb/0.jpg?ticks=636174726621921994&w=60&h=80&crop=auto&quality=80"

# Name: the text content of every selected heading element.
player_names = [heading.get_text() for heading in names]

# Link: prefix each href with the site host. dict.fromkeys keeps only the first
# occurrence of every URL while preserving the original order.
hrefs = (anchor['href'] for anchor in links)
player_links = list(dict.fromkeys(f'http://nulandia.nl{path}' for path in hrefs))

# Image: keep the src as-is unless it is exactly the default picture.
sources = (thumbnail['src'] for thumbnail in images)
player_images = [
    "no_profile_picture" if src == DEFAULT_IMAGE_URL else src
    for src in sources
]

In [158]:
# Combine the three parallel lists into one dict per player, convert the list
# of dicts to a pandas DataFrame, save it to a CSV file and display it.
#
# Entries are paired by position. Looking the position up with
# `player_names.index(name)` is avoided on purpose: `.index` always returns the
# FIRST match, so two players sharing a name would both receive the first
# player's link and image (and every lookup rescans the list).
#
# An IndexError here means fewer links or images than names were collected.
basic_player_info = [
    {
        "name": name,
        "link": player_links[position],
        "image": player_images[position],
    }
    for position, name in enumerate(player_names)
]

basic_df = pd.DataFrame(basic_player_info)
basic_df.to_csv("basic_nulandia.csv")
basic_df

Unnamed: 0,image,link,name
0,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/33/sander-arts/profiel/,Sander Arts
1,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/43/stan-van-bergen/prof...,Stan van Bergen
2,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/42/stef-van-bergen/prof...,Stef van Bergen
3,no_profile_picture,http://nulandia.nl/488/54/tim-vd-biezen/profiel/,Tim vd. Biezen
4,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/104/martijn-coelen/prof...,Martijn Coelen
5,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/127/luuk-van-de-doelen/...,Luuk van de Doelen
6,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/146/stijn-van-der-donk/...,Stijn van der Donk
7,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/150/thijs-van-druenen/p...,Thijs van Druenen
8,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/152/patrick-van-de-elze...,Patrick van de Elzen
9,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/189/stef-de-groot/profiel/,Stef de Groot


In [159]:
# Visit every player's profile ('detail') page, parse it with BeautifulSoup and
# add the birthday, team and team role to that player's dict.


def fetch_player_details(profile_url, timeout=10):
    """Download one profile page and extract the player's detail fields.

    All intermediate objects (response, parsed page) are local to this
    function, so the notebook-level `req`, `html` and `soup` of the overview
    page are not overwritten and the earlier cells stay re-runnable.

    Args:
        profile_url: Absolute URL of the player's profile page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with the keys "birthday", "team" and "team_role".

    Raises:
        requests.RequestException: On connection problems, timeouts or an
            HTTP error status (via raise_for_status()).
        IndexError: If the page has fewer than two ".col-sm-9" elements or no
            ".pull-left li" element.
    """
    response = requests.get(profile_url, timeout=timeout)
    response.raise_for_status()
    page = BeautifulSoup(response.text, "lxml")

    # The second ".col-sm-9" element is read as the birthday - presumably the
    # page layout is stable in that respect; verify if the site changes.
    birthday = page.select(".col-sm-9")[1].get_text()

    # The first ".pull-left li" normally reads "<team>, <role>".
    status = page.select(".pull-left li")[0].get_text().strip()
    parts = [part.strip() for part in status.split(",")]
    team = parts[0]
    # Profile pages that deviate from the normal layout have no comma; in that
    # case the single value is used for both the team and the team role.
    team_role = parts[1] if len(parts) > 1 else parts[0]

    return {"birthday": birthday, "team": team, "team_role": team_role}


all_players = []

for player in basic_player_info:
    # The dicts in basic_player_info are extended in place; all_players
    # collects those same (now expanded) dicts.
    player.update(fetch_player_details(player["link"]))
    all_players.append(player)

In [161]:
# Build a pandas DataFrame from the 'expanded' list of player dicts and display
# it in the notebook as this cell's output. Writing it to a CSV file is done in
# a separate cell.
df = pd.DataFrame(data=basic_player_info)
df

Unnamed: 0,birthday,image,link,name,team,team_role
0,5 juni,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/33/sander-arts/profiel/,Sander Arts,Nulandia 1 (zon),Teamspeler
1,21 oktober,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/43/stan-van-bergen/prof...,Stan van Bergen,Nulandia 1 (zon),Teamspeler
2,13 juni,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/42/stef-van-bergen/prof...,Stef van Bergen,Nulandia 1 (zon),Teamspeler
3,31 juli,no_profile_picture,http://nulandia.nl/488/54/tim-vd-biezen/profiel/,Tim vd. Biezen,Nulandia 1 (zon),Teamspeler
4,15 april,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/104/martijn-coelen/prof...,Martijn Coelen,Nulandia 1 (zon),Teamspeler
5,8 februari,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/127/luuk-van-de-doelen/...,Luuk van de Doelen,Nulandia 1 (zon),Teamspeler
6,24 mei,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/146/stijn-van-der-donk/...,Stijn van der Donk,Nulandia 1 (zon),Teamspeler
7,2 mei,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/150/thijs-van-druenen/p...,Thijs van Druenen,Nulandia 1 (zon),Teamspeler
8,26 mei,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/152/patrick-van-de-elze...,Patrick van de Elzen,Nulandia 1 (zon),Teamspeler
9,20 oktober,https://cache.voetbalassist.nl/Data/Nulandia/M...,http://nulandia.nl/488/189/stef-de-groot/profiel/,Stef de Groot,Nulandia 1 (zon),Teamspeler


In [None]:
# Save the expanded player table to "nulandia.csv" in the working directory.
# With pandas' defaults the row index is written as the first column.
df.to_csv(path_or_buf="nulandia.csv")