## Workbook to scrape player market values from TransferMarkt.com

Set primary key for tables

Update scrape team list for more seasons and leagues

## Import libraries and connect to local SQLite

In [1]:
from bs4 import BeautifulSoup
import requests
import re
from datetime import datetime
import pandas as pd
from pandas.io import sql
import sqlite3
import numpy as np
# Open the local SQLite database file that the scraped tables are written to
connection = sqlite3.connect('../sql/stats.db.sqlite')
# Use str as the factory for TEXT values read back through this connection
connection.text_factory = str

## Create list of URLs to scrape teams in leagues data

In [2]:
# Build the league overview URLs to scrape: one per league and season

# Overview page of each league
leagues = [
    'https://www.transfermarkt.com/super-lig/startseite/wettbewerb/TR1',
    'https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1',
    'https://www.transfermarkt.com/ligue-1/startseite/wettbewerb/FR1',
    'https://www.transfermarkt.com/laliga/startseite/wettbewerb/ES1',
    'https://www.transfermarkt.com/serie-a/startseite/wettbewerb/IT1',
    'https://www.transfermarkt.com/1-bundesliga/startseite/wettbewerb/L1',
]

# Seasons to scrape (value of the saison_id query parameter)
years = ['2015', '2016', '2017']

# League URLs with the season parameter still open
league_pages = [league + '/plus/?saison_id=' for league in leagues]

# Final list: every league page combined with every season, grouped by league
pages_to_scrape = [page + year for page in league_pages for year in years]

In [3]:
# Display the generated league/season URLs for a visual check
pages_to_scrape

['https://www.transfermarkt.com/super-lig/startseite/wettbewerb/TR1/plus/?saison_id=2015',
 'https://www.transfermarkt.com/super-lig/startseite/wettbewerb/TR1/plus/?saison_id=2016',
 'https://www.transfermarkt.com/super-lig/startseite/wettbewerb/TR1/plus/?saison_id=2017',
 'https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1/plus/?saison_id=2015',
 'https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1/plus/?saison_id=2016',
 'https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1/plus/?saison_id=2017',
 'https://www.transfermarkt.com/ligue-1/startseite/wettbewerb/FR1/plus/?saison_id=2015',
 'https://www.transfermarkt.com/ligue-1/startseite/wettbewerb/FR1/plus/?saison_id=2016',
 'https://www.transfermarkt.com/ligue-1/startseite/wettbewerb/FR1/plus/?saison_id=2017',
 'https://www.transfermarkt.com/laliga/startseite/wettbewerb/ES1/plus/?saison_id=2015',
 'https://www.transfermarkt.com/laliga/startseite/wettbewerb/ES1/plus/?saison_id=2016',
 'https:

In [4]:
# Collect a (url name, team id, season) tuple for every team on each league page

season_teams = []

# Browser-like User-Agent sent with every request
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36"}

# Patterns applied to the first cell of a team row, in the order
# (url name, team id, season); each captures one part of the team link
detail_patterns = ('href="/(.*)/startseite/verein',
                   '/startseite/verein/(.*)/saison_id',
                   '/saison_id/(.*)"><img alt="')

for url in pages_to_scrape:

    # Fetch the page; fail loudly on HTTP errors instead of parsing an error page,
    # and never hang forever on a stalled connection
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    # The team table is the second <tbody> on the page
    tables = soup.find_all('tbody')
    if len(tables) < 2:
        print('{} skipped: team table not found'.format(url))
        continue

    # One HTML line per entry; team rows are the lines containing 'zentriert'
    rows = str(tables[1]).split("\n")

    for team in rows:
        if 'zentriert' not in team:
            continue

        # Split the row into its cells; the team link sits in the first one
        c = team.split('<td')
        if len(c) < 2:
            print('{}: row without cells skipped'.format(url))
            continue

        # All three details must be present, otherwise the row is reported and skipped
        matches = [re.search(pattern, c[1]) for pattern in detail_patterns]
        if not all(matches):
            print('{}: row without a team link skipped'.format(url))
            continue

        # Store (urlname, teamid, year) for building the squad URLs later
        season_teams.append(tuple(match.group(1) for match in matches))

    print('{} scrape complete'.format(url))

https://www.transfermarkt.com/super-lig/startseite/wettbewerb/TR1/plus/?saison_id=2015 scrape complete
https://www.transfermarkt.com/super-lig/startseite/wettbewerb/TR1/plus/?saison_id=2016 scrape complete
https://www.transfermarkt.com/super-lig/startseite/wettbewerb/TR1/plus/?saison_id=2017 scrape complete
https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1/plus/?saison_id=2015 scrape complete
https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1/plus/?saison_id=2016 scrape complete
https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1/plus/?saison_id=2017 scrape complete
https://www.transfermarkt.com/ligue-1/startseite/wettbewerb/FR1/plus/?saison_id=2015 scrape complete
https://www.transfermarkt.com/ligue-1/startseite/wettbewerb/FR1/plus/?saison_id=2016 scrape complete
https://www.transfermarkt.com/ligue-1/startseite/wettbewerb/FR1/plus/?saison_id=2017 scrape complete
https://www.transfermarkt.com/laliga/startseite/wettbewerb/ES1/plus/?saison_

In [5]:
# Display the collected (url name, team id, season) tuples for a visual check
season_teams

[('fenerbahce-istanbul', '36', '2015'),
 ('galatasaray-istanbul', '141', '2015'),
 ('besiktas-istanbul', '114', '2015'),
 ('trabzonspor', '449', '2015'),
 ('bursaspor', '20', '2015'),
 ('medical-park-antalyaspor', '589', '2015'),
 ('kasimpasa', '10484', '2015'),
 ('eskisehirspor', '825', '2015'),
 ('istanbul-buyuksehir-belediyespor', '6890', '2015'),
 ('ankaraspor', '2944', '2015'),
 ('genclerbirligi-ankara', '820', '2015'),
 ('sivasspor', '2381', '2015'),
 ('gaziantepspor', '524', '2015'),
 ('akhisar-belediye-genclik-ve-spor', '19771', '2015'),
 ('kayserispor', '3205', '2015'),
 ('torku-konyaspor', '2293', '2015'),
 ('caykur-rizespor', '126', '2015'),
 ('mersin-idmanyurdu', '3216', '2015'),
 ('besiktas-istanbul', '114', '2016'),
 ('fenerbahce-istanbul', '36', '2016'),
 ('galatasaray-istanbul', '141', '2016'),
 ('trabzonspor', '449', '2016'),
 ('istanbul-buyuksehir-belediyespor', '6890', '2016'),
 ('ankaraspor', '2944', '2016'),
 ('akhisar-belediye-genclik-ve-spor', '19771', '2016'),
 

## Create list of URLs to scrape player data

In [6]:
# Build the squad-page URL for every (team, club id, season) entry in season_teams

urllist = [
    "https://www.transfermarkt.com/" + team + "/kader/verein/" + clubid + "/plus/1/galerie/0?saison_id=" + year
    for team, clubid, year in season_teams
]

urllist

['https://www.transfermarkt.com/fenerbahce-istanbul/kader/verein/36/plus/1/galerie/0?saison_id=2015',
 'https://www.transfermarkt.com/galatasaray-istanbul/kader/verein/141/plus/1/galerie/0?saison_id=2015',
 'https://www.transfermarkt.com/besiktas-istanbul/kader/verein/114/plus/1/galerie/0?saison_id=2015',
 'https://www.transfermarkt.com/trabzonspor/kader/verein/449/plus/1/galerie/0?saison_id=2015',
 'https://www.transfermarkt.com/bursaspor/kader/verein/20/plus/1/galerie/0?saison_id=2015',
 'https://www.transfermarkt.com/medical-park-antalyaspor/kader/verein/589/plus/1/galerie/0?saison_id=2015',
 'https://www.transfermarkt.com/kasimpasa/kader/verein/10484/plus/1/galerie/0?saison_id=2015',
 'https://www.transfermarkt.com/eskisehirspor/kader/verein/825/plus/1/galerie/0?saison_id=2015',
 'https://www.transfermarkt.com/istanbul-buyuksehir-belediyespor/kader/verein/6890/plus/1/galerie/0?saison_id=2015',
 'https://www.transfermarkt.com/ankaraspor/kader/verein/2944/plus/1/galerie/0?saison_id=2

## Scrape player data

In [7]:
# Detailed positions grouped under the four summary positions
keeper = [
    'Keeper',
]
defender = [
    'Sweeper',
    'Centre-Back',
    'Left-Back',
    'Right-Back',
]
midfielder = [
    'Defensive Midfield',
    'Central Midfield',
    'Left Midfield',
    'Right Midfield',
    'Attacking Midfield',
]
attacker = [
    'Left Wing',
    'Right Wing',
    'Secondary Striker',
    'Centre-Forward',
]

In [8]:
# Function (plus private helpers) to parse the squad table on a page into a
# dataframe and append it to the market_vals table

# Column order of the market_vals table
_MARKETVAL_COLUMNS = ['teamid', 'number', 'name', 'pid', 'main_pos', 'position', 'dob',
                      'age', 'nat1', 'nat2', 'height', 'foot', 'teamsince', 'prevteam',
                      'prevteamid', 'transferfee', 'contractuntil', 'marketval',
                      'prevmarketval', 'team', 'season', 'teamurl', 'year', 'current_team',
                      'current_teamid', 'country']

# Columns stored as floats so that missing values (NaN) are representable
_FLOAT_COLUMNS = ('number', 'age', 'pid', 'year', 'prevteamid', 'teamid')

# Multiplier for each unit suffix used in fee / market value strings
_AMOUNT_UNITS = {'Mill.': 1000000, 'Th.': 1000}

# Browser-like User-Agent sent with the request
_REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36"}


def _cell_group(pattern, cells, idx):
    """Return group 1 of ``pattern`` searched in ``cells[idx]``.

    Returns None when the row has no cell ``idx`` or the pattern does not match.
    """
    if idx >= len(cells):
        return None
    found = re.search(pattern, cells[idx])
    return found.group(1) if found else None


def _or_nan(value):
    """Return ``value``, or NaN when it is None."""
    return np.nan if value is None else value


def _as_number(text, cast):
    """Convert ``text`` with ``cast`` (int or float); return NaN if that fails."""
    try:
        return cast(text)
    except (TypeError, ValueError):
        return np.nan


def _as_sql_date(text, fmt):
    """Parse ``text`` with strptime format ``fmt`` and return it as 'Y-M-D'.

    Month and day are deliberately not zero padded, matching the strings this
    notebook has always written. Returns NaN when ``text`` is None or unparsable.
    """
    try:
        parsed = datetime.strptime(text, fmt)
    except (TypeError, ValueError):
        return np.nan
    return "{}-{}-{}".format(parsed.year, parsed.month, parsed.day)


def _amount_after(anchor, cells, idx):
    """Return the money amount that follows ``anchor`` in ``cells[idx]`` as a float.

    Recognises '<number> Mill.' and '<number> Th.' with a decimal comma or point.
    The pattern is ASCII only (it does not try to match the currency sign), so it
    behaves the same whether the row text is a byte string or a text string.
    Returns None when the cell is missing or no such amount follows the anchor.
    """
    if idx >= len(cells):
        return None
    found = re.search(anchor + r'\s*(\d+(?:[.,]\d+)?) (Mill\.|Th\.)', cells[idx])
    if found is None:
        return None
    return float(found.group(1).replace(",", ".")) * _AMOUNT_UNITS[found.group(2)]


def _main_position(position):
    """Map a detailed position to its summary position, or NaN if it is in no list.

    Uses the notebook-level lists ``keeper``, ``defender``, ``midfielder`` and
    ``attacker``.
    """
    groups = (('keeper', keeper), ('defender', defender),
              ('midfielder', midfielder), ('attacker', attacker))
    for label, members in groups:
        if position in members:
            return label
    return np.nan


def _parse_player(cells, col_nums, team, teamid):
    """Extract one player's fields from the ``'<td'``-split cells of a table row.

    Every field is assigned on every call, falling back to NaN (0.0 for
    ``transferfee``) when it cannot be parsed, so no value can carry over from
    the previously parsed player.

    Parameters
    ----------
    cells : list of str
        The row's HTML split on ``'<td'``.
    col_nums : int
        Header count of the page; 12 means the standard layout, more than 12
        means an extra "current club" column precedes the height column.
    team, teamid
        Team of the page; used as the current team in the standard layout.

    Returns
    -------
    dict
        The player-level columns of the market_vals table.
    """
    nan = np.nan
    player = {}

    # Shirt number, name, player id
    player['number'] = _as_number(_cell_group('"rn_nummer">(.*)</div>', cells, 1), int)
    player['name'] = _or_nan(_cell_group('title="(.*)"/></a>', cells, 3))
    # Capture the digits directly; slicing up to the next quote loses the last
    # digit when the captured text contains no quote
    player['pid'] = _as_number(_cell_group(r'/profil/spieler/(\d+)', cells, 4), int)

    # Detailed and summary position
    position = _cell_group('>(.*)</td></tr>', cells, 5)
    player['position'] = _or_nan(position)
    player['main_pos'] = _main_position(position)

    # Birth date and age arrive in one cell as '<date> (<age>)'
    player['dob'] = nan
    player['age'] = nan
    born = _cell_group('">(.*)</td>', cells, 6)
    if born is not None:
        born = born.replace("(", "<").replace(")", "")
        age_part = re.search('<(.*)', born)
        date_part = re.search('(.*) <', born)
        if age_part:
            player['age'] = _as_number(age_part.group(1), int)
        if date_part:
            player['dob'] = _as_sql_date(date_part.group(1), '%b %d, %Y')

    # Nationalities: the alt text of each flag image, first two kept
    nats = re.findall('alt="([^"]*)', cells[7]) if len(cells) > 7 else []
    player['nat1'] = nats[0] if nats else nan
    player['nat2'] = nats[1] if len(nats) > 1 else nan

    # Work out where the layout-dependent columns start
    if col_nums > 12:
        # Extra "current club" column sits in cell 8 and shifts the rest by one
        first = 9
        player['current_team'] = _or_nan(_cell_group('<img alt="(.*)" class="" src="', cells, 8))
        player['current_teamid'] = _as_number(_cell_group('id="(.*)"><img alt="', cells, 8), int)
    elif col_nums == 12:
        # Current season: the page's team is the player's current team
        first = 8
        player['current_team'] = team
        player['current_teamid'] = teamid
    else:
        # Unknown layout: leave the layout-dependent fields at their defaults
        first = None
        player['current_team'] = nan
        player['current_teamid'] = nan

    # Defaults for the layout-dependent fields
    player.update(height=nan, foot=nan, teamsince=nan, prevteam=nan, prevteamid=nan,
                  transferfee=float(0), contractuntil=nan, marketval=nan, prevmarketval=nan)
    if first is None:
        return player

    # Height in metres, written with a decimal comma
    height = _cell_group('class="zentriert">(.*) m</td>', cells, first)
    if height is not None:
        player['height'] = _as_number(height.replace(",", "."), float)

    # Preferred foot; anything other than the three known values counts as missing
    foot = _cell_group('class="zentriert">(.*)</td>', cells, first + 1)
    if foot in ('left', 'right', 'both'):
        player['foot'] = foot

    # Date the player joined the team
    player['teamsince'] = _as_sql_date(
        _cell_group('class="zentriert">(.*)</td>', cells, first + 2), '%b %d, %Y')

    # Previous team, its id and the transfer fee share one cell
    player['prevteam'] = _or_nan(_cell_group('<img alt="(.*)" class="" src="', cells, first + 3))
    player['prevteamid'] = _as_number(_cell_group('id="(.*)"><img alt="', cells, first + 3), int)
    # The fee follows the word starting with 'A' at the start of the title
    fee = _amount_after(r'title=": A\S*', cells, first + 3)
    if fee is not None:
        player['transferfee'] = fee

    # Contract end date
    player['contractuntil'] = _as_sql_date(
        _cell_group('class="zentriert">(.*)</td>', cells, first + 4), '%d.%m.%Y')

    # Current and previous market value share one cell; the previous value is
    # labelled in English or German
    player['marketval'] = _or_nan(_amount_after('hauptlink">', cells, first + 5))
    player['prevmarketval'] = _or_nan(_amount_after(
        '(?:Previous market value|Vorheriger Marktwert):', cells, first + 5))

    return player


def converttodf(url):
    """Scrape one squad page and append its players to the ``market_vals`` table.

    Parameters
    ----------
    url : str
        Squad ("kader") page URL, as built in ``urllist``.

    Returns
    -------
    pandas.DataFrame
        The rows written to SQLite, one per player. Earlier versions returned
        None; callers that ignore the return value are unaffected.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    AttributeError, IndexError
        If the page-level details (title, country selector, og:url meta tag,
        squad table) are not where a squad page has them.

    Notes
    -----
    Writes through the notebook-level ``connection`` with ``if_exists='append'``,
    so calling this twice for the same URL stores the players twice.
    """
    # Set url and beautiful soup
    r = requests.get(url, headers=_REQUEST_HEADERS, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    # Extract team and season from the page title
    title = soup.title.get_text()
    team = re.search('(.*) - Detailed squad', title).group(1)
    season = re.search('Detailed squad (.*) Detailed view',
                       title.replace("(", "").replace(")", "")).group(1)

    # Extract league country: the selected option of the fourth '<form' fragment
    forms = str(soup.find_all('form')).split("<form")
    selected = re.search('<option selected="selected" value="(.*)</option>', forms[3]).group(1)
    country = re.search('>(.*)', selected).group(1)

    # Extract team URL name, season year and team id from the og:url meta tag
    urlcode = str(soup.find_all('meta', property="og:url"))
    teamurl = re.search('transfermarkt.com/(.*)/kader', urlcode).group(1)
    year = int(re.search('saison_id=(.*)" property', urlcode).group(1))
    teamid = int(re.search('verein/(.*)/plus', urlcode).group(1))

    # The squad table is the second <tbody>; player rows are the HTML lines
    # containing 'zentriert'
    rows = str(soup.find_all('tbody')[1]).split("\n")

    # Number of '<th' fragments in the table heads; more than 12 means the
    # table has a current club column
    col_nums = len(str(soup.find_all('thead')).split('<th'))

    # Parse every player row and add the page-level details
    records = []
    for row in rows:
        if 'zentriert' not in row:
            continue
        player = _parse_player(row.split('<td'), col_nums, team, teamid)
        player.update(teamid=teamid, team=team, season=season,
                      teamurl=teamurl, year=year, country=country)
        records.append(player)

    # Build the frame in one go instead of growing it row by row
    df_marketvals = pd.DataFrame(records, columns=_MARKETVAL_COLUMNS)

    # Update the column types
    for col in _FLOAT_COLUMNS:
        df_marketvals[col] = df_marketvals[col].astype(float)

    df_marketvals.to_sql(name='market_vals', con=connection, if_exists='append', index=False)

    return df_marketvals
    

## Scrape player data from team pages

In [9]:
# Scrape every squad page and report progress after every tenth URL
count = 0

for count, url in enumerate(urllist, 1):
    converttodf(url)
    if not count % 10:
        print("{} urls scraped. Most recent: {}".format(count, url))

10 urls scraped. Most recent: https://www.transfermarkt.com/ankaraspor/kader/verein/2944/plus/1/galerie/0?saison_id=2015
20 urls scraped. Most recent: https://www.transfermarkt.com/fenerbahce-istanbul/kader/verein/36/plus/1/galerie/0?saison_id=2016
30 urls scraped. Most recent: https://www.transfermarkt.com/kasimpasa/kader/verein/10484/plus/1/galerie/0?saison_id=2016
40 urls scraped. Most recent: https://www.transfermarkt.com/istanbul-buyuksehir-belediyespor/kader/verein/6890/plus/1/galerie/0?saison_id=2017
50 urls scraped. Most recent: https://www.transfermarkt.com/sivasspor/kader/verein/2381/plus/1/galerie/0?saison_id=2017
60 urls scraped. Most recent: https://www.transfermarkt.com/vitesse-arnheim/kader/verein/499/plus/1/galerie/0?saison_id=2015
70 urls scraped. Most recent: https://www.transfermarkt.com/heracles-almelo/kader/verein/1304/plus/1/galerie/0?saison_id=2015
80 urls scraped. Most recent: https://www.transfermarkt.com/sc-heerenveen/kader/verein/306/plus/1/galerie/0?saison_i

### Testing code

In [708]:
# Fetch a single squad page into `soup` for trying out parsing code
url = "https://www.transfermarkt.com/wolverhampton-wanderers/kader/verein/543/plus/1/galerie/0?saison_id=2010"
browser_headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36"}
r = requests.get(url, headers=browser_headers)
soup = BeautifulSoup(r.content, "html.parser")


In [722]:
# Try out the league-country extraction on the page held in `soup`:
# take the selected option of the fourth '<form' fragment, then the text after '>'
forms = str(soup.find_all('form')).split("<form")
result = re.search('<option selected="selected" value="(.*)</option>', forms[3])
result = re.search('>(.*)', result.group(1))
country = result.group(1)
country

'England'

## Hurdles overcome

Convert datetime fields to SQL-compatible strings:
https://stackoverflow.com/questions/17227110/what-is-the-best-way-to-save-datetime-type-in-sqlite
    
Insert a check for whether the table header includes 'current club', and split the parsing into two separate paths accordingly

Change all of the references to INT to FLOAT in the first function

Insert new DataFrame headings for current club and current club ID

Update scrape team list for more seasons and leagues

### Code that causes a conflict when uploading table to SQLite

In [None]:
# Kept for reference only: per the heading above, this datetime64 cast of 'dob'
# conflicts with uploading the table to SQLite.
# NOTE(review): df_marketvals is only created inside converttodf in the visible
# notebook, so this cell presumably raises NameError on a fresh kernel - confirm.
df_marketvals['dob'] = df_marketvals['dob'].astype('datetime64[ns]')

### Completed to do list items

Convert datetime fields to SQL-compatible strings:
https://stackoverflow.com/questions/17227110/what-is-the-best-way-to-save-datetime-type-in-sqlite
    
Insert a check for whether the table header includes 'current club', and split the parsing into two separate paths accordingly

Change all of the references to INT to FLOAT in the first function???

Insert new DataFrame headings for current club and current club ID
Check why Branko Jelic is not working in this: "https://www.transfermarkt.com/perth-glory/kader/verein/970/plus/1/galerie/0?saison_id=2009"

In [None]:
# Test dataframe for upload
    
    #columns = df_marketvals.columns
    
    #df_test = df_marketvals[columns[:32]]

# CREATE TABLE
# IF NOT EXISTS market_vals (
#  pid integer PRIMARY KEY,
# );

In [None]:
# Single-team URL list; running this cell replaces the urllist built from season_teams
# (presumably for a quick one-page test - confirm before a full run)
urllist = ["https://www.transfermarkt.com/arsenal-fc/kader/verein/11/plus/1/galerie/0?saison_id=2017"]

### League list

In [None]:
# Extended league list: the six leagues scraped above plus the A-League,
# the Premier League and Major League Soccer. Running this cell replaces `leagues`.
leagues = [
    'https://www.transfermarkt.com/a-league/startseite/wettbewerb/AUS1',
    'https://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1',
    'https://www.transfermarkt.com/super-lig/startseite/wettbewerb/TR1',
    'https://www.transfermarkt.com/eredivisie/startseite/wettbewerb/NL1',
    'https://www.transfermarkt.com/major-league-soccer/startseite/wettbewerb/MLS1',
    'https://www.transfermarkt.com/ligue-1/startseite/wettbewerb/FR1',
    'https://www.transfermarkt.com/laliga/startseite/wettbewerb/ES1',
    'https://www.transfermarkt.com/serie-a/startseite/wettbewerb/IT1',
    'https://www.transfermarkt.com/1-bundesliga/startseite/wettbewerb/L1',
]