### Scraping current Supercharger data from Tesla's website

In [1]:
# Stretch the Jupyter notebook container across the full browser width
from IPython.display import display, HTML

full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin,urlparse,parse_qs
import re
from datetime import datetime, timezone


In [None]:
# Get the sorted, de-duplicated list of country slugs that have a
# Superchargers section on the "find us" overview page
url = 'https://www.tesla.com/findus/list'
response = requests.get(url)
# Fail loudly on an HTTP error instead of silently parsing an error page
# and ending up with an empty country list
response.raise_for_status()
# Name the parser explicitly, matching the other BeautifulSoup calls in
# this notebook; without it bs4 warns and picks whatever is installed
soup = BeautifulSoup(response.content, 'lxml')
countries = set()
for row in soup.find_all('section', {'class': 'row'}):
    # Only rows whose heading mentions Superchargers are of interest
    if not any('Superchargers' in h2.text for h2 in row.find_all('h2')):
        continue
    for link in row.find_all('a', href=True):
        # The country slug is the last path segment of the link
        country = link['href'].rsplit('/')[-1]
        # A link ending in "/" yields an empty slug; skip it, as it would
        # later resolve to the list root rather than a country page
        if country:
            countries.add(country)
countries = sorted(countries)

In [None]:
def get_locations(url, country):
    """Return the Supercharger locations listed on one country page.

    Args:
        url: Address of the country's Supercharger list page.
        country: Country slug, used only in the failure message.

    Returns:
        A list of dicts, one per ``<address class="vcard">`` entry that has
        a link. Each dict holds ``URL`` (absolute link to the location page)
        and, when present on the page, ``street_address`` and ``locality``.
        An empty list is returned when the page cannot be fetched or parsed.
    """
    locations = []
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'lxml')
    except Exception:
        # Best effort: one unreachable or unparsable country page must not
        # abort the whole scrape
        print("Fail on {}".format(country))
        return locations

    optional_fields = (
        ('street_address', 'street-address'),
        ('locality', 'locality'),
    )
    for address in soup.find_all('address', {'class': 'vcard'}):
        link = address.find('a', href=True)
        if link is None:
            # Without a link there is no detail page to visit later; skip
            # this entry instead of dropping the rest of the country
            continue
        location = {'URL': urljoin(url, link['href'])}
        for key, css_class in optional_fields:
            span = address.find('span', {'class': css_class})
            if span is not None:
                location[key] = span.text.strip()
        locations.append(location)
    return locations

In [None]:
from tqdm.notebook import tqdm

# Collect the locations of every country's Supercharger list page
base_url = 'https://www.tesla.com/findus/list/superchargers/'
locations = []
# Iterate in sorted order (a bare set() has no defined order) so the
# resulting rows are reproducible between runs
for country in tqdm(sorted(set(countries))):
    # extend() grows the list in place instead of copying it every iteration
    locations.extend(get_locations(urljoin(base_url, country), country))

In [None]:
# Build a DataFrame from the scraped location dicts (one row per dict)
df = pd.DataFrame(locations)

In [None]:
# Collapse every run of whitespace inside string cells to a single space.
# The pattern is a raw string: '\s' in a plain literal is an invalid escape
# sequence and triggers a warning on recent Python versions.
df = df.replace(to_replace=r'\s+', value=' ', regex=True)

In [None]:
# Display summary statistics of the DataFrame as the cell output
df.describe()

In [None]:
# Save a snapshot of the location list; the file name carries the current
# UTC time with minute resolution ('YYYY-MM-DDTHH:MM').
# datetime.utcnow() is deprecated, so take a timezone-aware UTC time and
# format it explicitly to keep the same file name layout.
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M')
df.to_parquet('df.' + timestamp + '.parquet')

In [None]:
def get_info_from_url(url, index, debug=False):
    """Scrape one location page and store its details in the global ``df``.

    Row ``index`` of ``df`` receives the columns ``latitude``,
    ``longitude``, ``charging_text``, ``number``, ``kw`` and ``open``.
    Anything that cannot be found on the page is stored as an empty string,
    so every column is filled the same way for every row.

    Args:
        url: Address of the location detail page.
        index: Label of the ``df`` row to update.
        debug: When true, print the extracted charging and opening info.
    """
    global df
    # Grab data from URL
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'lxml')

    # Preferred source for the coordinates: the query string of the
    # "Driving Directions" link (the text after its first '=')
    coords = ''
    for href in soup.find_all('a', href=True):
        if href.text == 'Driving Directions':
            query_parts = urlparse(href['href']).query.split('=')
            # A query without "key=value" carries no coordinates
            if len(query_parts) > 1:
                coords = query_parts[1]
    # If there is no such link, grab the info from the map image instead
    if not coords:
        div = soup.find('div', {'id': 'location-map'})
        img = div.find('img') if div is not None else None
        img_src = img.get('src') if img is not None else None
        if img_src:
            centers = parse_qs(urlparse(img_src).query).get('center')
            if centers:
                coords = centers[0]

    # Add coordinate info to the dataframe; a value that is not of the
    # form "lat,lon" is treated like missing coordinates
    coord_parts = coords.split(',')
    if len(coord_parts) >= 2:
        latitude, longitude = coord_parts[0], coord_parts[1]
    else:
        latitude = longitude = ''
    df.loc[index, 'latitude'] = latitude
    df.loc[index, 'longitude'] = longitude

    # Grab charging info from the paragraph containing "Charging"
    charging_tag = soup.select_one('p:-soup-contains("Charging")')
    if charging_tag is not None:
        charging_text = charging_tag.get_text(separator=" ")
    else:
        charging_text = ''
    number_match = re.search(r'(\d+).*Superchargers', charging_text)
    kw_match = re.search(r'(\d+)kW', charging_text)
    number = number_match.group(1) if number_match else ''
    kw = kw_match.group(1) if kw_match else ''
    # Always write all three columns so missing values are '' rather than
    # a mix of '' and NaN
    df.loc[index, 'charging_text'] = charging_text
    df.loc[index, 'number'] = number
    df.loc[index, 'kw'] = kw
    if debug and charging_tag is not None:
        print(charging_text)
        if number_match:
            print(number)
        if kw_match:
            print(kw)

    # Grab open info (the italic note mentioning "Non-Tesla")
    open_text = soup.select_one('i:-soup-contains("Non-Tesla")')
    if open_text is not None:
        df.loc[index, 'open'] = open_text.get_text(separator=" ")
        if debug:
            print(open_text)
    else:
        df.loc[index, 'open'] = ''


In [None]:
from tqdm.notebook import tqdm
import traceback

# Visit every location's detail page and let get_info_from_url fill in the
# extra columns; stop at the first failure after reporting it
progress = tqdm(df.iterrows(), total=len(df))
for index, location in progress:
    try:
        get_info_from_url(location['URL'], index)
    except Exception:
        print("Fail on ({}):{}".format(index, location['URL']))
        traceback.print_exc()
        break

In [None]:
# Save a snapshot of the enriched data; the file name carries the current
# UTC time with minute resolution ('YYYY-MM-DDTHH:MM').
# datetime.utcnow() is deprecated, so take a timezone-aware UTC time and
# format it explicitly to keep the same file name layout.
timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M')
df.to_parquet('df.filled.' + timestamp + '.parquet')

In [None]:
# Display summary statistics of the enriched DataFrame as the cell output
df.describe()

In [None]:
from collections import defaultdict

# Collect every distinct charger power (in kW) mentioned in the charging
# texts; the result `kw` is a sorted list of ints
charging_texts = df.charging_text.unique().tolist()
kw_counts = defaultdict(int)
for charging_text in charging_texts:
    # Rows without charging info hold NaN/None; skip them explicitly
    # instead of hiding the resulting TypeError behind a bare except
    if not isinstance(charging_text, str):
        continue
    for match in re.findall(r'(\d+)kW', charging_text):
        kw_counts[int(match)] += 1
kw = sorted(kw_counts)

In [None]:
from tqdm.notebook import tqdm
import traceback

# For every location, add one column per charger power (named after the kW
# value) holding the number of Superchargers listed with that power
for index, location in tqdm(df.iterrows(), total=df.shape[0]):
    charging_text = location["charging_text"]
    # Rows without charging info hold NaN (which is truthy) or ''. Skip
    # them, otherwise re.findall raises on the float and the loop would
    # stop at the first such row.
    if not isinstance(charging_text, str) or not charging_text:
        continue
    try:
        counts = re.findall(r'(\d+)\s+Superchargers', charging_text)
        powers = re.findall(r'(\d+)kW', charging_text)
        # Counts and powers are paired in their order of appearance
        for count, power in zip(counts, powers):
            df.loc[index, power] = count
    except Exception:
        # Report the offending text together with the actual error, then stop
        print(charging_text)
        traceback.print_exc()
        break

In [None]:
# Display the first rows of the final DataFrame as the cell output
df.head()