In [1]:
import re
import requests

import numpy as np
import pandas as pd

from bs4 import BeautifulSoup

## Basic info from Wikipedia

In [2]:
url = 'https://en.wikipedia.org/wiki/Chemical_element'
# A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being parsed as the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'lxml')

In [3]:
soup

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>Chemical element - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],"wgRequestId":"XdwzewpAAEIAAHI5dG0AAAEY","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Chemical_element","wgTitle":"Chemical element","wgCurRevisionId":926499711,"wgRevisionId":926499711,"wgArticleId":5659,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["CS1: Julian–Gregorian uncertainty","Wikipedia indefinitely semi-protected page

In [4]:
# First <table> whose class attribute is exactly 'wikitable sortable collapsible'.
elements_raw = soup.find_all('table', class_='wikitable sortable collapsible')[0]
elements_raw

<table class="wikitable sortable collapsible">
<tbody><tr>
<th colspan="13">List of chemical elements
</th></tr>
<tr style="vertical-align:top">
<th><a href="/wiki/Atomic_number" title="Atomic number"><i>Z</i></a><sup class="reference" id="cite_ref-fn10_43-0"><a href="#cite_note-fn10-43">[I]</a></sup>
</th>
<th><a href="/wiki/Symbol_(chemistry)" title="Symbol (chemistry)">Symbol</a>
</th>
<th><a class="mw-selflink selflink">Element</a>
</th>
<th>Origin of name<sup class="reference" id="cite_ref-44"><a href="#cite_note-44">[43]</a></sup><sup class="reference" id="cite_ref-45"><a href="#cite_note-45">[44]</a></sup>
</th>
<th><a href="/wiki/Group_(periodic_table)" title="Group (periodic table)">Group</a>
</th>
<th><a href="/wiki/Period_(periodic_table)" title="Period (periodic table)">Period</a>
</th>
<th><a class="mw-redirect" href="/wiki/Atomic_weight" title="Atomic weight">Atomic weight</a><sup class="reference" id="cite_ref-46"><a href="#cite_note-46">[45]</a></sup><sup class="referen

In [5]:
def list_from_wikitable(item, start, cell, tup=False, count=118):
    """Collect the text of one column from consecutive rows of a parsed table.

    Parameters
    ----------
    item : object
        Parsed table exposing ``find_all('tr')``; each row must expose
        ``find_all('td')`` and each cell a ``.text`` attribute.
    start : int
        Index of the first ``<tr>`` to read.
    cell : int
        Index of the ``<td>`` to read within each row.
    tup : bool, optional
        When true, return a tuple instead of a list.
    count : int, optional
        Number of rows to read, and the length of the result (default 118).

    Returns
    -------
    list or tuple
        ``count`` entries. A cell whose text is exactly ``'no data'`` becomes
        NaN, as do trailing positions when the table has fewer rows.

    Raises
    ------
    IndexError
        If a row in range has no ``<td>`` at index ``cell``.
    """
    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
    values = [np.nan] * count
    for i, row in enumerate(item.find_all('tr')[start:start + count]):
        text = row.find_all('td')[cell].text
        # 'no data' cells keep the NaN placeholder they were initialised with.
        if text != 'no data':
            values[i] = text
    return tuple(values) if tup else values

In [20]:
# Text of <td> index 1 from each row, reading rows from <tr> index 4 onwards,
# kept as a tuple (displayed below as the element symbols).
symbols = list_from_wikitable(elements_raw, 4, 1, tup=True)
symbols

('H',
 'He',
 'Li',
 'Be',
 'B',
 'C',
 'N',
 'O',
 'F',
 'Ne',
 'Na',
 'Mg',
 'Al',
 'Si',
 'P',
 'S',
 'Cl',
 'Ar',
 'K',
 'Ca',
 'Sc',
 'Ti',
 'V',
 'Cr',
 'Mn',
 'Fe',
 'Co',
 'Ni',
 'Cu',
 'Zn',
 'Ga',
 'Ge',
 'As',
 'Se',
 'Br',
 'Kr',
 'Rb',
 'Sr',
 'Y',
 'Zr',
 'Nb',
 'Mo',
 'Tc',
 'Ru',
 'Rh',
 'Pd',
 'Ag',
 'Cd',
 'In',
 'Sn',
 'Sb',
 'Te',
 'I',
 'Xe',
 'Cs',
 'Ba',
 'La',
 'Ce',
 'Pr',
 'Nd',
 'Pm',
 'Sm',
 'Eu',
 'Gd',
 'Tb',
 'Dy',
 'Ho',
 'Er',
 'Tm',
 'Yb',
 'Lu',
 'Hf',
 'Ta',
 'W',
 'Re',
 'Os',
 'Ir',
 'Pt',
 'Au',
 'Hg',
 'Tl',
 'Pb',
 'Bi',
 'Po',
 'At',
 'Rn',
 'Fr',
 'Ra',
 'Ac',
 'Th',
 'Pa',
 'U',
 'Np',
 'Pu',
 'Am',
 'Cm',
 'Bk',
 'Cf',
 'Es',
 'Fm',
 'Md',
 'No',
 'Lr',
 'Rf',
 'Db',
 'Sg',
 'Bh',
 'Hs',
 'Mt',
 'Ds',
 'Rg',
 'Cn',
 'Nh',
 'Fl',
 'Mc',
 'Lv',
 'Ts',
 'Og')

In [21]:
# Text of <td> index 2 from each row, reading rows from <tr> index 4 onwards,
# kept as a tuple (displayed below as the element names).
names = list_from_wikitable(elements_raw, 4, 2, tup=True)
names

('Hydrogen',
 'Helium',
 'Lithium',
 'Beryllium',
 'Boron',
 'Carbon',
 'Nitrogen',
 'Oxygen',
 'Fluorine',
 'Neon',
 'Sodium',
 'Magnesium',
 'Aluminium',
 'Silicon',
 'Phosphorus',
 'Sulfur',
 'Chlorine',
 'Argon',
 'Potassium',
 'Calcium',
 'Scandium',
 'Titanium',
 'Vanadium',
 'Chromium',
 'Manganese',
 'Iron',
 'Cobalt',
 'Nickel',
 'Copper',
 'Zinc',
 'Gallium',
 'Germanium',
 'Arsenic',
 'Selenium',
 'Bromine',
 'Krypton',
 'Rubidium',
 'Strontium',
 'Yttrium',
 'Zirconium',
 'Niobium',
 'Molybdenum',
 'Technetium',
 'Ruthenium',
 'Rhodium',
 'Palladium',
 'Silver',
 'Cadmium',
 'Indium',
 'Tin',
 'Antimony',
 'Tellurium',
 'Iodine',
 'Xenon',
 'Caesium',
 'Barium',
 'Lanthanum',
 'Cerium',
 'Praseodymium',
 'Neodymium',
 'Promethium',
 'Samarium',
 'Europium',
 'Gadolinium',
 'Terbium',
 'Dysprosium',
 'Holmium',
 'Erbium',
 'Thulium',
 'Ytterbium',
 'Lutetium',
 'Hafnium',
 'Tantalum',
 'Tungsten',
 'Rhenium',
 'Osmium',
 'Iridium',
 'Platinum',
 'Gold',
 'Mercury',
 'Thalliu

In [22]:
# Lower-cased copies of the element names, in the same order as `names`.
names_lower = tuple(element_name.lower() for element_name in names)
names_lower

('hydrogen',
 'helium',
 'lithium',
 'beryllium',
 'boron',
 'carbon',
 'nitrogen',
 'oxygen',
 'fluorine',
 'neon',
 'sodium',
 'magnesium',
 'aluminium',
 'silicon',
 'phosphorus',
 'sulfur',
 'chlorine',
 'argon',
 'potassium',
 'calcium',
 'scandium',
 'titanium',
 'vanadium',
 'chromium',
 'manganese',
 'iron',
 'cobalt',
 'nickel',
 'copper',
 'zinc',
 'gallium',
 'germanium',
 'arsenic',
 'selenium',
 'bromine',
 'krypton',
 'rubidium',
 'strontium',
 'yttrium',
 'zirconium',
 'niobium',
 'molybdenum',
 'technetium',
 'ruthenium',
 'rhodium',
 'palladium',
 'silver',
 'cadmium',
 'indium',
 'tin',
 'antimony',
 'tellurium',
 'iodine',
 'xenon',
 'caesium',
 'barium',
 'lanthanum',
 'cerium',
 'praseodymium',
 'neodymium',
 'promethium',
 'samarium',
 'europium',
 'gadolinium',
 'terbium',
 'dysprosium',
 'holmium',
 'erbium',
 'thulium',
 'ytterbium',
 'lutetium',
 'hafnium',
 'tantalum',
 'tungsten',
 'rhenium',
 'osmium',
 'iridium',
 'platinum',
 'gold',
 'mercury',
 'thalliu

In [23]:
# Text of <td> index 3 from each row, reading rows from <tr> index 4 onwards,
# kept as a tuple (displayed below as the name-origin descriptions).
names_origin = list_from_wikitable(elements_raw, 4, 3, tup=True)
names_origin

("Greek elements hydro- and -gen, meaning 'water-forming'",
 "Greek hḗlios, 'sun'",
 "Greek líthos, 'stone'",
 'beryl, a mineral (ultimately from the name of Belur in southern India)',
 'borax, a mineral (from Arabic bawraq)',
 "Latin carbo, 'coal'",
 "Greek nítron and -gen, meaning 'niter-forming'",
 "Greek oxy- and -gen, meaning 'acid-forming'",
 "Latin fluere, 'to flow'",
 "Greek néon, 'new'",
 "English soda (the symbol Na is derived from New Latin natrium, coined from German Natron, 'natron')",
 'Magnesia, a district of Eastern Thessaly in Greece',
 "alumina, from Latin alumen (gen. aluminis), 'bitter salt, alum'",
 "Latin silex, 'flint' (originally silicium)",
 "Greek phōsphóros, 'light-bearing'",
 "Latin sulphur, 'brimstone'",
 "Greek chlōrós, 'greenish yellow'",
 "Greek argós, 'idle' (because of its inertness)",
 "New Latin potassa, 'potash' (the symbol K is derived from Latin kalium)",
 "Latin calx, 'lime'",
 "Latin Scandia, 'Scandinavia'",
 'Titans, the sons of the Earth godde

In [32]:
def _get_groups():
    """Yield one group value per row: an int, or NaN when the cell has none.

    Reads <td> index 4 through ``list_from_wikitable(elements_raw, 4, 4)``.
    Any value that is not a string of decimal digits yields NaN. That includes
    the float NaN placeholders ``list_from_wikitable`` returns for 'no data'
    cells, on which ``.isnumeric()`` would raise AttributeError.
    """
    for g in list_from_wikitable(elements_raw, 4, 4):
        text = g.strip() if isinstance(g, str) else ''
        # isdecimal() rather than isnumeric(): isnumeric() also accepts
        # characters such as superscripts and fractions that int() rejects.
        if text.isdecimal():
            yield int(text)
        else:
            yield np.nan

In [33]:
groups = tuple(_get_groups())
groups

(1,
 18,
 1,
 2,
 13,
 14,
 15,
 16,
 17,
 18,
 1,
 2,
 13,
 14,
 15,
 16,
 17,
 18,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 1,
 2,
 3,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 1,
 2,
 3,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18)

In [37]:
# Lookup from a group number 1-18 (the integer values held in `groups`) to a
# (number, letter) pair. Groups 8, 9 and 10 all map to (8, 'B').
# NOTE(review): presumably the CAS "A/B" group labels, with the number standing
# for the Roman numeral (e.g. 13 -> (3, 'A') for IIIA) — confirm.
cas = {
    1: (1, 'A'),
    2: (2, 'A'),
    3: (3, 'B'),
    4: (4, 'B'),
    5: (5, 'B'),
    6: (6, 'B'),
    7: (7, 'B'),
    8: (8, 'B'),
    9: (8, 'B'),
    10: (8, 'B'),
    11: (1, 'B'),
    12: (2, 'B'),
    13: (3, 'A'),
    14: (4, 'A'),
    15: (5, 'A'),
    16: (6, 'A'),
    17: (7, 'A'),
    18: (8, 'A')
}

In [48]:
# Translate every group number through the `cas` table. NaN fails the `> 0`
# comparison, so entries without a group number stay NaN.
groups_cas = [
    cas[int(group)] if group > 0 else np.nan
    for group in groups
]

groups_cas

[(1, 'A'),
 (8, 'A'),
 (1, 'A'),
 (2, 'A'),
 (3, 'A'),
 (4, 'A'),
 (5, 'A'),
 (6, 'A'),
 (7, 'A'),
 (8, 'A'),
 (1, 'A'),
 (2, 'A'),
 (3, 'A'),
 (4, 'A'),
 (5, 'A'),
 (6, 'A'),
 (7, 'A'),
 (8, 'A'),
 (1, 'A'),
 (2, 'A'),
 (3, 'B'),
 (4, 'B'),
 (5, 'B'),
 (6, 'B'),
 (7, 'B'),
 (8, 'B'),
 (8, 'B'),
 (8, 'B'),
 (1, 'B'),
 (2, 'B'),
 (3, 'A'),
 (4, 'A'),
 (5, 'A'),
 (6, 'A'),
 (7, 'A'),
 (8, 'A'),
 (1, 'A'),
 (2, 'A'),
 (3, 'B'),
 (4, 'B'),
 (5, 'B'),
 (6, 'B'),
 (7, 'B'),
 (8, 'B'),
 (8, 'B'),
 (8, 'B'),
 (1, 'B'),
 (2, 'B'),
 (3, 'A'),
 (4, 'A'),
 (5, 'A'),
 (6, 'A'),
 (7, 'A'),
 (8, 'A'),
 (1, 'A'),
 (2, 'A'),
 (3, 'B'),
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 (4, 'B'),
 (5, 'B'),
 (6, 'B'),
 (7, 'B'),
 (8, 'B'),
 (8, 'B'),
 (8, 'B'),
 (1, 'B'),
 (2, 'B'),
 (3, 'A'),
 (4, 'A'),
 (5, 'A'),
 (6, 'A'),
 (7, 'A'),
 (8, 'A'),
 (1, 'A'),
 (2, 'A'),
 (3, 'B'),
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 (4, 'B

In [34]:
def _get_periods():
    """Yield one period value per row: an int, or NaN when the cell has none.

    Reads <td> index 5 through ``list_from_wikitable(elements_raw, 4, 5)``.
    Any value that is not a string of decimal digits yields NaN. That includes
    the float NaN placeholders ``list_from_wikitable`` returns for 'no data'
    cells, on which ``.isnumeric()`` would raise AttributeError.
    """
    for p in list_from_wikitable(elements_raw, 4, 5):
        text = p.strip() if isinstance(p, str) else ''
        # isdecimal() rather than isnumeric(): isnumeric() also accepts
        # characters such as superscripts and fractions that int() rejects.
        if text.isdecimal():
            yield int(text)
        else:
            # np.nan rather than np.NaN: the alias was removed in NumPy 2.0.
            yield np.nan

In [35]:
periods = tuple(_get_periods())
periods

(1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7)

In [26]:
# adapted from chempy, sort of

def _get_relative_atomic_masses():
    """Yield the first number found in each row's <td> index 6 as a float.

    Reads the column through ``list_from_wikitable(elements_raw, 4, 6)``.
    Surrounding text such as brackets or a trailing parenthesised part is
    ignored because only the first numeric run is taken. Cells with no number,
    and the float NaN placeholders returned for 'no data' cells, yield NaN
    instead of raising.
    """
    # Raw string avoids the invalid '\.' escape. Requiring at least one digit
    # keeps a lone '.' from matching and then failing in float().
    number = re.compile(r'[0-9]+(?:\.[0-9]*)?|\.[0-9]+')
    for mass in list_from_wikitable(elements_raw, 4, 6):
        match = number.search(mass) if isinstance(mass, str) else None
        yield float(match.group(0)) if match else np.nan
        
relative_atomic_masses = tuple(_get_relative_atomic_masses())
relative_atomic_masses

(1.008,
 4.002602,
 6.94,
 9.0121831,
 10.81,
 12.011,
 14.007,
 15.999,
 18.998403163,
 20.1797,
 22.98976928,
 24.305,
 26.9815384,
 28.085,
 30.973761998,
 32.06,
 35.45,
 39.948,
 39.0983,
 40.078,
 44.955908,
 47.867,
 50.9415,
 51.9961,
 54.938043,
 55.845,
 58.933194,
 58.6934,
 63.546,
 65.38,
 69.723,
 72.63,
 74.921595,
 78.971,
 79.904,
 83.798,
 85.4678,
 87.62,
 88.90584,
 91.224,
 92.90637,
 95.95,
 98.0,
 101.07,
 102.90549,
 106.42,
 107.8682,
 112.414,
 114.818,
 118.71,
 121.76,
 127.6,
 126.90447,
 131.293,
 132.90545196,
 137.327,
 138.90547,
 140.116,
 140.90766,
 144.242,
 145.0,
 150.36,
 151.964,
 157.25,
 158.925354,
 162.5,
 164.930328,
 167.259,
 168.934218,
 173.045,
 174.9668,
 178.49,
 180.94788,
 183.84,
 186.207,
 190.23,
 192.217,
 195.084,
 196.96657,
 200.592,
 204.38,
 207.2,
 208.9804,
 209.0,
 210.0,
 222.0,
 223.0,
 226.0,
 227.0,
 232.0377,
 231.03588,
 238.02891,
 237.0,
 244.0,
 243.0,
 247.0,
 247.0,
 251.0,
 252.0,
 257.0,
 258.0,
 259.0,
 26

In [16]:
def atomic_number(name):
    """Return the 1-based position of an element given its symbol or its name.

    Parameters
    ----------
    name : str
        Either an entry of ``symbols`` (matched exactly, case-sensitive) or an
        element name in any letter case (matched against ``names_lower``).

    Returns
    -------
    int
        Index of the match in the corresponding tuple, plus one.

    Raises
    ------
    ValueError
        If ``name`` is found in neither ``symbols`` nor ``names_lower``.
    """
    try:
        return symbols.index(name) + 1
    except ValueError:
        # Not a symbol, so fall back to the lower-cased names. The tuple is
        # called `names_lower` in this notebook; `lower_names` is never defined.
        return names_lower.index(name.lower()) + 1

In [27]:
# <td> index 11 of each row, converted to float. An en-dash cell means "no value"
# and becomes NaN; NaN placeholders from list_from_wikitable pass through float().
# np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
electronegativities = tuple(
    np.nan if en == '–' else float(en)
    for en in list_from_wikitable(elements_raw, 4, 11, tup=False)
)
electronegativities

(2.2,
 nan,
 0.98,
 1.57,
 2.04,
 2.55,
 3.04,
 3.44,
 3.98,
 nan,
 0.93,
 1.31,
 1.61,
 1.9,
 2.19,
 2.58,
 3.16,
 nan,
 0.82,
 1.0,
 1.36,
 1.54,
 1.63,
 1.66,
 1.55,
 1.83,
 1.88,
 1.91,
 1.9,
 1.65,
 1.81,
 2.01,
 2.18,
 2.55,
 2.96,
 3.0,
 0.82,
 0.95,
 1.22,
 1.33,
 1.6,
 2.16,
 1.9,
 2.2,
 2.28,
 2.2,
 1.93,
 1.69,
 1.78,
 1.96,
 2.05,
 2.1,
 2.66,
 2.6,
 0.79,
 0.89,
 1.1,
 1.12,
 1.13,
 1.14,
 1.13,
 1.17,
 1.2,
 1.2,
 1.2,
 1.22,
 1.23,
 1.24,
 1.25,
 1.1,
 1.27,
 1.3,
 1.5,
 2.36,
 1.9,
 2.2,
 2.2,
 2.28,
 2.54,
 2.0,
 1.62,
 1.87,
 2.02,
 2.0,
 2.2,
 2.2,
 0.7,
 0.9,
 1.1,
 1.3,
 1.5,
 1.38,
 1.36,
 1.28,
 1.13,
 1.28,
 1.3,
 1.3,
 1.3,
 1.3,
 1.3,
 1.3,
 1.3,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan)

## Electron configuration

In [62]:
url2 = 'https://en.wikipedia.org/wiki/Electron_configurations_of_the_elements_(data_page)'
# A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being parsed as the article.
response2 = requests.get(url2, timeout=30)
response2.raise_for_status()
soup2 = BeautifulSoup(response2.content, 'lxml')

In [63]:
soup2

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>Electron configurations of the elements (data page) - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgMonthNamesShort":["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],"wgRequestId":"XddC@gpAIC4AAHv3LJwAAACJ","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Electron_configurations_of_the_elements_(data_page)","wgTitle":"Electron configurations of the elements (data page)","wgCurRevisionId":925363089,"wgRevisionId":925363089,"wgArticleId":2264346,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserG

In [69]:
econfig = soup2.find('table', {'class': 'wikitable'})

In [90]:
print(econfig.find_all('th'))

[<th colspan="26">Legend
</th>, <th colspan="26">1 H <a href="/wiki/Hydrogen" title="Hydrogen">hydrogen</a> : 1s<sup>1</sup>
</th>, <th colspan="26">2 He <a href="/wiki/Helium" title="Helium">helium</a> : 1s<sup>2</sup>
</th>, <th colspan="26">3 Li <a href="/wiki/Lithium" title="Lithium">lithium</a> : [He] 2s<sup>1</sup>
</th>, <th colspan="26">4 Be <a href="/wiki/Beryllium" title="Beryllium">beryllium</a> : [He] 2s<sup>2</sup>
</th>, <th colspan="26">5 B <a href="/wiki/Boron" title="Boron">boron</a> : [He] 2s<sup>2</sup> 2p<sup>1</sup>
</th>, <th colspan="26">6 C <a href="/wiki/Carbon" title="Carbon">carbon</a> : [He] 2s<sup>2</sup> 2p<sup>2</sup>
</th>, <th colspan="26">7 N <a href="/wiki/Nitrogen" title="Nitrogen">nitrogen</a> : [He] 2s<sup>2</sup> 2p<sup>3</sup>
</th>, <th colspan="26">8 O <a href="/wiki/Oxygen" title="Oxygen">oxygen</a> : [He] 2s<sup>2</sup> 2p<sup>4</sup>
</th>, <th colspan="26">9 F <a href="/wiki/Fluorine" title="Fluorine">fluorine</a> : [He] 2s<sup>2</sup> 2p<s

In [None]:
def df_from_table(item):
    """Turn a parsed HTML table into a DataFrame of cell text.

    Column names come from the ``<th>`` cells of the first ``<tr>``. Every
    later ``<tr>`` becomes one record whose ``<td>`` cells are assigned to the
    columns by position. Newline characters are removed from all text.

    Raises IndexError if a row has more ``<td>`` cells than there are headers.
    """
    headers = [th.text.replace('\n', '') for th in item.find('tr').find_all('th')]
    records = [
        {
            headers[pos]: td.text.replace('\n', '')
            for pos, td in enumerate(tr.find_all('td'))
        }
        for tr in item.find_all('tr')[1:]
    ]
    return pd.DataFrame(records)
    

In [140]:
# One record per <th colspan="26"> header after the first one (the first is
# skipped). Each header text reads like '5 B boron : [He] 2s2 2p1'.
econfig_df = pd.DataFrame([
    {
        'number': re.search('[0-9]+', header.text).group(0),
        'symbol': re.search('[A-Z][a-z]*', header.text).group(0),
        'name': header.find('a').attrs['title'].lower(),
        'econfig': header.text.split(sep=':')[1].strip(),
    }
    for header in econfig.find_all('th', {'colspan': 26})[1:]
])
econfig_df.head()

Unnamed: 0,number,symbol,name,econfig
0,1,H,hydrogen,1s1
1,2,He,helium,1s2
2,3,Li,lithium,[He] 2s1
3,4,Be,beryllium,[He] 2s2
4,5,B,boron,[He] 2s2 2p1


In [141]:
test_string = econfig_df.loc[4, 'econfig']
test_string

'[He] 2s2 2p1'

In [147]:
re.search('\].*', test_string).group(0)[2:]

'2s2 2p1'

In [148]:
re.search('\[\w{2}\]', test_string).group(0)

'[He]'

In [151]:
# Split each configuration into its bracketed two-character core (e.g. '[He]')
# and the text that follows it. Rows with no bracketed core get NaN in both
# columns instead of relying on a hard-coded "skip the first two rows" rule.
# Raw strings keep '\[', '\]' and '\w' from being read as invalid string escapes.
econfig_df['core'] = econfig_df['econfig'].str.extract(r'(\[\w{2}\])', expand=False)
# Capture everything after the first ']' and drop the one character following
# the bracket, which is the same text the previous `group(0)[2:]` slice kept.
econfig_df['valence'] = (
    econfig_df['econfig'].str.extract(r'\](.*)', expand=False).str[1:]
)

In [152]:
econfig_df.head()

Unnamed: 0,number,symbol,name,econfig,core,valence
0,1,H,hydrogen,1s1,,
1,2,He,helium,1s2,,
2,3,Li,lithium,[He] 2s1,[He],2s1
3,4,Be,beryllium,[He] 2s2,[He],2s2
4,5,B,boron,[He] 2s2 2p1,[He],2s2 2p1


In [154]:
econfig_df.at[0, 'valence'] = '1s1'
econfig_df.at[1, 'valence'] = '1s2'

In [155]:
econfig_df.head()

Unnamed: 0,number,symbol,name,econfig,core,valence
0,1,H,hydrogen,1s1,,1s1
1,2,He,helium,1s2,,1s2
2,3,Li,lithium,[He] 2s1,[He],2s1
3,4,Be,beryllium,[He] 2s2,[He],2s2
4,5,B,boron,[He] 2s2 2p1,[He],2s2 2p1


In [163]:
econfig_df.tail()

Unnamed: 0,number,symbol,name,econfig,core,valence
167,168,Uho,unhexoctium,[Og] 5g18 6f14 7d10 8s2 8p2 9s2 9p2,[Og],5g18 6f14 7d10 8s2 8p2 9s2 9p2
168,169,Uhe,unhexennium,[Og] 5g18 6f14 7d10 8s2 8p3 9s2 9p2,[Og],5g18 6f14 7d10 8s2 8p3 9s2 9p2
169,170,Usn,unseptnilium,[Og] 5g18 6f14 7d10 8s2 8p4 9s2 9p2,[Og],5g18 6f14 7d10 8s2 8p4 9s2 9p2
170,171,Usu,unseptunium,[Og] 5g18 6f14 7d10 8s2 8p5 9s2 9p2,[Og],5g18 6f14 7d10 8s2 8p5 9s2 9p2
171,172,Usb,unseptbium,[Og] 5g18 6f14 7d10 8s2 8p6 9s2 9p2,[Og],5g18 6f14 7d10 8s2 8p6 9s2 9p2


In [166]:
econfig_df['number'] = econfig_df['number'].astype(int)

In [168]:
econfig_df = econfig_df[econfig_df['number'] <= 118]

In [None]:
url = 'https://en.wikipedia.org/wiki/Chemical_element'
# A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being parsed as the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'lxml')