### Sample program for crawling  

#### Import libraries  

In [2]:
import re
import requests
from bs4 import BeautifulSoup
import pandas as pd

#### Parameters  

In [3]:
# Address of the web page to fetch.
url = "https://www.toyo.ac.jp/toyo2020/basic_data/"
# Name of the CSV file the scraped table is written to.
csv_out = "num_students_toyo.csv"

#### Crawling of web page  

In [4]:
# Fetch the target page through a session object.
s = requests.Session()
# requests applies no timeout by default, so without one this cell could
# hang indefinitely if the server never answers.
r = s.get(url, timeout=30)

# Show the HTTP status code and the encoding used to decode r.text.
print(r.status_code)
print(r.encoding)

# Raise on a 4xx/5xx response instead of silently carrying an error page
# forward as if it were the expected content.
r.raise_for_status()

200
UTF-8


In [5]:
# Response body decoded to str. Only the beginning is printed: the complete
# HTML document is long and would swamp the notebook output.
preview_chars = 2000
print(r.text[:preview_chars])
# r.content gives the same body as raw (binary) bytes instead of str.

<!DOCTYPE html>
<html lang="ja">
<head prefix="og: http://ogp.me/ns#" data-page="data">
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-3879787-1"></script>
<script>
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());

  gtag('config', 'UA-3879787-1');
</script>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="format-detection" content="telephone=no">
<link rel="canonical" href="https://www.toyo.ac.jp/toyo2020/basic_data/">
<title>基礎データ | 東洋大学はいま2020</title>
<meta name="description" content="在籍学生数をはじめとした、学校法人東洋大学の基礎データ">
<meta property="og:title" content="基礎データ | 東洋大学はいま2020">
<meta property="og:type" content="article">
<meta property="og:url" content="https://www.toyo.ac.jp/toyo2020/basic_data/">
<meta property="og:image" content="https://www.t

#### Scraping (parse web contents)  

In [6]:
# Parse the decoded response text into a BeautifulSoup tree using the
# 'html.parser' backend.
bs = BeautifulSoup(markup=r.text, features='html.parser')

#### Retrieve first table

In [7]:
# Collect every element matching the CSS selector '.data-table'.
table_all = bs.select('.data-table')
# Fail with an explanatory message rather than a bare "list index out of
# range" when the page contains no matching element (e.g. the layout changed).
if not table_all:
    raise IndexError("no element matching '.data-table' was found in the parsed page")
# Keep only the first matching table.
table = table_all[0]

#### Extract column headings

In [8]:
# Column names: the text of each <th> cell found in the table header.
cols = [header_cell.text for header_cell in table.thead.find_all('th')]

#### Extract data rows

In [9]:
# One list per <tr> in the table body: the text of the row's <th> cells
# first, followed by the text of its <td> cells.
rows = [
    [cell.text for cell in tr.find_all('th')]
    + [cell.text for cell in tr.find_all('td')]
    for tr in table.tbody.find_all('tr')
]

#### Output to CSV  

In [10]:
# Assemble the scraped rows into a DataFrame labelled with the scraped
# column headings.
df = pd.DataFrame(data=rows, columns=cols)

# Render the table in the notebook.
display(df)

# Write the table to the configured CSV file, leaving out the index column.
df.to_csv(path_or_buf=csv_out, index=False)

Unnamed: 0,学部,男性,女性,合計
0,文学部,1580,2019,3599
1,経済学部,1884,786,2670
2,経営学部,1853,1035,2888
3,法学部,1424,756,2180
4,社会学部,1307,1907,3214
5,国際地域学部（※1）,48,47,95
6,国際学部,591,936,1527
7,国際観光学部,403,1072,1475
8,情報連携学部,1213,336,1549
9,ライフデザイン学部,982,1323,2305
