In [2]:
import pandas as pd
import numpy as np
import json
from pyecharts.charts import Bar, Pie
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# http://allselenium.info/wait-for-elements-python-selenium-webdriver/
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from urllib.parse import urlparse
# https://docs.python.org/3/library/dataclasses.html
from dataclasses import dataclass
# https://github.com/lidatong/dataclasses-json/stargazers
from dataclasses_json import dataclass_json
# https://github.com/konradhalas/dacite
from dacite import from_dict
# convert string to buffer, used in pd.read_csv
from io import StringIO
# https://github.com/ManrajGrover/halo
from halo import Halo
# https://realpython.com/python-logging/#the-logging-module
import logging
from time import sleep, time


In [3]:
# get intput data from csv string
inputData = '''
序号,学校名称,门户网站网址,主管部门,所在地,办学层次
1,云南大学,http://www.ynu.edu.cn/,云南省,昆明市,本科
2,昆明理工大学,http://www.kmust.edu.cn,云南省,昆明市,本科
3,云南农业大学,https://www.ynau.edu.cn/,云南省,昆明市,本科
4,西南林业大学,http://www.swfu.edu.cn/,云南省,昆明市,本科
5,昆明医科大学,http://www.kmmc.cn,云南省,昆明市,本科
6,大理大学,http://www.dali.edu.cn,云南省,大理白族自治州,本科
7,云南中医药大学,http://www.ynutcm.edu.cn/,云南省,昆明市,本科
8,云南师范大学,https://www.ynnu.edu.cn/,云南省,昆明市,本科
9,昭通学院,http://www.ztu.edu.cn/,云南省,昭通市,本科
10,曲靖师范学院,http://www.qjnu.edu.cn/,云南省,曲靖市,本科
11,普洱学院,http://www.peuni.cn/,云南省,普洱市,本科
12,保山学院,http://www.bsnc.cn/,云南省,保山市,本科
13,红河学院,http://www.uoh.edu.cn/,云南省,红河哈尼族彝族自治州,本科
14,云南财经大学,http://www.ynufe.edu.cn/,云南省,昆明市,本科
15,云南艺术学院,https://www.ynart.edu.cn/,云南省,昆明市,本科
16,云南民族大学,http://www.ynni.edu.cn/,云南省,昆明市,本科
17,玉溪师范学院,http://www.yxnu.edu.cn/,云南省,玉溪市,本科
18,楚雄师范学院,http://www.cxtc.edu.cn/,云南省,楚雄彝族自治州,本科
19,云南警官学院,https://www.ynpc.edu.cn/,云南省,昆明市,本科
20,昆明学院,http://www.kmu.edu.cn,云南省,昆明市,本科
21,文山学院,http://www.wsu.edu.cn/,云南省,文山壮族苗族自治州,本科
22,云南经济管理学院,https://www.ynjgy.com/,云南省教育厅,昆明市,本科
23,云南大学滇池学院,https://www.ynudcc.cn/,云南省教育厅,昆明市,本科
24,丽江旅游文化学院,http://www.lywhxy.com,云南省教育厅,丽江市,本科
25,昆明理工大学津桥学院,http://www.oxbridge.edu.cn/,云南省教育厅,昆明市,本科
26,云南师范大学商学院,http://www.ynnubs.com/,云南省教育厅,昆明市,本科
27,昆明文理学院（云南师范大学）,http://www.caskm.cn/,云南省教育厅,昆明市,本科
28,昆明医科大学海源学院,http://www.kyhyxy.com/,云南省教育厅,昆明市,本科
29,云南艺术学院文华学院,http://www.whxyart.cn/,云南省教育厅,昆明市,本科
30,云南工商学院,https://www.yngsxy.net/,云南省教育厅,昆明市,本科
31,滇西科技师范学院,http://www.wynu.edu.cn/,云南省,临沧市,本科
32,滇西应用技术大学,http://www.wyuas.edu.cn,云南省,大理市,本科
'''

# df=pd.read_csv('resources/云南省普通高等学校名单（本科）.csv')
df=pd.read_csv(StringIO(inputData))
siteInfo = []
for (name, url) in df[['学校名称','门户网站网址']].values:
    siteInfo.append({'name':name, 'url':url})
# only use first n for testing
# siteInfo = siteInfo[:2]

In [4]:
def getPingUrl(originalUrl):
    '''
    获取seo的网址
    '''
    hostname=urlparse(originalUrl).hostname
    return f'https://www.ping.cn/ping/{hostname}'

In [5]:

from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import WebDriverException

def _find_element(driver, by):
    """Looks up an element. Logs and re-raises ``WebDriverException``
    if thrown."""
    try:
        return driver.find_element(*by)
    except NoSuchElementException as e:
        raise e
    except WebDriverException as e:
        raise e
class texts_to_be_present_in_element(object):
    """ An expectation for checking if the given texts is present in the
    specified element.
    locator, texts
    """
    def __init__(self, locator, texts_):
        self.locator = locator
        self.texts = texts_

    def __call__(self, driver):
        try:
            element_text = _find_element(driver, self.locator).text
            return any([text == element_text for text in self.texts])
        except StaleElementReferenceException:
            return False

In [6]:
option = webdriver.ChromeOptions()
chrome_prefs = {}
option.experimental_options["prefs"] = chrome_prefs
chrome_prefs["profile.default_content_settings"] ={"images": 2}
chrome_prefs["profile.managed_default_content_settings"] = {"images": 2}

browser = webdriver.Chrome(options=option)

total = len(siteInfo)
for idx, siteInfoItem in enumerate(siteInfo):
    pingUrl = getPingUrl(siteInfoItem['url'])
    spinner = Halo(text=f'[{idx+1:02}/{total:02}] processing {pingUrl}', spinner='dots')
    spinner.start()
    browser.get(pingUrl)
    # wait some seconds for page load finish
    sleep(30)
    # wait for message span
    wait = WebDriverWait(browser, 120)
    wait.until(texts_to_be_present_in_element((By.CSS_SELECTOR, "div.main > div.mainMsg1 > span"), ['', '已检测结束']))

    # elem = browser.find_element_by_css_selector('div.page04d6 > div:nth-child(4) > p')
    elem = _find_element(browser, (By.CSS_SELECTOR, 'div.page04d6 > div:nth-child(4) > p'))
    pingAverage = elem.text
    siteInfoItem['pingAverage'] = pingAverage
    spinner.succeed(text=f'process {pingUrl} finished with {pingAverage}!')

browser.quit()

print(siteInfo)

v process https://www.ping.cn/ping/www.ynu.edu.cn finished with 66.175ms!
v process https://www.ping.cn/ping/www.kmust.edu.cn finished with 63.431ms!
v process https://www.ping.cn/ping/www.ynau.edu.cn finished with 0ms!
v process https://www.ping.cn/ping/www.swfu.edu.cn finished with 48.684ms!
v process https://www.ping.cn/ping/www.kmmc.cn finished with 50.374ms!
v process https://www.ping.cn/ping/www.dali.edu.cn finished with 75.008ms!
v process https://www.ping.cn/ping/www.ynutcm.edu.cn finished with 0ms!
v process https://www.ping.cn/ping/www.ynnu.edu.cn finished with 0ms!
v process https://www.ping.cn/ping/www.ztu.edu.cn finished with 0ms!
v process https://www.ping.cn/ping/www.qjnu.edu.cn finished with 56.415ms!
v process https://www.ping.cn/ping/www.peuni.cn finished with 57.403ms!
v process https://www.ping.cn/ping/www.bsnc.cn finished with 59.179ms!
v process https://www.ping.cn/ping/www.uoh.edu.cn finished with 0ms!
v process https://www.ping.cn/ping/www.ynufe.edu.cn finished 

In [7]:
browser = webdriver.Chrome()

total = len(siteInfo)
for idx, siteInfoItem in enumerate(siteInfo):
    url = siteInfoItem['url']
    spinner = Halo(text=f'[{idx+1:02}/{total:02}] processing {url}', spinner='dots')
    spinner.start()
    start = time()
    browser.get(url)
    end = time()
    transferSize = browser.execute_script('return window.performance.getEntriesByType("resource").reduce((total,item) => total + item.transferSize, 0)')
    transferCount = browser.execute_script('return window.performance.getEntriesByType("resource").length')
    loadTime = end - start
    title = browser.title
    siteInfoItem['loadTime'] = loadTime
    siteInfoItem['transferSize'] = transferSize
    siteInfoItem['transferCount'] = transferCount
    spinner.succeed(text=f'process {pingUrl} finished with loadTime: {loadTime}, transferSize: {transferSize}, transferCount: {transferCount}, title: {title}')

browser.quit()

print(siteInfo)

v process https://www.ping.cn/ping/www.wyuas.edu.cn finished with loadTime: 2.1581311225891113, transferSize: 5483520, transferCount: 55, title: 云南大学
v process https://www.ping.cn/ping/www.wyuas.edu.cn finished with loadTime: 4.233257055282593, transferSize: 11399338, transferCount: 60, title: 昆明理工大学
v process https://www.ping.cn/ping/www.wyuas.edu.cn finished with loadTime: 2.5715060234069824, transferSize: 5536048, transferCount: 67, title: 云南农业大学
v process https://www.ping.cn/ping/www.wyuas.edu.cn finished with loadTime: 4.138960838317871, transferSize: 4797642, transferCount: 28, title: 西南林业大学 – 西南林业大学
v process https://www.ping.cn/ping/www.wyuas.edu.cn finished with loadTime: 13.473953485488892, transferSize: 36842939, transferCount: 68, title: 昆明医科大学
v process https://www.ping.cn/ping/www.wyuas.edu.cn finished with loadTime: 2.4139199256896973, transferSize: 8198444, transferCount: 51, title: 大理大学
v process https://www.ping.cn/ping/www.wyuas.edu.cn finished with loadTime: 1.35613

In [8]:
xaxis = list(map(lambda item: item['name'], siteInfo))
yaxisPingAverage = list(map(lambda item: float(item['pingAverage'].replace('ms','')), siteInfo))

bar = (
    Bar({"theme": ThemeType.SHINE})
    .add_xaxis(xaxis)
    .add_yaxis("ping平均延时（ms）", yaxisPingAverage)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="ping平均延时情况"),
        datazoom_opts=opts.DataZoomOpts(),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            min_=-10,
        ),)
)
bar.render_notebook()



In [9]:
xaxis = list(map(lambda item: item['name'], siteInfo))
yaxisLoadTime = list(map(lambda item: "{:.2f}".format(float(item['loadTime']) * 1000), siteInfo))

bar = (
    Bar({"theme": ThemeType.SHINE})
    .add_xaxis(xaxis)
    .add_yaxis("加载时间（ms）", yaxisLoadTime)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="加载时间情况"),
        datazoom_opts=opts.DataZoomOpts(),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            min_=-10,
        ),)
)
bar.render_notebook()



In [10]:
xaxis = list(map(lambda item: item['name'], siteInfo))
yaxisTransferSize = list(map(lambda item: "{:.2f}".format(float(item['transferSize']) / 1024), siteInfo))

bar = (
    Bar({"theme": ThemeType.SHINE})
    .add_xaxis(xaxis)
    .add_yaxis("加载网络资源大小（KB）", yaxisTransferSize)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="加载网络资源大小情况"),
        datazoom_opts=opts.DataZoomOpts(),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            min_=-10,
        ),)
)
bar.render_notebook()



In [11]:
xaxis = list(map(lambda item: item['name'], siteInfo))
yaxisTransferCount = list(map(lambda item: int(item['transferCount']), siteInfo))

bar = (
    Bar({"theme": ThemeType.SHINE})
    .add_xaxis(xaxis)
    .add_yaxis("加载网络资源数量", yaxisTransferCount)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="加载网络资源数量情况"),
        datazoom_opts=opts.DataZoomOpts(),
        xaxis_opts=opts.AxisOpts(
            type_="category",
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            min_=-10,
        ),)
)
bar.render_notebook()



In [12]:
df['pingAverage(ms)'] = list(map(lambda item: item['pingAverage'] if item['pingAverage'] != '0ms' else '-', siteInfo))
df['loadTime(ms)'] = list(map(lambda item: "{:.2f}".format(float(item['loadTime']) * 1000), siteInfo))
df['transferSize(KB)'] = list(map(lambda item: "{:.2f}".format(float(item['transferSize']) / 1024), siteInfo))
df['transferCount'] = list(map(lambda item: int(item['transferCount']), siteInfo))

df.to_excel('resources/network_metric.xlsx',index=False)

