<a href="https://colab.research.google.com/github/simodepth/Core-Web-Vitals/blob/main/Site_Speed_and_Core_Web_Vitals_Extraction_for_a_Single_URL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Scrape Core Web Vitals and Page Experience indicators 


---

We are going to scrape all the page experience indicators with the PSI API and ultimately embed the findings onto a new data frame which we will generate with Pandas

🙏🏻 Credits to **@Daniel Heredia Mejias**

In [79]:
import urllib.request, json
import numpy as np
import pandas as pd
import seaborn as sns


In [80]:
url = input()

https://www.boucheron.com/fr_fr/


In [81]:
device = "mobile"

url = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed?url=" + url + "&strategy=" + device + "&locale=en&key=AIzaSyBlGR605WFWTs7i5OJJ9NGthhl7VloZ48c"
#Note that you can insert your URL with the parameter URL and you can also modify the device parameter if you would like to get the data for desktop.
 
try:
    response = urllib.request.urlopen(url) # Recommended to use API Key
except Exception as e:
    print(e)

data = json.loads(response.read())




In [82]:
#@title Embed Core Web Vitals into a Data Frame
fcp = data["loadingExperience"]["metrics"]["FIRST_CONTENTFUL_PAINT_MS"]["percentile"] #into seconds (/1000)
lcp = data["loadingExperience"]["metrics"]["LARGEST_CONTENTFUL_PAINT_MS"]["percentile"]/100
cls = data["loadingExperience"]["metrics"]["CUMULATIVE_LAYOUT_SHIFT_SCORE"]["percentile"]/100
fid = data["loadingExperience"]["metrics"]["FIRST_INPUT_DELAY_MS"]["percentile"] #into seconds (/1000)

fcp_score = data["loadingExperience"]["metrics"]["FIRST_CONTENTFUL_PAINT_MS"]["category"]
fid_score = data["loadingExperience"]["metrics"]["FIRST_INPUT_DELAY_MS"]["category"]
lcp_score = data["loadingExperience"]["metrics"]["LARGEST_CONTENTFUL_PAINT_MS"]["category"]
cls_score = data["loadingExperience"]["metrics"]["CUMULATIVE_LAYOUT_SHIFT_SCORE"]["category"]


df = pd.DataFrame(columns = ['fcp','lcp','cls','fid'])
df.loc['metrics'] = [fcp,lcp,cls,fid] 
df.loc['scores'] = [fcp_score,fid_score,lcp_score,cls_score]
df.round(decimals = 1)
df

# To receive the same outputs from PSI, don't forget to move the comma AFTER THE FIRST DECIMAL
# e.g fcp = 1.4


Unnamed: 0,fcp,lcp,cls,fid
metrics,1390.0,26.25,0.24,1595.0
scores,FAST,SLOW,AVERAGE,AVERAGE


In [83]:
#@title Download the Rounded Output
df.to_csv('core_web_vitals_df.csv')

In [84]:
#@title Adding a bit of Conditional Formatting

def highlight_rows(row):
    value = row.loc['scores']
    if value == 'SLOW':
        color = '#FFB3BA' # Red
    elif value == 'FAST':
        color = '#BAFFC9' # Green
    else:
        color = '#FFE8B3' # Orange
    return ['background-color: {}'.format(color) for r in row]

df.style.apply(highlight_rows, axis=0)


Unnamed: 0,fcp,lcp,cls,fid
metrics,1390.000000,26.250000,0.240000,1595.000000
scores,FAST,SLOW,AVERAGE,AVERAGE


In [72]:
#@title Download the Formatted Output
df.style.apply(highlight_rows, axis=0)\
        .to_excel('styled_df.xlsx', engine='openpyxl')

In [58]:
#@title Get the Overall Core Web Vitals Score for the Audited Site
#⚠️don't forget to paste the site URL in the loc⚠️

overall_score = data["lighthouseResult"]["categories"]["performance"]["score"] * 100
df = pd.DataFrame(columns = ['overall_score'])
df.loc['https://www.boucheron.com/fr_fr/'] = overall_score 

#blocking time
blocking_time_score = data["lighthouseResult"]["audits"]["total-blocking-time"]["score"]
blocking_time_duration = data["lighthouseResult"]["audits"]["total-blocking-time"]["displayValue"]

#first meaningful paint
fmp_score = data["lighthouseResult"]["audits"]["first-meaningful-paint"]["score"]
fmp = data["lighthouseResult"]["audits"]["first-meaningful-paint"]["displayValue"]

#Long Tasks
total_tasks = data["lighthouseResult"]["audits"]["diagnostics"]["details"]["items"][0]["numTasks"]
total_tasks_time = data["lighthouseResult"]["audits"]["diagnostics"]["details"]["items"][0]["totalTaskTime"]
long_tasks = data["lighthouseResult"]["audits"]["diagnostics"]["details"]["items"][0]["numTasksOver50ms"]

#Mainthread Work
mainthread_score = data["lighthouseResult"]["audits"]["mainthread-work-breakdown"]["score"]
mainthread_duration = data["lighthouseResult"]["audits"]["mainthread-work-breakdown"]["displayValue"]
 
#Iteration to get each process duration
listprocesses = []
for x in range (len(data["lighthouseResult"]["audits"]["mainthread-work-breakdown"]["details"]["items"])):
    duration = data["lighthouseResult"]["audits"]["mainthread-work-breakdown"]["details"]["items"][x]["duration"]
    process = data["lighthouseResult"]["audits"]["mainthread-work-breakdown"]["details"]["items"][x]["groupLabel"]
    list1 = [duration,process]
    listprocesses.append(list1)

#Render-Blocking Resources
blocking_resources_score = data["lighthouseResult"]["audits"]["render-blocking-resources"]["score"]
blocking_resoures_savings = data["lighthouseResult"]["audits"]["render-blocking-resources"]["displayValue"]
 
listblockingresources = []
for x in range (len(data["lighthouseResult"]["audits"]["render-blocking-resources"]["details"]["items"])):
    url = data["lighthouseResult"]["audits"]["render-blocking-resources"]["details"]["items"][x]["url"]
    totalbytes = data["lighthouseResult"]["audits"]["render-blocking-resources"]["details"]["items"][x]["totalBytes"]
    wastedbytes = data["lighthouseResult"]["audits"]["render-blocking-resources"]["details"]["items"][x]["wastedMs"]
    list1 = [url, totalbytes, wastedbytes]
    listblockingresources.append(list1)

#unused javascript
unused_js_score = data["lighthouseResult"]["audits"]["unused-javascript"]["score"]
unused_js_savings = data["lighthouseResult"]["audits"]["unused-javascript"]["displayValue"]
 
listunusedjavascript = []
for x in range (len(data["lighthouseResult"]["audits"]["unused-javascript"]["details"]["items"])):
    url = data["lighthouseResult"]["audits"]["unused-javascript"]["details"]["items"][x]["url"]
    totalbytes = data["lighthouseResult"]["audits"]["unused-javascript"]["details"]["items"][x]["totalBytes"]
    wastedbytes = data["lighthouseResult"]["audits"]["unused-javascript"]["details"]["items"][x]["wastedBytes"]
    wastedpercentage= data["lighthouseResult"]["audits"]["unused-javascript"]["details"]["items"][x]["wastedPercent"]
    list1 = [url, totalbytes, wastedbytes, wastedpercentage]
    listunusedjavascript.append(list1)

#Rel Preconnect
rel_preconnect_score = data["lighthouseResult"]["audits"]["uses-rel-preconnect"]["score"]
rel_preconnect_warning = data["lighthouseResult"]["audits"]["uses-rel-preconnect"]["warnings"]

# List of Modern Images
modern_images_score = data["lighthouseResult"]["audits"]["modern-image-formats"]["score"]
modern_images_savings = data["lighthouseResult"]["audits"]["modern-image-formats"]["displayValue"]
 
listmodernimages = []
for x in range (len(data["lighthouseResult"]["audits"]["modern-image-formats"]["details"]["items"])):
    url = data["lighthouseResult"]["audits"]["modern-image-formats"]["details"]["items"][x]["url"]
    wastedbytes = data["lighthouseResult"]["audits"]["modern-image-formats"]["details"]["items"][x]["wastedBytes"]
    totalbytes = data["lighthouseResult"]["audits"]["modern-image-formats"]["details"]["items"][x]["totalBytes"]
    list1 = [url, wastedbytes, totalbytes]
    listmodernimages.append(list1)

#Cache Memory Check
cache_memory_score = data["lighthouseResult"]["audits"]["uses-long-cache-ttl"]["score"]
resources_to_cache = data["lighthouseResult"]["audits"]["uses-long-cache-ttl"]["displayValue"]
 
listcache = []
for x in range (len(data["lighthouseResult"]["audits"]["uses-long-cache-ttl"]["details"]["items"])):
    cachelifetime = data["lighthouseResult"]["audits"]["uses-long-cache-ttl"]["details"]["items"][x]["cacheLifetimeMs"]
    totalbytes = data["lighthouseResult"]["audits"]["uses-long-cache-ttl"]["details"]["items"][x]["totalBytes"]
    wastedbytes = data["lighthouseResult"]["audits"]["uses-long-cache-ttl"]["details"]["items"][x]["wastedBytes"]
    url = data["lighthouseResult"]["audits"]["uses-long-cache-ttl"]["details"]["items"][x]["url"]
    list1 = [cachelifetime, totalbytes, wastedbytes, url]
    listcache.append(list1)

#How many Layout Shifts
layout_shift_elements = data["lighthouseResult"]["audits"]["layout-shift-elements"]["displayValue"]
 
listpath_selector = []
for x in range (len(data["lighthouseResult"]["audits"]["layout-shift-elements"]["details"]["items"])):
    path = data["lighthouseResult"]["audits"]["layout-shift-elements"]["details"]["items"][x]["node"]["path"]
    selector = data["lighthouseResult"]["audits"]["layout-shift-elements"]["details"]["items"][x]["node"]["selector"]
    list1 = [path, selector]
    listpath_selector.append(list1)

#How many LCPs
lcp_elements = data["lighthouseResult"]["audits"]["largest-contentful-paint-element"]["displayValue"]
 
listpath_selector = []
for x in range (len(data["lighthouseResult"]["audits"]["largest-contentful-paint-element"]["details"]["items"])):
    path = data["lighthouseResult"]["audits"]["largest-contentful-paint-element"]["details"]["items"][x]["node"]["path"]
    selector = data["lighthouseResult"]["audits"]["largest-contentful-paint-element"]["details"]["items"][x]["node"]["selector"]
    list1 = [path, selector]
    listpath_selector.append(list1)



In [76]:
#@title Site Speed Output in a Data Frame

data = {'Metrics': ['fcp','lcp','cls','fid', 'blocking_time_duration', 'fmp','overall_score','total_tasks','total_tasks_time','long_tasks','mainthread_duration','blocking_resoures_savings ','unused_js_savings','rel_preconnect_warning','modern_images_savings','resources_to_cache','layout_shift_elements','lcp_elements'],
        'Values': [fcp, lcp, cls, fid, blocking_time_duration, fmp, overall_score, total_tasks,total_tasks_time,long_tasks, mainthread_duration, blocking_resoures_savings,unused_js_savings,rel_preconnect_warning, modern_images_savings, resources_to_cache,layout_shift_elements,lcp_elements]
        }

df = pd.DataFrame(data, columns= ['Metrics', 'Values'])
df.head(18).style.set_table_styles(
[{'selector': 'th',
  'props': [('background', '#7CAE00'), 
            ('color', 'white'),
            ('font-family', 'verdana')]},
 
 {'selector': 'td',
  'props': [('font-family', 'verdana')]},

 {'selector': 'tr:nth-of-type(odd)',
  'props': [('background', '#DCDCDC')]}, 
 
 {'selector': 'tr:nth-of-type(even)',
  'props': [('background', 'white')]},
 
]
).hide_index()

Metrics,Values
fcp,1390
lcp,26.250000
cls,0.240000
fid,1595
blocking_time_duration,"27,680 ms"
fmp,2.7 s
overall_score,22.000000
total_tasks,24368
total_tasks_time,22593.520000
long_tasks,38


#⚠️ ALERT


---


##fcp, lcp, cls, fid ====> move the comma to the first decimal to resemble the output to Page Speed Insights!

In [None]:
#@title Download the Output
df.to_excel(r'iCloud Drive\Scrivania\PSI.xlsx', index = True, header=True)