In [56]:
# Home pages of the outlets whose PageSpeed scores are collected below.
# Each entry is passed to the API as-is and reused as the "Outlet" label.
urls = [
    "https://www.thestar.com/",
    "https://www.theglobeandmail.com/",
    "https://www.cbc.ca/",
    "https://nationalpost.com/",
    "https://globalnews.ca/",
    "https://www.ctvnews.ca/",
    "https://www.blogto.com/",
    "https://www.cp24.com/",
    "https://www.sportsnet.ca/",
    "https://www.tsn.ca/",
]

In [65]:
import requests
import os
import json
import time
import pandas as pd
import matplotlib.pyplot as plt 
from dotenv import load_dotenv
load_dotenv()

class PageSpeedCollector:
    """Collect field-data scores from the PageSpeed Insights v5 API.

    For every URL the collector reads the ``percentile`` value of four
    metrics under ``loadingExperience.metrics`` in the API response and
    stores one record per URL in ``self.data`` with the keys
    ``Outlet``, ``CLS``, ``FCP``, ``FID`` and ``LCP``.

    The API key is read from the ``API_KEY`` environment variable; when it
    is not set the request is sent without a ``key`` parameter.
    """

    # Endpoint queried for every URL.
    API_ENDPOINT = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"

    # (output column, key under loadingExperience.metrics, divisor or None).
    # The two *_MS paint metrics are divided by 100 (milliseconds -> tenths
    # of a second); presumably this keeps all bars on a comparable scale in
    # the shared chart -- TODO confirm the intended unit.
    METRICS = (
        ("CLS", "CUMULATIVE_LAYOUT_SHIFT_SCORE", None),
        ("FCP", "FIRST_CONTENTFUL_PAINT_MS", 100),
        ("FID", "FIRST_INPUT_DELAY_MS", None),
        ("LCP", "LARGEST_CONTENTFUL_PAINT_MS", 100),
    )

    def __init__(self):
        """Read the API key from the environment and start with no data."""
        self.api_key = os.getenv('API_KEY')
        self.data = []

    @classmethod
    def _columns(cls):
        """Return the record keys in output order: Outlet, then each metric."""
        return ["Outlet"] + [column for column, _, _ in cls.METRICS]

    @classmethod
    def _extract_scores(cls, url, res):
        """Build one score record for ``url`` from a decoded API response.

        Args:
            url: The URL that was measured; stored under ``"Outlet"``.
            res: The decoded JSON body of the API response.

        Returns:
            A dict with the keys ``Outlet``, ``CLS``, ``FCP``, ``FID`` and
            ``LCP``. A metric that is absent from the response is recorded
            as ``None`` instead of raising ``KeyError``.
        """
        metrics = {}
        if isinstance(res, dict):
            experience = res.get("loadingExperience")
            if isinstance(experience, dict) and isinstance(experience.get("metrics"), dict):
                metrics = experience["metrics"]

        scores = {"Outlet": url}
        for column, api_name, divisor in cls.METRICS:
            entry = metrics.get(api_name)
            value = entry.get("percentile") if isinstance(entry, dict) else None
            if value is not None and divisor:
                value = value / divisor
            scores[column] = value
        return scores

    def request_pagespeed_data(self, urls, delay=1, strategy="MOBILE", timeout=120):
        """Fetch scores for every URL, store them in ``self.data`` and save them.

        ``self.data`` is reset first, so the result only reflects this call.
        A URL whose request fails, or whose response carries none of the
        expected metrics, is reported and skipped; the remaining URLs are
        still processed.

        Args:
            urls: Iterable of page URLs to measure.
            delay: Seconds to wait before each request (default 1).
            strategy: Value of the API's ``strategy`` parameter.
            timeout: Seconds to wait for each HTTP response before giving up.
        """
        self.data = []
        for url in urls:
            if delay:
                time.sleep(delay)

            # Passing the values through ``params`` lets requests URL-encode
            # them, so a target URL containing '&', '?' or '#' cannot corrupt
            # the query string.
            params = {"url": url, "strategy": strategy}
            if self.api_key:
                # Never send the literal text "None" as the key.
                params["key"] = self.api_key

            try:
                response = requests.get(self.API_ENDPOINT, params=params, timeout=timeout)
                response.raise_for_status()
                scores = self._extract_scores(url, response.json())
            except (requests.RequestException, ValueError) as exc:
                # Network/HTTP errors and undecodable bodies; anything else
                # (including KeyboardInterrupt) is allowed to propagate.
                print(f"Failed to fetch {url}: {exc}")
                continue

            missing = [column for column, _, _ in self.METRICS if scores[column] is None]
            if len(missing) == len(self.METRICS):
                print(f"Failed to fetch {url}: response contains none of the expected metrics")
                continue

            self.data.append(scores)
            print(f"Successfully fetched {url}")
            if missing:
                print(f"  no data reported for: {', '.join(missing)}")

        self.save_to_files()
        print("Complete. Access via thisobj.data")

    def save_to_files(self, json_path='data.json', csv_path='data.csv'):
        """Write ``self.data`` to a JSON file and a CSV file.

        Args:
            json_path: Destination of the JSON dump (default ``data.json``).
            csv_path: Destination of the CSV export (default ``data.csv``).
        """
        with open(json_path, 'w') as outfile:
            json.dump(self.data, outfile)
        print(f"{json_path} updated")
        # Explicit columns keep the header row even when no URL succeeded.
        df = pd.DataFrame(self.data, columns=self._columns())
        df.to_csv(csv_path, index=False)
        print(f"{csv_path} updated")

    def load_dataframe_from_json(self, json_path='data.json'):
        """Return the previously saved JSON file as a DataFrame.

        Args:
            json_path: File to read (default ``data.json``).
        """
        return pd.read_json(json_path)

    def plot_data_to_chart(self):
        """Show a grouped bar chart of the four metrics, one group per outlet.

        Prints a notice and returns without plotting when ``self.data`` is
        empty.
        """
        if not self.data:
            print("No data to plot; run request_pagespeed_data first.")
            return

        columns = self._columns()
        metric_columns = columns[1:]
        df = pd.DataFrame(self.data, columns=columns)
        # Missing metrics are stored as None; coerce so every metric column
        # is numeric (NaN for gaps) and can be plotted.
        df[metric_columns] = df[metric_columns].apply(pd.to_numeric, errors="coerce")
        ax = df.plot(x="Outlet", y=metric_columns, kind="bar", figsize=(9, 8))
        ax.set_title("PageSpeed field metrics by outlet")
        plt.show()
    
       

In [66]:
# Build a collector and fetch scores for every outlet in `urls`;
# the call also rewrites data.json and data.csv when it finishes.
test = PageSpeedCollector()
test.request_pagespeed_data(urls=urls)

Successfully fetched https://www.thestar.com/
Successfully fetched https://www.theglobeandmail.com/
Successfully fetched https://www.cbc.ca/
Successfully fetched https://nationalpost.com/
Successfully fetched https://globalnews.ca/
Successfully fetched https://www.ctvnews.ca/
Successfully fetched https://www.blogto.com/
Successfully fetched https://www.cp24.com/
Successfully fetched https://www.sportsnet.ca/
Failed to fetch https://www.tsn.ca/
data.json updated
data.csv updated
Complete. Access via thisobj.data
