# Mobile Metrics

Loads and saves webpagetest.org performance tests.

Needs a local installation of the webpagetest tools https://github.com/WPO-Foundation/webpagetest and a mobile device or a key for the webpagetest API.

Feel free to contact me for help.

© Paul Ronga under Apache-2 Licence (see LICENCE.txt).

In [1]:
import pandas as pd
import requests
from IPython.display import HTML
import json
import datetime

In [2]:
# Endpoints of the local PHP scripts: one is POSTed a URL to test,
# the other is queried with a test id (filled into the {} placeholder).
WEBPAGETEST_BASE_URL = 'http://rospo.local/~paul/webpagetest'
RUN_TEST_URL = WEBPAGETEST_BASE_URL + '/run_mobile.php'
GET_TEST_URL = WEBPAGETEST_BASE_URL + '/get.php?test_id={}'

In [3]:
# Media catalogue: one row per media with its id, name and URLs
media_list_path = 'df/media_list.csv'
medias = pd.read_csv(media_list_path)

# Store the ids as text rather than numbers
medias['media_id'] = medias['media_id'].map(str)

medias.head(2)

Unnamed: 0,media_id,Name,URL_short,URL,URL_mobile
0,19,La Tribune de Genève,tdg.ch,https://www.tdg.ch/,https://m.tdg.ch
1,20,24 heures,24heures.ch,https://www.24heures.ch,https://m.24heures.ch


# 1. Launch tests

Launch tests (average of 3 runs) for each media on a device monitored with the webpagetest system.

In [4]:
# Maps each media_id to the id of the test launched for it
reports = dict()

In [5]:
# Submit one test per media and remember the returned test id in `reports`
# (keyed by media_id) so the results can be fetched later.
for i, row in medias.iterrows():
    # Prefer the mobile URL; fall back to the main URL when the mobile one is
    # missing (NaN/None) or holds the 'x' placeholder.
    url = row['URL_mobile']
    if pd.isna(url) or url == 'x':
        url = row['URL']

    print('Running test for', url)

    payload = {'url': url}
    # Without a timeout a stalled server would block the notebook forever.
    r = requests.post(RUN_TEST_URL, json=payload, timeout=60)

    # Show the raw body first so it is visible even when the request failed.
    print(r.text, end='\n\n')
    # Stop with the HTTP status rather than an opaque JSON/KeyError below.
    r.raise_for_status()
    result = r.json()
    reports[row['media_id']] = result['id']

Running test for https://m.tdg.ch
{"id":"180624_R0_acb0748e369a8e0ad3675a7980151216", "url":"https://m.tdg.ch"}

Running test for https://m.24heures.ch
{"id":"180624_MW_4840f5590f3f2b11b266abf09c599300", "url":"https://m.24heures.ch"}

Running test for https://www.letemps.ch
{"id":"180624_FY_349af15c32f0aa31afa670d176e7abe8", "url":"https://www.letemps.ch"}

Running test for https://mobile.lemonde.fr
{"id":"180624_9T_e3652291d111468baeffd03c2228731b", "url":"https://mobile.lemonde.fr"}

Running test for https://www.rts.ch/info/
{"id":"180624_9S_e12c1ef201322f102c549df9bfe0280e", "url":"https://www.rts.ch/info/"}

Running test for https://m.20min.ch/ro/
{"id":"180624_ZV_70d506cf37c836194f5006d9564facde", "url":"https://m.20min.ch/ro/"}

Running test for https://m.lematin.ch
{"id":"180624_QN_e4d6a8bd2db53d998b53a82c7dc68d80", "url":"https://m.lematin.ch"}

Running test for https://www.mediapart.fr
{"id":"180624_K2_b3c8123bf323364e717e8fa1c41553a8", "url":"https://www.mediapart.fr"}

Runn

# 2. Get test results

Wait about a quarter of an hour before trying to get these results.

In [6]:
# Empty results table, to be filled with one row per fetched test result
speed_columns = [
    'Name',
    'media_id',
    'location',
    'visualComplete',
    'fullyLoaded',
    'requests',
    'report_url',
]
df_speed = pd.DataFrame(columns=speed_columns)

In [13]:
# Fetch the result of each launched test and add it to df_speed.
# The cell can be re-run: media that already have a row in df_speed are
# skipped, and tests without a result yet are reported and left for later.
for i, row in medias.iterrows():
    print('Getting results for', row['Name'], '...')

    if (df_speed['media_id'] == row['media_id']).any():
        print('Result already fetched.')
        continue

    test_id = reports.get(row['media_id'])
    if test_id is None:
        # No id was stored for this media, so there is nothing to query.
        print('No test launched for {} in this session.'.format(row['Name']))
        continue

    # Without a timeout a stalled server would block the notebook forever.
    r = requests.get(GET_TEST_URL.format(test_id), timeout=60)
    print(r)
    try:
        data = r.json()
    except ValueError as e:
        # Not JSON: show what came back and move on, instead of carrying on
        # with `data` left over from the previous media (or undefined).
        print(e)
        print(r.text)
        continue

    if 'data' not in data or 'average' not in data['data']:
        print('No result for {} yet.'.format(row['Name']))
        continue

    result = data['data']
    first_view = result['average']['firstView']

    print('Successful first view runs:', result['successfulFVRuns'])

    print('Location and device were:', result['location'], 'with connectivity:', result['connectivity'])

    missing_key = False
    for key in ['visualComplete', 'fullyLoaded', 'requests']:
        if key not in first_view:
            print('Missing key:', key)
            print(first_view.keys())
            if key == 'visualComplete' and 'fullyLoaded' in first_view:
                # Fall back to the fully-loaded time when visualComplete is absent.
                first_view['visualComplete'] = first_view['fullyLoaded']
            else:
                missing_key = True

    if missing_key:
        continue

    new_row = pd.DataFrame([[
        row['Name'],
        row['media_id'],
        result['location'],
        int(first_view['visualComplete']),
        int(first_view['fullyLoaded']),
        int(first_view['requests']),
        result['summary']
    ]], columns=['Name', 'media_id', 'location', 'visualComplete', 'fullyLoaded', 'requests', 'report_url'])
    # DataFrame.append was removed in pandas 2.0; ignore_index keeps row labels unique.
    df_speed = pd.concat([df_speed, new_row], ignore_index=True)

    with open('df/archive/mobile/{}.json'.format(result['testId']), 'w') as outfile:
        json.dump(result, outfile)
    print('Saved to disk and json stored in df/archive/mobile/', end='\n\n')

Getting results for La Tribune de Genève ...
Result already fetched.
Getting results for 24 heures ...
<Response [200]>
No result for 24 heures yet.
Getting results for Le Temps ...
<Response [200]>
No result for Le Temps yet.
Getting results for Le Monde ...
<Response [200]>
No result for Le Monde yet.
Getting results for RTS info ...
<Response [200]>
No result for RTS info yet.
Getting results for 20 minutes (ch) ...
<Response [200]>
No result for 20 minutes (ch) yet.
Getting results for Le Matin ...
<Response [200]>
No result for Le Matin yet.
Getting results for Mediapart ...
<Response [200]>
No result for Mediapart yet.
Getting results for Le Figaro ...
<Response [200]>
No result for Le Figaro yet.
Getting results for Libération ...
<Response [200]>
No result for Libération yet.
Getting results for La Côte ...
<Response [200]>
No result for La Côte yet.
Getting results for Arcinfo ...
<Response [200]>
No result for Arcinfo yet.
Getting results for Le Nouvelliste ...
<Response [200

In [14]:
# Inspect the payload parsed from the last response in the loop above
data

{'statusCode': '100', 'statusText': 'Test Pending'}

In [204]:
# Convert every fullyLoaded value to a plain int
df_speed['fullyLoaded'] = df_speed['fullyLoaded'].map(int)

In [205]:
# Show the results ordered by fully-loaded time, smallest first
df_speed.sort_values(by='fullyLoaded', ascending=True)

Unnamed: 0,Name,media_id,location,visualComplete,fullyLoaded,requests,report_url
4,RTS info,23,Dulles_iPhone8:iPhone 8 iOS 11,8550,15290,165,http://www.webpagetest.org/result/180624_12_5f...
3,Le Monde,22,Dulles_iPhone8:iPhone 8 iOS 11,10850,17024,98,http://www.webpagetest.org/result/180624_CM_a0...
0,Libération,28,Dulles_iPhone8:iPhone 8 iOS 11,12385,19536,132,http://www.webpagetest.org/result/180624_PP_09...
0,Le Nouvelliste,31,Dulles_iPhone8:iPhone 8 iOS 11,19183,20011,146,http://www.webpagetest.org/result/180624_WD_44...
5,20 minutes (ch),24,Dulles_iPhone8:iPhone 8 iOS 11,11983,21527,117,http://www.webpagetest.org/result/180624_MA_74...
0,Le Courrier,33,Dulles_iPhone8:iPhone 8 iOS 11,23451,23499,69,http://www.webpagetest.org/result/180624_A5_a6...
0,Le Figaro,27,Dulles_iPhone8:iPhone 8 iOS 11,16617,27157,228,http://www.webpagetest.org/result/180624_N3_b4...
2,Le Temps,21,Dulles_iPhone8:iPhone 8 iOS 11,12802,27433,181,http://www.webpagetest.org/result/180624_DR_36...
0,La Liberté,32,Dulles_iPhone8:iPhone 8 iOS 11,24802,28133,146,http://www.webpagetest.org/result/180624_RD_2b...
1,24 heures,20,Dulles_iPhone8:iPhone 8 iOS 11,0,28283,281,http://www.webpagetest.org/result/180624_5M_13...


In [101]:
def speed_to_index(speed):
    """Turn a speed value into a rounded score: 100 at 2000, one point less per extra 100.

    Values below 2000 give scores above 100.
    Presumably `speed` is in milliseconds - TODO confirm.
    """
    return round(100 - (int(speed) - 2000) / 100)


# NOTE(review): 'mobile_speed' is not created in any cell shown here - confirm where it comes from.
medias['mobile_index'] = medias['mobile_speed'].apply(speed_to_index)
medias

Unnamed: 0,media_id,Name,URL_short,URL,URL_mobile,mobile_speed,mobile_index
0,19,La Tribune de Genève,tdg.ch,https://www.tdg.ch/,https://m.tdg.ch,2478,95
1,20,24 heures,24heures.ch,https://www.24heures.ch,https://m.24heures.ch,2817,92
2,21,Le Temps,letemps.ch,https://www.letemps.ch,,6746,53
3,22,Le Monde,lemonde.fr,https://www.lemonde.fr,https://mobile.lemonde.fr,3248,88
4,23,RTS info,rts.ch/info,https://www.rts.ch/info/,,2796,92
5,24,20 minutes (ch),20min.ch/ro,https://www.20min.ch/ro/,https://m.20min.ch/ro/,3157,88
6,25,Le Matin,lematin.ch,https://www.lematin.ch/,https://m.lematin.ch,2435,96
7,26,Mediapart,mediapart.fr,https://www.mediapart.fr,x,4570,74
8,27,Le Figaro,lefigaro.fr,https://www.lefigaro.fr/,x,6759,52
9,28,Libération,liberation.fr,https://www.liberation.fr/,,3966,80


In [104]:
# Display only the identification and mobile result columns
report_columns = ['media_id', 'Name', 'mobile_speed', 'mobile_report']
medias[report_columns]

Unnamed: 0,media_id,Name,mobile_speed,mobile_report
0,19,La Tribune de Genève,2478,180623_FQ_0b83a6fb30254f242e1abd9b5320fb1c
1,20,24 heures,2817,180623_N6_98736418a81d2347b24d9337a6c0db61
2,21,Le Temps,6746,180623_JN_9f15c3c9e3b70ce0bd5b5142d976d96c
3,22,Le Monde,3248,180623_X6_375d1e83ded108502148fe30d9e51a48
4,23,RTS info,2796,180623_FR_26d88275c2b5c515d70982577dd23abb
5,24,20 minutes (ch),3157,180623_SD_40dd125337350ddc62099ecc547d1c63
6,25,Le Matin,2435,180623_F0_488ac13e048cfe4e777f42ef2f0e5347
7,26,Mediapart,4570,180623_DQ_6d58cfc0a5ca68e317f7a036b9883829
8,27,Le Figaro,6759,180623_60_2db92d4119a6763e587dd671e841aa74
9,28,Libération,3966,180623_M9_7d8f30ce809f637d46dd47ee8ee2c7d0


In [210]:
# Archive file name carries today's date (YYYY-MM-DD)
today = datetime.date.today().isoformat()
outputfile = 'df/archive/mobile_metrics_{}.csv'.format(today)
print('Saving to {}...'.format(outputfile))

Saving to df/archive/mobile_metrics_2018-06-24_bk-sunday.csv...


In [211]:
# Write the same table twice: the dated archive copy, then the temp file
for destination in (outputfile, 'df/mobile_metrics.csv'):
    medias.to_csv(destination)

In [116]:
# Load the existing media metrics file
metrics_path = '/Users/paul/Sites/d3_v5/indices/data/media_metrics_2018-06-17.csv'
df = pd.read_csv(metrics_path)

In [117]:
# Read back only the id and the mobile result columns from the temp file
mobile_columns = ['media_id', 'mobile_speed', 'mobile_index', 'mobile_report']
df_mobile = pd.read_csv('df/mobile_metrics.csv', usecols=mobile_columns)

In [118]:
# Join the mobile columns onto the media metrics by media_id and write the
# result to the metrics file path.
# NOTE(review): the frame's index is written too, so reading this file again
# yields an extra unnamed column - confirm this is intended.
merged = df.merge(df_mobile, on='media_id')
merged.to_csv('/Users/paul/Sites/d3_v5/indices/data/media_metrics_2018-06-17.csv')

### NB

**This script only uses loading time. Optimization is evaluated with PageSpeed. But these 9 scores are also available:**
```
'score_cache',
'score_cdn',
'score_combine',
'score_compress',
'score_cookies',
'score_etags',
'score_gzip',
'score_keep-alive',
'score_minify'
```