# Mobile Metrics

Loads and saves webpagetest.org performance tests.

Needs a local installation of the webpagetest tools https://github.com/WPO-Foundation/webpagetest and a mobile device or a key for the webpagetest API.

A complete test may last for up to 2 hours.

Feel free to contact me for help: https://www.quel-media.com/about.html#contact

© Paul Ronga under Apache-2 Licence (see LICENCE.txt).

In [1]:
import pandas as pd
import requests
from IPython.display import HTML
import json
import datetime

In [2]:
# Local PHP endpoints: one is POSTed to when launching a test, the other is
# queried with a test id to read that test's result.
WPT_LOCAL_BASE = 'http://rospo.local/~paul/webpagetest'
RUN_TEST_URL = WPT_LOCAL_BASE + '/run_mobile.php'
GET_TEST_URL = WPT_LOCAL_BASE + '/get.php?test_id={}'

In [3]:
# Table of media: id, name and URLs
all_medias = pd.read_csv('df/media_list.csv')

# Keep only ids below 34 (this removes Konbini ch and fr)
keep_mask = all_medias['media_id'] < 34
medias = all_medias.loc[keep_mask].copy()

# Store the media id as a string
medias['media_id'] = medias['media_id'].map(str)

medias.head(2)

Unnamed: 0,media_id,Name,URL_short,URL,URL_mobile
0,19,La Tribune de Genève,tdg.ch,https://www.tdg.ch/,https://m.tdg.ch
1,20,24 heures,24heures.ch,https://www.24heures.ch,https://m.24heures.ch


# 1. Launch tests

Launch tests (average of 3 runs) for each media on a device monitored with the webpagetest system.

In [4]:
# Test id returned for each launched test, keyed by media_id
reports = dict()

In [5]:
# Launch one test per media and record the returned test id in `reports`
# (keyed by media_id). A media whose launch fails is reported and skipped so
# that the remaining media are still launched.
for i, row in medias.iterrows():
    # Use the regular URL when no mobile URL is listed (missing value or 'x')
    url = row['URL_mobile']
    if pd.isna(url) or url == 'x':
        url = row['URL']

    print('Running test for', url)

    payload = {'url': url}
    try:
        # The timeout keeps the loop from hanging on an unresponsive server
        r = requests.post(RUN_TEST_URL, json=payload, timeout=60)
        print(r.text, end='\n\n')
        # Surface HTTP errors as such instead of as a JSON decoding error
        r.raise_for_status()
        result = r.json()
        reports[row['media_id']] = result['id']
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        print('Could not launch test for {}: {!r}'.format(url, e), end='\n\n')

Running test for https://m.tdg.ch
{"id":"180719_ST_e32123a493048f7940cd99b72cd8275f", "url":"https://m.tdg.ch"}

Running test for https://m.24heures.ch
{"id":"180719_VV_99adcd3ac830309c274cc17c9762f016", "url":"https://m.24heures.ch"}

Running test for https://www.letemps.ch
{"id":"180719_2B_a54a630b628f8512302e1d9f7dcf21d9", "url":"https://www.letemps.ch"}

Running test for https://mobile.lemonde.fr
{"id":"180719_A0_2aff7a58a3d4571210a54d78b360a68b", "url":"https://mobile.lemonde.fr"}

Running test for https://www.rts.ch/info/
{"id":"180719_6Y_0f7bc01bbb0ffb8ca4055611a8704584", "url":"https://www.rts.ch/info/"}

Running test for https://m.20min.ch/ro/
{"id":"180719_00_17eaeb5f803cc110c9d1753292afcc5c", "url":"https://m.20min.ch/ro/"}

Running test for https://m.lematin.ch
{"id":"180719_NJ_1c29ddc5c30f8025d1a7e9af2ebc4d7f", "url":"https://m.lematin.ch"}

Running test for https://www.mediapart.fr
{"id":"180719_YX_cb2d6236fd4f521961b99d92834c88f6", "url":"https://www.mediapart.fr"}

Runn

# 2. Get test results

Wait about one hour before trying to get these results.

In [6]:
# Display the media_id -> test id mapping filled in by the launch loop
reports

{'19': '180719_ST_e32123a493048f7940cd99b72cd8275f',
 '20': '180719_VV_99adcd3ac830309c274cc17c9762f016',
 '21': '180719_2B_a54a630b628f8512302e1d9f7dcf21d9',
 '22': '180719_A0_2aff7a58a3d4571210a54d78b360a68b',
 '23': '180719_6Y_0f7bc01bbb0ffb8ca4055611a8704584',
 '24': '180719_00_17eaeb5f803cc110c9d1753292afcc5c',
 '25': '180719_NJ_1c29ddc5c30f8025d1a7e9af2ebc4d7f',
 '26': '180719_YX_cb2d6236fd4f521961b99d92834c88f6',
 '27': '180719_47_21174ee97b3292abcf92cff39ea83e7d',
 '28': '180719_ZW_042ee9ded6c3b172b848558d4a7ba24d',
 '29': '180719_KZ_0c6842aa24ebde870830f9076807385a',
 '30': '180719_45_bbb70cf3b161db99daafb30f10569dd2',
 '31': '180719_HW_65b55c3ffbd2719aa32d46146e0fb0b1',
 '32': '180719_0Q_465dbbb5642e46d33e1628b3151c8d56',
 '33': '180719_22_8c6e96e4780e1111d1f47b0f2a234444'}

In [7]:
# Empty results table; the fetch loop adds one row per media
df_speed = pd.DataFrame(
    columns=[
        'Name',
        'media_id',
        'location',
        'visualComplete',
        'fullyLoaded',
        'requests',
        'report_url',
        'completed',
    ]
)

In [8]:
# r = requests.get(GET_TEST_URL.format( reports[row['media_id']] ))

In [9]:
from IPython.display import HTML
#HTML(r.text)

In [10]:
# Poll the result endpoint for every media and add one row per finished test
# to df_speed. The cell can be re-run: media that already have a row are
# skipped, so only the tests that had no result yet are requested again.
speed_columns = ['Name', 'media_id', 'location', 'visualComplete', 'fullyLoaded', 'requests', 'report_url', 'completed']

for i, row in medias.iterrows():
    print('Getting results for', row['Name'], '...')

    if (df_speed['media_id'] == row['media_id']).any():
        print('Result already fetched.')
        continue

    # No entry in `reports` means no test id was recorded for this media
    test_id = reports.get(row['media_id'])
    if test_id is None:
        print('No test id recorded for {}.'.format(row['Name']))
        continue

    try:
        # The timeout keeps the loop from hanging on an unresponsive server
        r = requests.get(GET_TEST_URL.format(test_id), timeout=60)
    except requests.RequestException as e:
        print(e)
        continue
    print(r)

    try:
        data = r.json()
    except ValueError as e:
        print(e)
        print(r.text)
        # Skip this media: otherwise `data` is undefined on the first pass, or
        # still holds the previous media's payload on later ones.
        continue

    # Walk down to data -> average -> firstView without assuming any level exists
    results = data.get('data') if isinstance(data, dict) else None
    average = results.get('average') if isinstance(results, dict) else None
    first_view = average.get('firstView') if isinstance(average, dict) else None
    if not isinstance(first_view, dict):
        print('No result for {} yet.'.format(row['Name']))
        continue

    print('Successful first view runs:', results.get('successfulFVRuns'))
    print('Location and device were:', results.get('location'), 'with connectivity:', results.get('connectivity'))

    missing_key = False
    for key in ['visualComplete', 'fullyLoaded', 'requests']:
        if key in first_view:
            continue
        print('Missing key:', key)
        print(first_view.keys())
        if key == 'visualComplete' and 'fullyLoaded' in first_view:
            # Fall back on fullyLoaded when visualComplete is not reported
            first_view['visualComplete'] = first_view['fullyLoaded']
        else:
            missing_key = True
    if missing_key:
        continue

    new_row = pd.DataFrame([[
        row['Name'],
        row['media_id'],
        results.get('location'),
        int(first_view['visualComplete']),
        int(first_view['fullyLoaded']),
        int(first_view['requests']),
        results.get('summary'),
        results.get('completed')
    ]], columns=speed_columns)

    # Archive the raw JSON before recording the row: if the write fails, the
    # media has no row yet and is fetched again on the next run.
    with open('df/archive/mobile/{}.json'.format(results.get('testId', test_id)), 'w') as outfile:
        json.dump(results, outfile)

    # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
    # An empty table is replaced outright rather than concatenated.
    if df_speed.empty:
        df_speed = new_row
    else:
        df_speed = pd.concat([df_speed, new_row], ignore_index=True)
    print('Saved to disk and json stored in df/archive/mobile/', end='\n\n')

Getting results for La Tribune de Genève ...
<Response [200]>
No result for La Tribune de Genève yet.
Getting results for 24 heures ...
<Response [200]>
No result for 24 heures yet.
Getting results for Le Temps ...
<Response [200]>
No result for Le Temps yet.
Getting results for Le Monde ...
<Response [200]>
No result for Le Monde yet.
Getting results for RTS info ...
<Response [200]>
No result for RTS info yet.
Getting results for 20 minutes (ch) ...
<Response [200]>
No result for 20 minutes (ch) yet.
Getting results for Le Matin ...
<Response [200]>
No result for Le Matin yet.
Getting results for Mediapart ...
<Response [200]>
No result for Mediapart yet.
Getting results for Le Figaro ...
<Response [200]>
No result for Le Figaro yet.
Getting results for Libération ...
<Response [200]>
No result for Libération yet.
Getting results for La Côte ...
<Response [200]>
No result for La Côte yet.
Getting results for Arcinfo ...
<Response [200]>
No result for Arcinfo yet.
Getting results for 

In [11]:
# Show the fetched results ordered by the fullyLoaded column (ascending);
# pass by='visualComplete' to order on that column instead.
df_speed.sort_values(by='fullyLoaded')

Unnamed: 0,Name,media_id,location,visualComplete,fullyLoaded,requests,report_url,completed


In [12]:
def _mobile_index(fully_loaded):
    """Linear score: 100 at a fullyLoaded of 5000, one point less per extra 1000, rounded."""
    return round(100 - (int(fully_loaded) - 5000) / 1000)


df_speed['mobile_index'] = df_speed['fullyLoaded'].apply(_mobile_index)
df_speed.sort_values(by='mobile_index')

Unnamed: 0,Name,media_id,location,visualComplete,fullyLoaded,requests,report_url,completed,mobile_index


In [13]:
# Archive path stamped with today's date (YYYY-MM-DD)
today_stamp = datetime.date.today().isoformat()
outputfile = 'df/archive/mobile_metrics_{}.csv'.format(today_stamp)
print('Saving to {}...'.format(outputfile))

Saving to df/archive/mobile_metrics_2018-07-19.csv...


In [14]:
# Write the same table twice: the dated archive copy, then the temp file
# that is read back further down.
for csv_path in (outputfile, 'df/mobile_metrics.csv'):
    df_speed.to_csv(csv_path)

### stop here

In [15]:
# Media metrics table that the mobile results are merged into
media_metrics_path = '/Users/paul/Sites/d3_v5/indices/data/media_metrics_2018-06-17.csv'
df = pd.read_csv(media_metrics_path)

In [16]:
# Read back only the columns that go into the merge; Name, completed and the
# saved index column are left out.
mobile_columns = [
    'media_id',
    'location',
    'visualComplete',
    'fullyLoaded',
    'requests',
    'report_url',
    'mobile_index',
]
df_mobile = pd.read_csv('df/mobile_metrics.csv', usecols=mobile_columns)

In [17]:
# Join the mobile metrics onto the media metrics table on media_id (merge's
# default inner join) and write the result back over the file `df` was read from.
# index=False keeps the row index out of the CSV: pd.read_csv would otherwise
# load it back as an extra 'Unnamed: 0' column the next time the file is read.
df.merge(df_mobile, on='media_id').to_csv('/Users/paul/Sites/d3_v5/indices/data/media_metrics_2018-06-17.csv', index=False)

### NB

**This script only uses loading time. Optimization is evaluated with PageSpeed. But these 9 scores are also available:**
```
'score_cache',
'score_cdn',
'score_combine',
'score_compress',
'score_cookies',
'score_etags',
'score_gzip',
'score_keep-alive',
'score_minify'
```