## What percentage of SymPy Live (live.sympy.org) users are on mobile?

In [1]:
# %pip install user-agents
from user_agents import parse

In [2]:
import pandas as pd

In [3]:
# Load the raw request-log export into `rawdf`; a later cell derives `df` from it.
#
# small sample, kept for quick iteration. It is assigned to `rawdf` (not `df`)
# so that the cells below, which read `rawdf`, keep working when it is enabled.
# NOTE(review): unlike the large file, this path has no "sympy-GAE-data/"
# prefix -- confirm where the small export actually lives.
# rawdf = pd.read_csv("downloaded-logs-20220728-155658.csv")

# large (10K rows)
rawdf = pd.read_csv("sympy-GAE-data/downloaded-logs-20220728-160156.csv")

In [4]:
# List every field in the raw export to decide which ones are worth keeping.
rawdf.columns

Index(['httpRequest.status', 'insertId', 'labels.clone_id', 'logName',
       'operation.first', 'operation.id', 'operation.last',
       'operation.producer', 'protoPayload.appEngineRelease',
       'protoPayload.appId', 'protoPayload.cost', 'protoPayload.endTime',
       'protoPayload.finished', 'protoPayload.first', 'protoPayload.host',
       'protoPayload.httpVersion', 'protoPayload.instanceIndex',
       'protoPayload.ip', 'protoPayload.latency', 'protoPayload.method',
       'protoPayload.referrer', 'protoPayload.requestId',
       'protoPayload.resource', 'protoPayload.responseSize',
       'protoPayload.spanId', 'protoPayload.startTime', 'protoPayload.status',
       'protoPayload.traceId', 'protoPayload.traceSampled',
       'protoPayload.userAgent', 'protoPayload.versionId', 'receiveLocation',
       'receiveTimestamp', 'receivedLocation', 'resource.labels.module_id',
       'resource.labels.project_id', 'resource.labels.version_id',
       'resource.labels.zone', 'resource.

In [5]:
# (rows, columns) of the raw export.
rawdf.shape

(10000, 43)

In [6]:
# Raw log field name -> short column name used in the rest of the notebook.
columns_to_extract = {
    "httpRequest.status": "status",
    "protoPayload.ip": "ip",
    "protoPayload.startTime": "time",
    "protoPayload.host": "host",
    "protoPayload.referrer": "referrer",
    "protoPayload.userAgent": "userAgent",
}

# Select only the needed fields first, then rename them by name.
# Selecting before copying avoids deep-copying every raw column just to throw
# most of them away, and `rename` pairs each field with its short name
# explicitly instead of relying on positional assignment to `df.columns`.
# `rename` returns a new frame, so `rawdf` is left untouched and `df` can be
# extended with new columns later without affecting it.
df = rawdf[list(columns_to_extract)].rename(columns=columns_to_extract)

### Exploring referrer

In [7]:
# Only rows that actually carry a referrer can be analysed here.
df2 = df.dropna(subset=["referrer"])

# Exclude referrers from docs.sympy.org and www.osgeo.cn so the remaining
# sources are visible. The pattern is a raw string: in a normal string literal
# "\." is an invalid escape sequence, which Python reports as a
# DeprecationWarning/SyntaxWarning. The regex itself is unchanged.
rdf = df2[~df2["referrer"].str.contains(r"docs\.sympy\.org|www\.osgeo\.cn")]

# Lift the row limit only for this one printout so the full list is shown.
with pd.option_context('display.max_rows', None):
    print(rdf["referrer"].value_counts())

http://certik.github.io/                                                                       186
http://man.hubwiz.com/                                                                          61
http://mattpap.github.io/                                                                       29
http://devdoc.net/                                                                              24
http://www.devdoc.net/                                                                          21
https://live.sympy.org/                                                                          4
http://devdoc.net/python/sympy-1.0/_modules/sympy/functions/special/bessel.html                  2
https://python-videos-for-beginners.readthedocs.io/en/latest/                                    2
http://www.caacle.com/                                                                           2
https://docs-sympy-org.translate.goog/                                                           2
http://dev

### Exploring user agent fields

In [8]:
# Tally rows per distinct user-agent string (most frequent first),
# then show the ten most common ones.
uas = df["userAgent"].value_counts()
uas.head(10)

Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36                           3104
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36                      650
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0                                                           620
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36 Edg/103.0.1264.71     577
Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0                                                             376
Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0                                                                     304
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36                                      281

In [9]:
# Example row (label 585) whose user-agent string comes from an iPhone;
# it is used in the next cells to try out the parser.
df.loc[585,"userAgent"]

'Mozilla/5.0 (iPhone; CPU iPhone OS 15_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Mobile/15E148 Safari/604.1'

In [10]:
# Parse the example user-agent string into an object exposing
# browser / os / device attributes (inspected in the following cells).
ua = parse(df.loc[585,"userAgent"])

In [11]:
# Browser family and version reported by the parser.
ua.browser.family, ua.browser.version_string

('Mobile Safari', '15.5')

In [12]:
# Operating-system family and version reported by the parser.
ua.os.family, ua.os.version_string

('iOS', '15.5')

In [13]:
# Device family, brand and model reported by the parser.
ua.device.family, ua.device.brand, ua.device.model 

('iPhone', 'Apple', 'iPhone')

In [14]:
# The flag this analysis is built on: the parser's mobile / not-mobile verdict.
ua.is_mobile

True

In [15]:
def userAgent2cols(row):
    """Split one row's user-agent string into browser, OS and device fields.

    Parameters
    ----------
    row
        Mapping-like object with a "userAgent" entry, e.g. the row passed
        by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    tuple
        ``(browser family, browser version, os family, os version,
        device family, device brand, device model, is_mobile)``.
    """
    userAgent = row["userAgent"]
    # A log row without a user agent is read from CSV as NaN (a float), not a
    # string. Substitute an empty string so one such row does not abort the
    # whole apply().
    # NOTE(review): assumes parse("") returns the library's generic
    # "unrecognized" result (and is_mobile False) -- confirm.
    if not isinstance(userAgent, str):
        userAgent = ""
    ua = parse(userAgent)
    return (ua.browser.family, ua.browser.version_string,
            ua.os.family, ua.os.version_string,
            ua.device.family, ua.device.brand, ua.device.model,
            ua.is_mobile)

# Column names for the eight values userAgent2cols returns, in the same order.
ua_cols = [
    "browser",
    "browser.version",
    "os",
    "os.version",
    "device.family",
    "device.brand",
    "device.model",
    "mobile",
]

# Parse every row's user agent; result_type="expand" spreads each returned
# tuple over separate columns, which are stored on df under the names above.
df[ua_cols] = df[["userAgent"]].apply(
    userAgent2cols,
    axis="columns",
    result_type="expand",
)
df

Unnamed: 0,status,ip,time,host,referrer,userAgent,browser,browser.version,os,os.version,device.family,device.brand,device.model,mobile
0,304,85.220.88.150,2022-07-28T19:38:41.509223Z,live.sympy.org,https://docs.sympy.org/,Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Geck...,Firefox,102.0,Linux,,Other,,,False
1,304,179.56.107.1,2022-07-28T19:37:06.643810Z,live.sympy.org,https://docs.sympy.org/,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,Chrome,103.0.0,Windows,10,Other,,,False
2,304,190.60.236.18,2022-07-28T19:36:58.918386Z,live.sympy.org,https://docs.sympy.org/,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,Edge,103.0.1264,Windows,10,Other,,,False
3,200,62.131.13.95,2022-07-28T19:36:51.189129Z,live.sympy.org,https://docs.sympy.org/,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...,Safari,15.5,Mac OS X,10.15.7,Mac,Apple,Mac,False
4,304,2601:44:203:42c0:256c:616e:27c:89cd,2022-07-28T19:36:43.705907Z,live.sympy.org,https://docs.sympy.org/,Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/53...,Chrome,103.0.5060,Linux,,Other,,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,304,183.227.183.58,2022-07-26T09:53:34.314241Z,live.sympy.org,https://www.osgeo.cn/sympy/modules/vector/fiel...,Mozilla/5.0 (iPhone; CPU iPhone OS 15_5 like M...,Mobile Safari UI/WKWebView,,iOS,15.5,iPhone,Apple,iPhone,True
9996,204,131.188.6.165,2022-07-26T09:53:26.765839Z,live.sympy.org,https://docs.sympy.org/,Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/...,Firefox,102.0,Windows,10,Other,,,False
9997,304,80.194.16.22,2022-07-26T09:53:19.975565Z,live.sympy.org,https://docs.sympy.org/,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6...,Chrome,103.0.0,Mac OS X,10.14.6,Mac,Apple,Mac,False
9998,304,199.101.192.35,2022-07-26T09:53:14.354415Z,live.sympy.org,https://docs.sympy.org/,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7...,Chrome,103.0.0,Mac OS X,10.15.7,Mac,Apple,Mac,False


In [16]:
# Fraction of rows flagged as mobile vs. not mobile.
# NOTE(review): each row looks like one logged request (status / ip / time),
# so this is the share of requests rather than of distinct users -- confirm,
# and consider deduplicating (e.g. by ip) if a per-user figure is wanted.
df["mobile"].value_counts(normalize=True)

False    0.9335
True     0.0665
Name: mobile, dtype: float64

In [17]:
# Select only mobile visitors. The "mobile" column already holds booleans, so
# it is used directly as the row mask (no `== True` comparison needed).
mdf = df[df["mobile"]]

In [18]:
# Count mobile rows per (os, os.version) pair.
# Versions are strings, so they are listed in lexicographic rather than
# numeric order (e.g. "10" comes before "9").
# NOTE(review): the selected columns are the same as the group keys, so
# `.size()` on the groupby would presumably give the same table -- confirm
# before simplifying.
mdf.groupby(["os", "os.version"])[["os", "os.version"]].value_counts()

os       os.version
Android  10             84
         11            135
         12            121
         4.4.4           1
         5.0             4
         6.0.1          33
         7.0            15
         7.1.1           3
         7.1.2           1
         8.0.0          25
         8.1.0          12
         9              57
iOS      10.2.1          1
         11.1.1          1
         12.5            1
         12.5.5          1
         13.3            1
         13.5.1          2
         14.3            1
         14.4            1
         14.4.2          1
         14.6            1
         14.7.1          1
         14.8            2
         14.8.1          4
         15.0.2          6
         15.1            2
         15.2            3
         15.2.1          1
         15.3.1          4
         15.4            4
         15.4.1         10
         15.5          106
         15.6           14
         16.0            4
         9.1             2
dtype: i