Skip to content
This repository has been archived by the owner on Feb 20, 2023. It is now read-only.

For #8803: add frameworkStart telemetry #9788

Merged
merged 7 commits into from Apr 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
54 changes: 54 additions & 0 deletions app/metrics.yaml
Expand Up @@ -1984,3 +1984,57 @@ addons:
notification_emails:
- fenix-core@mozilla.com
expires: "2020-09-01"

startup.timeline:
framework_start:
send_in_pings:
- startup-timeline
type: timespan
time_unit: nanosecond
description: >
The duration the Android framework takes to start before letting us run code in
`*Application.init`. This is calculated from `appInitTimestamp - processStartTimestamp`.
`processStartTimestamp` is derived from the clock tick time unit, which is expected to be
less granular than nanoseconds. Therefore, we convert and round our timestamps to clock ticks
before computing the difference and convert back to nanoseconds to report.

For debugging purposes, `clock_ticks_per_second`, which may vary between devices, is also
reported as a metric.
bugs:
- https://github.com/mozilla-mobile/fenix/issues/8803
data_reviews:
- https://github.com/mozilla-mobile/fenix/pull/9788#pullrequestreview-394228626
notification_emails:
- perf-android-fe@mozilla.com
- mcomella@mozilla.com
expires: "2020-07-15"
framework_start_error:
send_in_pings:
- startup-timeline
type: boolean
description: >
An error when attempting to record `framework_start` - the application init timestamp returned
a negative value - which is likely indicative of a bug in the implementation.
bugs:
- https://github.com/mozilla-mobile/fenix/issues/8803
data_reviews:
- https://github.com/mozilla-mobile/fenix/pull/9788#pullrequestreview-394228626
notification_emails:
- perf-android-fe@mozilla.com
- mcomella@mozilla.com
expires: "2020-07-15"
clock_ticks_per_second:
send_in_pings:
- startup-timeline
type: counter
description: >
The number of clock tick time units that occur in one second on this particular device. This
value is expected to be used in conjunction with the `framework_start` metric.
bugs:
- https://github.com/mozilla-mobile/fenix/issues/8803
data_reviews:
- https://github.com/mozilla-mobile/fenix/pull/9788#pullrequestreview-394228626
notification_emails:
- perf-android-fe@mozilla.com
- mcomella@mozilla.com
expires: "2020-07-15"
16 changes: 16 additions & 0 deletions app/pings.yaml
Expand Up @@ -29,3 +29,19 @@ installation:
- https://github.com/mozilla-mobile/fenix/pull/8074#issuecomment-586512202
notification_emails:
- fenix-core@mozilla.com

startup-timeline:
description: >
This ping is intended to provide an understanding of startup performance.

The ping is intended to be captured by performance testing automation to report results
there, in addition to user telemetry. We place these metrics into their own ping in order
to isolate them and make this process easier.
include_client_id: false
bugs:
- https://github.com/mozilla-mobile/fenix/issues/8803
data_reviews:
- https://github.com/mozilla-mobile/fenix/pull/9788#pullrequestreview-394228626
notification_emails:
- perf-android-fe@mozilla.com
- esmyth@mozilla.com
13 changes: 13 additions & 0 deletions app/src/main/java/org/mozilla/fenix/FenixApplication.kt
Expand Up @@ -38,6 +38,7 @@ import org.mozilla.fenix.FeatureFlags.webPushIntegration
import org.mozilla.fenix.components.Components
import org.mozilla.fenix.components.metrics.MetricServiceType
import org.mozilla.fenix.ext.settings
import org.mozilla.fenix.perf.StartupTimeline
import org.mozilla.fenix.push.PushFxaIntegration
import org.mozilla.fenix.push.WebPushEngineIntegration
import org.mozilla.fenix.session.NotificationSessionObserver
Expand All @@ -49,6 +50,10 @@ import org.mozilla.fenix.utils.Settings
@SuppressLint("Registered")
@Suppress("TooManyFunctions", "LargeClass")
open class FenixApplication : LocaleAwareApplication() {
init {
recordOnInit() // DO NOT MOVE ANYTHING ABOVE HERE: the timing of this measurement is critical.
}
mcomella marked this conversation as resolved.
Show resolved Hide resolved

private val logger = Logger("FenixApplication")

open val components by lazy { Components(this) }
Expand Down Expand Up @@ -365,4 +370,12 @@ open class FenixApplication : LocaleAwareApplication() {
Logger.error("Failed to initialize web extension support", e)
}
}

protected fun recordOnInit() {
// This gets called by more than one process. Ideally we'd only run this in the main process
// but the code to check which process we're in crashes because the Context isn't valid yet.
//
// This method is not covered by our internal crash reporting: be very careful when modifying it.
StartupTimeline.onApplicationInit() // DO NOT MOVE ANYTHING ABOVE HERE: the timing is critical.
}
}
6 changes: 5 additions & 1 deletion app/src/main/java/org/mozilla/fenix/HomeActivity.kt
Expand Up @@ -33,6 +33,7 @@ import mozilla.components.concept.engine.EngineView
import mozilla.components.feature.contextmenu.ext.DefaultSelectionActionDelegate
import mozilla.components.service.fxa.sync.SyncReason
import mozilla.components.support.base.feature.UserInteractionHandler
import mozilla.components.support.ktx.android.arch.lifecycle.addObservers
import mozilla.components.support.ktx.android.content.share
import mozilla.components.support.ktx.kotlin.isUrl
import mozilla.components.support.ktx.kotlin.toNormalizedUrl
Expand Down Expand Up @@ -146,7 +147,10 @@ open class HomeActivity : LocaleAwareAppCompatActivity() {
}
supportActionBar?.hide()

lifecycle.addObserver(webExtensionPopupFeature)
lifecycle.addObservers(
webExtensionPopupFeature,
StartupTimeline.homeActivityLifecycleObserver
)
StartupTimeline.onActivityCreateEndHome(this)
}

Expand Down
@@ -0,0 +1,86 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

package org.mozilla.fenix.perf

import android.os.Process
import android.os.SystemClock
import kotlin.math.roundToLong
import org.mozilla.fenix.GleanMetrics.StartupTimeline as Telemetry

/**
* A class to measure the time the Android framework executes before letting us execute our own code
* for the first time in Application's initializer: this value is captured in the
* [Telemetry.frameworkStart] probe.
*
* Since we cannot execute code at process start, this measurement does not fit within the Glean
* Timespan metric start/stop programming model so it lives in its own class.
*/
internal class StartupFrameworkStartMeasurement(
private val stat: Stat = Stat(),
private val telemetry: Telemetry = Telemetry,
private val getElapsedRealtimeNanos: () -> Long = SystemClock::elapsedRealtimeNanos
) {

private var isMetricSet = false

private var applicationInitNanos = -1L
private var isApplicationInitCalled = false

fun onApplicationInit() {
// This gets called from multiple processes: don't do anything expensive. See call site for details.
//
// In the main process, there are multiple Application impl so we ensure it's only set by
// the first one.
if (!isApplicationInitCalled) {
isApplicationInitCalled = true
applicationInitNanos = getElapsedRealtimeNanos()
}
mcomella marked this conversation as resolved.
Show resolved Hide resolved
}

/**
* Sets the values for metrics to record in glean.
*
* We defer these metrics, rather than setting them as soon as the values are available,
* because they are slow to fetch and we don't want to impact startup.
*/
fun setExpensiveMetric() {
// The application is only init once per process lifetime so we only set this value once.
if (isMetricSet) return
isMetricSet = true

if (applicationInitNanos < 0) {
telemetry.frameworkStartError.set(true)
} else {
telemetry.frameworkStart.setRawNanos(getFrameworkStartNanos())

// frameworkStart is derived from the number of clock ticks per second. To ensure this
// value does not throw off our result, we capture it too.
telemetry.clockTicksPerSecond.add(stat.clockTicksPerSecond.toInt())
}
}

private fun getFrameworkStartNanos(): Long {
// Get our timestamps in ticks: we expect ticks to be less granular than nanoseconds so,
// to ensure our measurement uses the correct number of significant figures, we convert
// everything to ticks before getting the result.
//
// Similarly, we round app init to a whole integer tick value because process start only
// comes in integer ticks values.
val processStartTicks = stat.getProcessStartTimeTicks(Process.myPid())
val applicationInitTicks = applicationInitNanos.nanosToTicks().roundToLong()

val frameworkStartTicks = applicationInitTicks - processStartTicks

// Glean only takes whole unit nanoseconds so we round to that. I'm not sure but it may be
// possible that capturing nanos in a double will produce a rounding error that chops off
// significant values. However, since we expect to be using a much smaller portion of the
// nano field - if ticks are actually less granular than nanoseconds - I don't expect for
// this to be a problem.
return frameworkStartTicks.ticksToNanos().roundToLong()
}

private fun Long.nanosToTicks(): Double = stat.convertNanosToTicks(this)
private fun Long.ticksToNanos(): Double = stat.convertTicksToNanos(this)
}
52 changes: 51 additions & 1 deletion app/src/main/java/org/mozilla/fenix/perf/StartupTimeline.kt
Expand Up @@ -5,8 +5,17 @@
package org.mozilla.fenix.perf

import androidx.annotation.UiThread
import androidx.lifecycle.Lifecycle
import androidx.lifecycle.LifecycleObserver
import androidx.lifecycle.OnLifecycleEvent
import kotlinx.coroutines.GlobalScope
import kotlinx.coroutines.launch
import mozilla.components.service.glean.private.NoReasonCodes
import mozilla.components.service.glean.private.PingType
import org.mozilla.fenix.GleanMetrics.Pings
import org.mozilla.fenix.HomeActivity
import org.mozilla.fenix.home.sessioncontrol.viewholders.topsites.TopSiteItemViewHolder
import org.mozilla.fenix.perf.StartupTimeline.onApplicationInit
import org.mozilla.fenix.perf.StartupTimelineStateMachine.StartupActivity
import org.mozilla.fenix.perf.StartupTimelineStateMachine.StartupDestination
import org.mozilla.fenix.perf.StartupTimelineStateMachine.StartupState
Expand All @@ -20,12 +29,27 @@ import org.mozilla.fenix.perf.StartupTimelineStateMachine.StartupState
* This class, and its dependencies, may need to be modified for any changes in startup.
*
* This class is not thread safe and should only be called from the main thread.
*
* [onApplicationInit] is called from multiple processes. To minimize overhead, the class
* dependencies are lazily initialized.
*/
@UiThread
object StartupTimeline {

private var state: StartupState = StartupState.Cold(StartupDestination.UNKNOWN)
private val reportFullyDrawn = StartupReportFullyDrawn()

private val reportFullyDrawn by lazy { StartupReportFullyDrawn() }
private val frameworkStartMeasurement by lazy { StartupFrameworkStartMeasurement() }
internal val homeActivityLifecycleObserver by lazy {
StartupHomeActivityLifecycleObserver(frameworkStartMeasurement)
}

fun onApplicationInit() {
// This gets called from multiple processes: don't do anything expensive. See call site for details.
//
// This method also gets called multiple times if there are multiple Application implementations.
frameworkStartMeasurement.onApplicationInit()
}

fun onActivityCreateEndIntentReceiver() {
advanceState(StartupActivity.INTENT_RECEIVER)
Expand All @@ -45,3 +69,29 @@ object StartupTimeline {
state = StartupTimelineStateMachine.getNextState(state, startingActivity)
}
}

/**
* A [LifecycleObserver] for [HomeActivity] focused on startup performance measurement.
*/
internal class StartupHomeActivityLifecycleObserver(
private val frameworkStartMeasurement: StartupFrameworkStartMeasurement,
private val startupTimeline: PingType<NoReasonCodes> = Pings.startupTimeline
) : LifecycleObserver {

@OnLifecycleEvent(Lifecycle.Event.ON_STOP)
fun onStop() {
GlobalScope.launch { // use background thread due to expensive metrics.
// Ensure any last metrics are set before submission.
mcomella marked this conversation as resolved.
Show resolved Hide resolved
frameworkStartMeasurement.setExpensiveMetric()

// Startup metrics placed in the Activity should be re-recorded each time the Activity
// is started so we need to clear the ping lifetime by submitting once per each startup.
// It's less complex to add it here rather than the visual completeness task manager.
//
// N.B.: this submission location may need to be changed if we add metrics outside of the
// HomeActivity startup path (e.g. if the user goes directly to a separate activity and
// closes the app, they will never hit this) to appropriately adjust for the ping lifetimes.
startupTimeline.submit()
}
}
}
66 changes: 66 additions & 0 deletions app/src/main/java/org/mozilla/fenix/perf/Stat.kt
@@ -0,0 +1,66 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

package org.mozilla.fenix.perf

import android.os.SystemClock
import android.system.Os
import android.system.OsConstants
import androidx.annotation.VisibleForTesting
import androidx.annotation.VisibleForTesting.PRIVATE
import java.io.File
import java.util.concurrent.TimeUnit

private const val FIELD_POS_STARTTIME = 21 // starttime naming matches field in man page.

/**
* Functionality from stat on the proc pseudo-filesystem common to unix systems. /proc contains
* information related to active processes. /proc/$pid/stat contains information about the status of
* the process by the given process id (pid).
*
* See the man page - `man 5 proc` - on linux for more information:
* http://man7.org/linux/man-pages/man5/proc.5.html
*/
open class Stat {

@VisibleForTesting(otherwise = PRIVATE)
open fun getStatText(pid: Int): String = File("/proc/$pid/stat").readText()

// See `man 3 sysconf` for details on Os.sysconf and OsConstants:
// http://man7.org/linux/man-pages/man3/sysconf.3.html
open val clockTicksPerSecond: Long get() = Os.sysconf(OsConstants._SC_CLK_TCK)
private val nanosPerClockTick = TimeUnit.SECONDS.toNanos(1).let { nanosPerSecond ->
// We use nanos per clock tick, rather than clock ticks per nanos, to mitigate float/double
// rounding errors: this way we can use integer values and divide the larger value by the smaller one.
nanosPerSecond / clockTicksPerSecond.toDouble()
}

/**
* Gets the process start time since system boot in ticks, including time spent in suspension/deep sleep.
* This value can be compared against [SystemClock.elapsedRealtimeNanos]: you can convert between
* measurements using [convertTicksToNanos] and [convertNanosToTicks].
*
* Ticks are "an arbitrary unit for measuring internal system time": https://superuser.com/a/101202
* They are not aligned with CPU frequency and do not change at runtime but can theoretically
* change between devices. On the Pixel 2, one tick is equivalent to one centisecond.
*
* We confirmed that this measurement and elapsedRealtimeNanos both include suspension time, and
* are thus comparable, by* looking at their source:
* - /proc/pid/stat starttime is set using boottime:
* https://github.com/torvalds/linux/blob/79e178a57dae819ae724065b47c25720494cc9f2/fs/proc/array.c#L536
* - elapsedRealtimeNanos is set using boottime:
* https://cs.android.com/android/platform/superproject/+/master:system/core/libutils/SystemClock.cpp;l=60-68;drc=bab16584ce0525742b5370682c9132b2002ee110
*
* Perf note: this call reads from the pseudo-filesystem using the java File APIs, which isn't
* likely to be a very optimized call path.
*
* Implementation inspired by https://stackoverflow.com/a/42195623.
*/
fun getProcessStartTimeTicks(pid: Int): Long {
return getStatText(pid).split(' ')[FIELD_POS_STARTTIME].toLong()
}

fun convertTicksToNanos(ticks: Long): Double = ticks * nanosPerClockTick
fun convertNanosToTicks(nanos: Long): Double = nanos / nanosPerClockTick
}
Expand Up @@ -13,8 +13,9 @@ import org.mozilla.fenix.session.PerformanceActivityLifecycleCallbacks
* An application class which knows how to migrate Fennec data.
*/
class MigratingFenixApplication : FenixApplication() {

init {
recordOnInit() // DO NOT MOVE ANYTHING ABOVE HERE: the timing of this measurement is critical.
mcomella marked this conversation as resolved.
Show resolved Hide resolved

PerformanceActivityLifecycleCallbacks.isTransientActivityInMigrationVariant = {
if (it is MigrationDecisionActivity) true else false
}
Expand Down