Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

#3517 / Fix Referer plugin to correct calculating of visitors uniqueness #12

Closed
wants to merge 1 commit into from

1 participant

@MatenViro

Patch to fix a ticket 3517.
Fix incorrect count of unique visitors in all referrers reports.
Improve performance of method Referers::archiveDayAggregateVisitors

@MatenViro MatenViro closed this
@mark-de mark-de referenced this pull request from a commit
Commit has since been removed from the repository and is no longer available.
This was referenced
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
Showing with 200 additions and 73 deletions.
  1. +102 −26 core/ArchiveProcessing/Day.php
  2. +98 −47 plugins/Referers/Referers.php
View
128 core/ArchiveProcessing/Day.php
@@ -4,7 +4,7 @@
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
- * @version $Id$
+ * @version $Id: Day.php 7206 2012-10-15 15:11:34Z matt $
*
* @category Piwik
* @package Piwik
@@ -273,7 +273,67 @@ public function queryVisitsSimple($select, $orderBy = false, $groupByCols = fals
return $this->db->fetchAll($query['sql'], $query['bind']);
}
}
-
+
+ /**
+ * Performs a simple query on the log_visit table within the time range this archive
+ * represents. Returns opened query
+ *
+ * @param string $select The SELECT clause.
+ * @param string|bool $orderBy The ORDER BY clause (without the 'ORDER BY' part). Set to
+ * false to specify no ORDER BY.
+ * @param array|bool $groupByCols An array of column names to group by. Set to false to
+ * specify no GROUP BY.
+ * @param bool $oneResultRow Whether only one row is expected or not. If set to true,
+ * this function returns one row, if false, an array of rows.
+ * @return array
+ */
+ public function getPlainVisitsCursor($additionFields, $orderBy = false, $groupByCols = false)
+ {
+ $select = array(
+ '`log_visit`.`visit_total_actions` as `'.Piwik_Archive::INDEX_NB_ACTIONS.'`',
+ '`log_visit`.`visit_total_time` as `'.Piwik_Archive::INDEX_SUM_VISIT_LENGTH.'`',
+ '`log_visit`.`visit_total_actions` < 2 as `'.Piwik_Archive::INDEX_BOUNCE_COUNT.'`',
+ '`log_visit`.`visit_goal_converted` as `'.Piwik_Archive::INDEX_NB_VISITS_CONVERTED.'`'
+ );
+
+ if(is_array($additionFields) && $additionFields)
+ {
+ foreach($additionFields as &$field)
+ {
+ $field = '`log_visit`.`'.$field.'` as `'.$field.'`';
+ }
+ $select = array_merge($select, $additionFields);
+ }
+
+ $select = implode(",", $select);
+
+ if ($orderBy && is_array($orderBy))
+ {
+ $orderByConcat = array();
+ foreach ($orderBy as $field => $direction) {
+ $orderByConcat[] = '`log_visit`.`'.$field.'` '.$direction;
+ }
+ $orderBy = implode(',', $orderByConcat);
+ }
+
+ $from = "log_visit";
+ $where = "`log_visit`.`visit_last_action_time` >= ?
+ and `log_visit`.`visit_last_action_time` <= ?
+ and `log_visit`.`idsite` = ?";
+
+ $groupBy = false;
+ if ($groupByCols and !empty($groupByCols))
+ {
+ $groupBy = implode(',', $groupByCols);
+ }
+
+ $bind = array($this->getStartDatetimeUTC(), $this->getEndDatetimeUTC(), $this->idsite);
+
+ $query = $this->getSegment()->getSelectQuery($select, $from, $where, $bind, $orderBy, $groupBy);
+
+ return $this->db->query($query['sql'], $query['bind']);
+ }
+
/**
* Helper function that returns a DataTable containing the $select fields / value pairs.
* IMPORTANT: The $select must return only one row!!
@@ -869,17 +929,17 @@ public function getNewInterestRowLabeled( $label )
}
/**
- * Adds the given row $newRowToAdd to the existing $oldRowToUpdate passed by reference
+ * Adds the given row $newRowToAdd to the existing $oldRowToUpdate. Both passed by reference for better performance
*
* The rows are php arrays Name => value
*
- * @param array $newRowToAdd
- * @param array $oldRowToUpdate
+ * @param array &$newRowToAdd
+ * @param array &$oldRowToUpdate
* @param bool $onlyMetricsAvailableInActionsTable
* @param bool $doNotSumVisits
- * @return
+ * @return void
*/
- public function updateInterestStats( $newRowToAdd, &$oldRowToUpdate, $onlyMetricsAvailableInActionsTable = false, $doNotSumVisits = false)
+ public function updateInterestStatsByRef(&$newRowToAdd, &$oldRowToUpdate, $onlyMetricsAvailableInActionsTable = false, $doNotSumVisits = false)
{
// Pre 1.2 format: string indexed rows are returned from the DB
// Left here for Backward compatibility with plugins doing custom SQL queries using these metrics as string
@@ -887,27 +947,28 @@ public function updateInterestStats( $newRowToAdd, &$oldRowToUpdate, $onlyMetric
{
if(!$doNotSumVisits)
{
- $oldRowToUpdate[Piwik_Archive::INDEX_NB_UNIQ_VISITORS] += $newRowToAdd['nb_uniq_visitors'];
- $oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS] += $newRowToAdd['nb_visits'];
+ $oldRowToUpdate[Piwik_Archive::INDEX_NB_UNIQ_VISITORS] += $newRowToAdd['nb_uniq_visitors'];
+ $oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS] += $newRowToAdd['nb_visits'];
}
$oldRowToUpdate[Piwik_Archive::INDEX_NB_ACTIONS] += $newRowToAdd['nb_actions'];
- if($onlyMetricsAvailableInActionsTable)
- {
- return;
- }
- $oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS] = (float)max($newRowToAdd['max_actions'], $oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS]);
- $oldRowToUpdate[Piwik_Archive::INDEX_SUM_VISIT_LENGTH] += $newRowToAdd['sum_visit_length'];
- $oldRowToUpdate[Piwik_Archive::INDEX_BOUNCE_COUNT] += $newRowToAdd['bounce_count'];
- $oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS_CONVERTED] += $newRowToAdd['nb_visits_converted'];
- return;
+ if($onlyMetricsAvailableInActionsTable)
+ {
+ return;
+ }
+ $oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS] = (float)max($newRowToAdd['max_actions'], $oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS]);
+ $oldRowToUpdate[Piwik_Archive::INDEX_SUM_VISIT_LENGTH] += $newRowToAdd['sum_visit_length'];
+ $oldRowToUpdate[Piwik_Archive::INDEX_BOUNCE_COUNT] += $newRowToAdd['bounce_count'];
+ $oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS_CONVERTED] += $newRowToAdd['nb_visits_converted'];
+ return;
}
+
if(!$doNotSumVisits)
{
$oldRowToUpdate[Piwik_Archive::INDEX_NB_UNIQ_VISITORS] += $newRowToAdd[Piwik_Archive::INDEX_NB_UNIQ_VISITORS];
$oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS] += $newRowToAdd[Piwik_Archive::INDEX_NB_VISITS];
}
$oldRowToUpdate[Piwik_Archive::INDEX_NB_ACTIONS] += $newRowToAdd[Piwik_Archive::INDEX_NB_ACTIONS];
-
+
// Hack for Price tracking on Ecommerce product/category pages
// The price is not summed, but AVG is taken in the SQL query
$index = Piwik_Archive::INDEX_ECOMMERCE_ITEM_PRICE_VIEWED;
@@ -915,18 +976,33 @@ public function updateInterestStats( $newRowToAdd, &$oldRowToUpdate, $onlyMetric
{
$oldRowToUpdate[$index] = (float)$newRowToAdd[$index];
}
-
- if($onlyMetricsAvailableInActionsTable)
- {
- return;
- }
+
+ if($onlyMetricsAvailableInActionsTable)
+ {
+ return;
+ }
$oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS] = (float)max($newRowToAdd[Piwik_Archive::INDEX_MAX_ACTIONS], $oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS]);
$oldRowToUpdate[Piwik_Archive::INDEX_SUM_VISIT_LENGTH] += $newRowToAdd[Piwik_Archive::INDEX_SUM_VISIT_LENGTH];
$oldRowToUpdate[Piwik_Archive::INDEX_BOUNCE_COUNT] += $newRowToAdd[Piwik_Archive::INDEX_BOUNCE_COUNT];
$oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS_CONVERTED] += $newRowToAdd[Piwik_Archive::INDEX_NB_VISITS_CONVERTED];
-
}
-
+
+ /**
+ * Adds the given row $newRowToAdd to the existing $oldRowToUpdate passed by reference
+ *
+ * The rows are php arrays Name => value
+ *
+ * @param array $newRowToAdd
+ * @param array $oldRowToUpdate
+ * @param bool $onlyMetricsAvailableInActionsTable
+ * @param bool $doNotSumVisits
+ * @return void
+ */
+ public function updateInterestStats( $newRowToAdd, &$oldRowToUpdate, $onlyMetricsAvailableInActionsTable = false, $doNotSumVisits = false)
+ {
+ $this->updateInterestStatsByRef($newRowToAdd, $oldRowToUpdate, $onlyMetricsAvailableInActionsTable, $doNotSumVisits);
+ }
+
/**
* Given an array of stats, it will process the sum of goal conversions
* and sum of revenue and add it in the stats array in two new fields.
View
145 plugins/Referers/Referers.php
@@ -4,7 +4,7 @@
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
- * @version $Id$
+ * @version $Id: Referers.php 7024 2012-09-19 10:53:53Z matt $
*
* @category Piwik_Plugins
* @package Piwik_Referers
@@ -355,7 +355,7 @@ static public function getCleanKeyword($label)
? self::getKeywordNotDefinedString()
: $label;
}
-
+
/**
* Hooks on daily archive to trigger various log processing
*
@@ -369,14 +369,14 @@ public function archiveDay( $notification )
*/
$this->archiveProcessing = $notification->getNotificationObject();
if(!$this->archiveProcessing->shouldProcessReportsForPlugin($this->getPluginName())) return;
-
+
$this->archiveDayAggregateVisits($this->archiveProcessing);
$this->archiveDayAggregateGoals($this->archiveProcessing);
Piwik_PostEvent('Referers.archiveDay', $this);
$this->archiveDayRecordInDatabase($this->archiveProcessing);
$this->cleanup();
}
-
+
protected function cleanup()
{
destroy($this->interestBySearchEngine);
@@ -401,8 +401,16 @@ protected function cleanup()
*/
protected function archiveDayAggregateVisits(Piwik_ArchiveProcessing_Day $archiveProcessing)
{
- $dimension = array("referer_type", "referer_name", "referer_keyword", "referer_url");
- $query = $archiveProcessing->queryVisitsByDimension($dimension);
+ $query = $archiveProcessing->getPlainVisitsCursor(
+ array(
+ 'referer_type',
+ 'referer_name',
+ 'referer_keyword',
+ 'referer_url',
+ 'idvisitor'
+ ),
+ array('idvisit' => 'asc') // order by visit identifier. we're haven't index in DB for sorting by time of a first visit
+ );
$this->interestBySearchEngine =
$this->interestByKeyword =
@@ -414,70 +422,113 @@ protected function archiveDayAggregateVisits(Piwik_ArchiveProcessing_Day $archiv
$this->interestByCampaign =
$this->interestByType =
$this->distinctUrls = array();
- while($row = $query->fetch() )
+
+ /* storage for calculated visitors */
+ $visitors = array();
+
+ while (($row = $query->fetch()) !== false)
{
- if(empty($row['referer_type']))
+ /* Create a new variable is more fast than redoing search in hash */
+ $idvisitor = $row['idvisitor'];
+
+ /* Don't need transfer this constants from database */
+ $row[Piwik_Archive::INDEX_NB_VISITS] = 1;
+ $row[Piwik_Archive::INDEX_NB_UNIQ_VISITORS] = isset($visitors[$idvisitor]) ? 0 : 1;
+ $row[Piwik_Archive::INDEX_MAX_ACTIONS] = $row[Piwik_Archive::INDEX_NB_ACTIONS];
+
+ $visitors[$idvisitor] = true;
+
+ if (empty($row['referer_type']))
{
$row['referer_type'] = Piwik_Common::REFERER_TYPE_DIRECT_ENTRY;
}
else
{
- switch($row['referer_type'])
+ switch ($row['referer_type'])
{
case Piwik_Common::REFERER_TYPE_SEARCH_ENGINE:
- if(empty($row['referer_keyword']))
- {
+ if (empty($row['referer_keyword']))
$row['referer_keyword'] = self::LABEL_KEYWORD_NOT_DEFINED;
- }
- if(!isset($this->interestBySearchEngine[$row['referer_name']])) $this->interestBySearchEngine[$row['referer_name']]= $archiveProcessing->getNewInterestRow();
- if(!isset($this->interestByKeyword[$row['referer_keyword']])) $this->interestByKeyword[$row['referer_keyword']]= $archiveProcessing->getNewInterestRow();
- if(!isset($this->interestBySearchEngineAndKeyword[$row['referer_name']][$row['referer_keyword']])) $this->interestBySearchEngineAndKeyword[$row['referer_name']][$row['referer_keyword']]= $archiveProcessing->getNewInterestRow();
- if(!isset($this->interestByKeywordAndSearchEngine[$row['referer_keyword']][$row['referer_name']])) $this->interestByKeywordAndSearchEngine[$row['referer_keyword']][$row['referer_name']]= $archiveProcessing->getNewInterestRow();
-
- $archiveProcessing->updateInterestStats( $row, $this->interestBySearchEngine[$row['referer_name']]);
- $archiveProcessing->updateInterestStats( $row, $this->interestByKeyword[$row['referer_keyword']]);
- $archiveProcessing->updateInterestStats( $row, $this->interestBySearchEngineAndKeyword[$row['referer_name']][$row['referer_keyword']]);
- $archiveProcessing->updateInterestStats( $row, $this->interestByKeywordAndSearchEngine[$row['referer_keyword']][$row['referer_name']]);
- break;
-
+
+ $refererName = $row['referer_name'];
+ $refererKeyword = $row['referer_keyword'];
+
+ if (!isset($this->interestBySearchEngine[$refererName]))
+ $this->interestBySearchEngine[$refererName] = $archiveProcessing->getNewInterestRow();
+
+ if (!isset($this->interestByKeyword[$refererKeyword]))
+ $this->interestByKeyword[$refererKeyword] = $archiveProcessing->getNewInterestRow();
+
+ if (!isset($this->interestBySearchEngineAndKeyword[$refererName][$refererKeyword]))
+ $this->interestBySearchEngineAndKeyword[$refererName][$refererKeyword] = $archiveProcessing->getNewInterestRow();
+
+ if (!isset($this->interestByKeywordAndSearchEngine[$refererKeyword][$refererName]))
+ $this->interestByKeywordAndSearchEngine[$refererKeyword][$refererName] = $archiveProcessing->getNewInterestRow();
+
+ $archiveProcessing->updateInterestStatsByRef($row, $this->interestBySearchEngine[$refererName]);
+ $archiveProcessing->updateInterestStatsByRef($row, $this->interestByKeyword[$refererKeyword]);
+ $archiveProcessing->updateInterestStatsByRef($row, $this->interestBySearchEngineAndKeyword[$refererName][$refererKeyword]);
+ $archiveProcessing->updateInterestStatsByRef($row, $this->interestByKeywordAndSearchEngine[$refererKeyword][$refererName]);
+
+ break;
+
case Piwik_Common::REFERER_TYPE_WEBSITE:
-
- if(!isset($this->interestByWebsite[$row['referer_name']])) $this->interestByWebsite[$row['referer_name']]= $archiveProcessing->getNewInterestRow();
- $archiveProcessing->updateInterestStats( $row, $this->interestByWebsite[$row['referer_name']]);
-
- if(!isset($this->interestByWebsiteAndUrl[$row['referer_name']][$row['referer_url']])) $this->interestByWebsiteAndUrl[$row['referer_name']][$row['referer_url']]= $archiveProcessing->getNewInterestRow();
- $archiveProcessing->updateInterestStats( $row, $this->interestByWebsiteAndUrl[$row['referer_name']][$row['referer_url']]);
-
- if(!isset($this->distinctUrls[$row['referer_url']]))
- {
- $this->distinctUrls[$row['referer_url']] = true;
- }
- break;
-
+ $refererName = $row['referer_name'];
+ $refererUrl = $row['referer_url'];
+
+ if (!isset($this->distinctUrls[$refererUrl]))
+ $this->distinctUrls[$refererUrl] = true;
+
+ if (!isset($this->interestByWebsite[$refererName]))
+ $this->interestByWebsite[$refererName] = $archiveProcessing->getNewInterestRow();
+
+ if (!isset($this->interestByWebsiteAndUrl[$refererName][$refererUrl]))
+ $this->interestByWebsiteAndUrl[$refererName][$refererUrl] = $archiveProcessing->getNewInterestRow();
+
+ $archiveProcessing->updateInterestStatsByRef($row, $this->interestByWebsite[$refererName]);
+ $archiveProcessing->updateInterestStatsByRef($row, $this->interestByWebsiteAndUrl[$refererName][$refererUrl]);
+
+ break;
+
case Piwik_Common::REFERER_TYPE_CAMPAIGN:
- if(!empty($row['referer_keyword']))
+ $refererName = $row['referer_name'];
+
+ if (!isset($this->interestByCampaign[$refererName]))
+ $this->interestByCampaign[$refererName] = $archiveProcessing->getNewInterestRow();
+
+ $archiveProcessing->updateInterestStatsByRef($row, $this->interestByCampaign[$refererName]);
+
+ if (!empty($row['referer_keyword']))
{
- if(!isset($this->interestByCampaignAndKeyword[$row['referer_name']][$row['referer_keyword']])) $this->interestByCampaignAndKeyword[$row['referer_name']][$row['referer_keyword']]= $archiveProcessing->getNewInterestRow();
- $archiveProcessing->updateInterestStats( $row, $this->interestByCampaignAndKeyword[$row['referer_name']][$row['referer_keyword']]);
+ $refererKeyword = $row['referer_keyword'];
+
+ if (!isset($this->interestByCampaignAndKeyword[$refererName][$refererKeyword]))
+ $this->interestByCampaignAndKeyword[$refererName][$refererKeyword] = $archiveProcessing->getNewInterestRow();
+
+ $archiveProcessing->updateInterestStatsByRef($row, $this->interestByCampaignAndKeyword[$refererName][$refererKeyword]);
}
- if(!isset($this->interestByCampaign[$row['referer_name']])) $this->interestByCampaign[$row['referer_name']]= $archiveProcessing->getNewInterestRow();
- $archiveProcessing->updateInterestStats( $row, $this->interestByCampaign[$row['referer_name']]);
- break;
-
+
+ break;
+
case Piwik_Common::REFERER_TYPE_DIRECT_ENTRY:
// direct entry are aggregated below in $this->interestByType array
break;
-
+
default:
throw new Exception("Non expected referer_type = " . $row['referer_type']);
break;
}
}
- if(!isset($this->interestByType[$row['referer_type']] )) $this->interestByType[$row['referer_type']] = $archiveProcessing->getNewInterestRow();
- $archiveProcessing->updateInterestStats($row, $this->interestByType[$row['referer_type']]);
+
+ $refererType = $row['referer_type'];
+
+ if (!isset($this->interestByType[$refererType]))
+ $this->interestByType[$refererType] = $archiveProcessing->getNewInterestRow();
+
+ $archiveProcessing->updateInterestStatsByRef($row, $this->interestByType[$refererType]);
}
}
-
+
/**
* Daily Goal archiving: processes reports of Goal conversions by Keyword,
* Goal conversions by Referer Websites, etc.
Something went wrong with that request. Please try again.