Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#3517 / Fix Referer plugin to correct calculating of visitors uniqueness #12

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 102 additions & 26 deletions core/ArchiveProcessing/Day.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
* @version $Id$
* @version $Id: Day.php 7206 2012-10-15 15:11:34Z matt $
*
* @category Piwik
* @package Piwik
Expand Down Expand Up @@ -273,7 +273,67 @@ public function queryVisitsSimple($select, $orderBy = false, $groupByCols = fals
return $this->db->fetchAll($query['sql'], $query['bind']);
}
}


/**
* Performs a simple query on the log_visit table within the time range this archive
* represents. Returns opened query
*
* @param string $select The SELECT clause.
* @param string|bool $orderBy The ORDER BY clause (without the 'ORDER BY' part). Set to
* false to specify no ORDER BY.
* @param array|bool $groupByCols An array of column names to group by. Set to false to
* specify no GROUP BY.
* @param bool $oneResultRow Whether only one row is expected or not. If set to true,
* this function returns one row, if false, an array of rows.
* @return array
*/
public function getPlainVisitsCursor($additionFields, $orderBy = false, $groupByCols = false)
{
$select = array(
'`log_visit`.`visit_total_actions` as `'.Piwik_Archive::INDEX_NB_ACTIONS.'`',
'`log_visit`.`visit_total_time` as `'.Piwik_Archive::INDEX_SUM_VISIT_LENGTH.'`',
'`log_visit`.`visit_total_actions` < 2 as `'.Piwik_Archive::INDEX_BOUNCE_COUNT.'`',
'`log_visit`.`visit_goal_converted` as `'.Piwik_Archive::INDEX_NB_VISITS_CONVERTED.'`'
);

if(is_array($additionFields) && $additionFields)
{
foreach($additionFields as &$field)
{
$field = '`log_visit`.`'.$field.'` as `'.$field.'`';
}
$select = array_merge($select, $additionFields);
}

$select = implode(",", $select);

if ($orderBy && is_array($orderBy))
{
$orderByConcat = array();
foreach ($orderBy as $field => $direction) {
$orderByConcat[] = '`log_visit`.`'.$field.'` '.$direction;
}
$orderBy = implode(',', $orderByConcat);
}

$from = "log_visit";
$where = "`log_visit`.`visit_last_action_time` >= ?
and `log_visit`.`visit_last_action_time` <= ?
and `log_visit`.`idsite` = ?";

$groupBy = false;
if ($groupByCols and !empty($groupByCols))
{
$groupBy = implode(',', $groupByCols);
}

$bind = array($this->getStartDatetimeUTC(), $this->getEndDatetimeUTC(), $this->idsite);

$query = $this->getSegment()->getSelectQuery($select, $from, $where, $bind, $orderBy, $groupBy);

return $this->db->query($query['sql'], $query['bind']);
}

/**
* Helper function that returns a DataTable containing the $select fields / value pairs.
* IMPORTANT: The $select must return only one row!!
Expand Down Expand Up @@ -869,64 +929,80 @@ public function getNewInterestRowLabeled( $label )
}

/**
* Adds the given row $newRowToAdd to the existing $oldRowToUpdate passed by reference
* Adds the given row $newRowToAdd to the existing $oldRowToUpdate. Both passed by reference for better performance
*
* The rows are php arrays Name => value
*
* @param array $newRowToAdd
* @param array $oldRowToUpdate
* @param array &$newRowToAdd
* @param array &$oldRowToUpdate
* @param bool $onlyMetricsAvailableInActionsTable
* @param bool $doNotSumVisits
* @return
* @return void
*/
public function updateInterestStats( $newRowToAdd, &$oldRowToUpdate, $onlyMetricsAvailableInActionsTable = false, $doNotSumVisits = false)
public function updateInterestStatsByRef(&$newRowToAdd, &$oldRowToUpdate, $onlyMetricsAvailableInActionsTable = false, $doNotSumVisits = false)
{
// Pre 1.2 format: string indexed rows are returned from the DB
// Left here for Backward compatibility with plugins doing custom SQL queries using these metrics as string
if(!isset($newRowToAdd[Piwik_Archive::INDEX_NB_VISITS]))
{
if(!$doNotSumVisits)
{
$oldRowToUpdate[Piwik_Archive::INDEX_NB_UNIQ_VISITORS] += $newRowToAdd['nb_uniq_visitors'];
$oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS] += $newRowToAdd['nb_visits'];
$oldRowToUpdate[Piwik_Archive::INDEX_NB_UNIQ_VISITORS] += $newRowToAdd['nb_uniq_visitors'];
$oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS] += $newRowToAdd['nb_visits'];
}
$oldRowToUpdate[Piwik_Archive::INDEX_NB_ACTIONS] += $newRowToAdd['nb_actions'];
if($onlyMetricsAvailableInActionsTable)
{
return;
}
$oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS] = (float)max($newRowToAdd['max_actions'], $oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS]);
$oldRowToUpdate[Piwik_Archive::INDEX_SUM_VISIT_LENGTH] += $newRowToAdd['sum_visit_length'];
$oldRowToUpdate[Piwik_Archive::INDEX_BOUNCE_COUNT] += $newRowToAdd['bounce_count'];
$oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS_CONVERTED] += $newRowToAdd['nb_visits_converted'];
return;
if($onlyMetricsAvailableInActionsTable)
{
return;
}
$oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS] = (float)max($newRowToAdd['max_actions'], $oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS]);
$oldRowToUpdate[Piwik_Archive::INDEX_SUM_VISIT_LENGTH] += $newRowToAdd['sum_visit_length'];
$oldRowToUpdate[Piwik_Archive::INDEX_BOUNCE_COUNT] += $newRowToAdd['bounce_count'];
$oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS_CONVERTED] += $newRowToAdd['nb_visits_converted'];
return;
}

if(!$doNotSumVisits)
{
$oldRowToUpdate[Piwik_Archive::INDEX_NB_UNIQ_VISITORS] += $newRowToAdd[Piwik_Archive::INDEX_NB_UNIQ_VISITORS];
$oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS] += $newRowToAdd[Piwik_Archive::INDEX_NB_VISITS];
}
$oldRowToUpdate[Piwik_Archive::INDEX_NB_ACTIONS] += $newRowToAdd[Piwik_Archive::INDEX_NB_ACTIONS];

// Hack for Price tracking on Ecommerce product/category pages
// The price is not summed, but AVG is taken in the SQL query
$index = Piwik_Archive::INDEX_ECOMMERCE_ITEM_PRICE_VIEWED;
if(!empty($newRowToAdd[$index]))
{
$oldRowToUpdate[$index] = (float)$newRowToAdd[$index];
}
if($onlyMetricsAvailableInActionsTable)
{
return;
}

if($onlyMetricsAvailableInActionsTable)
{
return;
}
$oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS] = (float)max($newRowToAdd[Piwik_Archive::INDEX_MAX_ACTIONS], $oldRowToUpdate[Piwik_Archive::INDEX_MAX_ACTIONS]);
$oldRowToUpdate[Piwik_Archive::INDEX_SUM_VISIT_LENGTH] += $newRowToAdd[Piwik_Archive::INDEX_SUM_VISIT_LENGTH];
$oldRowToUpdate[Piwik_Archive::INDEX_BOUNCE_COUNT] += $newRowToAdd[Piwik_Archive::INDEX_BOUNCE_COUNT];
$oldRowToUpdate[Piwik_Archive::INDEX_NB_VISITS_CONVERTED] += $newRowToAdd[Piwik_Archive::INDEX_NB_VISITS_CONVERTED];

}


/**
* Adds the given row $newRowToAdd to the existing $oldRowToUpdate passed by reference
*
* The rows are php arrays Name => value
*
* @param array $newRowToAdd
* @param array $oldRowToUpdate
* @param bool $onlyMetricsAvailableInActionsTable
* @param bool $doNotSumVisits
* @return void
*/
public function updateInterestStats( $newRowToAdd, &$oldRowToUpdate, $onlyMetricsAvailableInActionsTable = false, $doNotSumVisits = false)
{
$this->updateInterestStatsByRef($newRowToAdd, $oldRowToUpdate, $onlyMetricsAvailableInActionsTable, $doNotSumVisits);
}

/**
* Given an array of stats, it will process the sum of goal conversions
* and sum of revenue and add it in the stats array in two new fields.
Expand Down
145 changes: 98 additions & 47 deletions plugins/Referers/Referers.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*
* @link http://piwik.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
* @version $Id$
* @version $Id: Referers.php 7024 2012-09-19 10:53:53Z matt $
*
* @category Piwik_Plugins
* @package Piwik_Referers
Expand Down Expand Up @@ -355,7 +355,7 @@ static public function getCleanKeyword($label)
? self::getKeywordNotDefinedString()
: $label;
}

/**
* Hooks on daily archive to trigger various log processing
*
Expand All @@ -369,14 +369,14 @@ public function archiveDay( $notification )
*/
$this->archiveProcessing = $notification->getNotificationObject();
if(!$this->archiveProcessing->shouldProcessReportsForPlugin($this->getPluginName())) return;

$this->archiveDayAggregateVisits($this->archiveProcessing);
$this->archiveDayAggregateGoals($this->archiveProcessing);
Piwik_PostEvent('Referers.archiveDay', $this);
$this->archiveDayRecordInDatabase($this->archiveProcessing);
$this->cleanup();
}

protected function cleanup()
{
destroy($this->interestBySearchEngine);
Expand All @@ -401,8 +401,16 @@ protected function cleanup()
*/
protected function archiveDayAggregateVisits(Piwik_ArchiveProcessing_Day $archiveProcessing)
{
$dimension = array("referer_type", "referer_name", "referer_keyword", "referer_url");
$query = $archiveProcessing->queryVisitsByDimension($dimension);
$query = $archiveProcessing->getPlainVisitsCursor(
array(
'referer_type',
'referer_name',
'referer_keyword',
'referer_url',
'idvisitor'
),
array('idvisit' => 'asc') // order by visit identifier. we're haven't index in DB for sorting by time of a first visit
);

$this->interestBySearchEngine =
$this->interestByKeyword =
Expand All @@ -414,70 +422,113 @@ protected function archiveDayAggregateVisits(Piwik_ArchiveProcessing_Day $archiv
$this->interestByCampaign =
$this->interestByType =
$this->distinctUrls = array();
while($row = $query->fetch() )

/* storage for calculated visitors */
$visitors = array();

while (($row = $query->fetch()) !== false)
{
if(empty($row['referer_type']))
/* Create a new variable is more fast than redoing search in hash */
$idvisitor = $row['idvisitor'];

/* Don't need transfer this constants from database */
$row[Piwik_Archive::INDEX_NB_VISITS] = 1;
$row[Piwik_Archive::INDEX_NB_UNIQ_VISITORS] = isset($visitors[$idvisitor]) ? 0 : 1;
$row[Piwik_Archive::INDEX_MAX_ACTIONS] = $row[Piwik_Archive::INDEX_NB_ACTIONS];

$visitors[$idvisitor] = true;

if (empty($row['referer_type']))
{
$row['referer_type'] = Piwik_Common::REFERER_TYPE_DIRECT_ENTRY;
}
else
{
switch($row['referer_type'])
switch ($row['referer_type'])
{
case Piwik_Common::REFERER_TYPE_SEARCH_ENGINE:
if(empty($row['referer_keyword']))
{
if (empty($row['referer_keyword']))
$row['referer_keyword'] = self::LABEL_KEYWORD_NOT_DEFINED;
}
if(!isset($this->interestBySearchEngine[$row['referer_name']])) $this->interestBySearchEngine[$row['referer_name']]= $archiveProcessing->getNewInterestRow();
if(!isset($this->interestByKeyword[$row['referer_keyword']])) $this->interestByKeyword[$row['referer_keyword']]= $archiveProcessing->getNewInterestRow();
if(!isset($this->interestBySearchEngineAndKeyword[$row['referer_name']][$row['referer_keyword']])) $this->interestBySearchEngineAndKeyword[$row['referer_name']][$row['referer_keyword']]= $archiveProcessing->getNewInterestRow();
if(!isset($this->interestByKeywordAndSearchEngine[$row['referer_keyword']][$row['referer_name']])) $this->interestByKeywordAndSearchEngine[$row['referer_keyword']][$row['referer_name']]= $archiveProcessing->getNewInterestRow();

$archiveProcessing->updateInterestStats( $row, $this->interestBySearchEngine[$row['referer_name']]);
$archiveProcessing->updateInterestStats( $row, $this->interestByKeyword[$row['referer_keyword']]);
$archiveProcessing->updateInterestStats( $row, $this->interestBySearchEngineAndKeyword[$row['referer_name']][$row['referer_keyword']]);
$archiveProcessing->updateInterestStats( $row, $this->interestByKeywordAndSearchEngine[$row['referer_keyword']][$row['referer_name']]);
break;


$refererName = $row['referer_name'];
$refererKeyword = $row['referer_keyword'];

if (!isset($this->interestBySearchEngine[$refererName]))
$this->interestBySearchEngine[$refererName] = $archiveProcessing->getNewInterestRow();

if (!isset($this->interestByKeyword[$refererKeyword]))
$this->interestByKeyword[$refererKeyword] = $archiveProcessing->getNewInterestRow();

if (!isset($this->interestBySearchEngineAndKeyword[$refererName][$refererKeyword]))
$this->interestBySearchEngineAndKeyword[$refererName][$refererKeyword] = $archiveProcessing->getNewInterestRow();

if (!isset($this->interestByKeywordAndSearchEngine[$refererKeyword][$refererName]))
$this->interestByKeywordAndSearchEngine[$refererKeyword][$refererName] = $archiveProcessing->getNewInterestRow();

$archiveProcessing->updateInterestStatsByRef($row, $this->interestBySearchEngine[$refererName]);
$archiveProcessing->updateInterestStatsByRef($row, $this->interestByKeyword[$refererKeyword]);
$archiveProcessing->updateInterestStatsByRef($row, $this->interestBySearchEngineAndKeyword[$refererName][$refererKeyword]);
$archiveProcessing->updateInterestStatsByRef($row, $this->interestByKeywordAndSearchEngine[$refererKeyword][$refererName]);

break;

case Piwik_Common::REFERER_TYPE_WEBSITE:

if(!isset($this->interestByWebsite[$row['referer_name']])) $this->interestByWebsite[$row['referer_name']]= $archiveProcessing->getNewInterestRow();
$archiveProcessing->updateInterestStats( $row, $this->interestByWebsite[$row['referer_name']]);

if(!isset($this->interestByWebsiteAndUrl[$row['referer_name']][$row['referer_url']])) $this->interestByWebsiteAndUrl[$row['referer_name']][$row['referer_url']]= $archiveProcessing->getNewInterestRow();
$archiveProcessing->updateInterestStats( $row, $this->interestByWebsiteAndUrl[$row['referer_name']][$row['referer_url']]);

if(!isset($this->distinctUrls[$row['referer_url']]))
{
$this->distinctUrls[$row['referer_url']] = true;
}
break;

$refererName = $row['referer_name'];
$refererUrl = $row['referer_url'];

if (!isset($this->distinctUrls[$refererUrl]))
$this->distinctUrls[$refererUrl] = true;

if (!isset($this->interestByWebsite[$refererName]))
$this->interestByWebsite[$refererName] = $archiveProcessing->getNewInterestRow();

if (!isset($this->interestByWebsiteAndUrl[$refererName][$refererUrl]))
$this->interestByWebsiteAndUrl[$refererName][$refererUrl] = $archiveProcessing->getNewInterestRow();

$archiveProcessing->updateInterestStatsByRef($row, $this->interestByWebsite[$refererName]);
$archiveProcessing->updateInterestStatsByRef($row, $this->interestByWebsiteAndUrl[$refererName][$refererUrl]);

break;

case Piwik_Common::REFERER_TYPE_CAMPAIGN:
if(!empty($row['referer_keyword']))
$refererName = $row['referer_name'];

if (!isset($this->interestByCampaign[$refererName]))
$this->interestByCampaign[$refererName] = $archiveProcessing->getNewInterestRow();

$archiveProcessing->updateInterestStatsByRef($row, $this->interestByCampaign[$refererName]);

if (!empty($row['referer_keyword']))
{
if(!isset($this->interestByCampaignAndKeyword[$row['referer_name']][$row['referer_keyword']])) $this->interestByCampaignAndKeyword[$row['referer_name']][$row['referer_keyword']]= $archiveProcessing->getNewInterestRow();
$archiveProcessing->updateInterestStats( $row, $this->interestByCampaignAndKeyword[$row['referer_name']][$row['referer_keyword']]);
$refererKeyword = $row['referer_keyword'];

if (!isset($this->interestByCampaignAndKeyword[$refererName][$refererKeyword]))
$this->interestByCampaignAndKeyword[$refererName][$refererKeyword] = $archiveProcessing->getNewInterestRow();

$archiveProcessing->updateInterestStatsByRef($row, $this->interestByCampaignAndKeyword[$refererName][$refererKeyword]);
}
if(!isset($this->interestByCampaign[$row['referer_name']])) $this->interestByCampaign[$row['referer_name']]= $archiveProcessing->getNewInterestRow();
$archiveProcessing->updateInterestStats( $row, $this->interestByCampaign[$row['referer_name']]);
break;


break;

case Piwik_Common::REFERER_TYPE_DIRECT_ENTRY:
// direct entry are aggregated below in $this->interestByType array
break;

default:
throw new Exception("Non expected referer_type = " . $row['referer_type']);
break;
}
}
if(!isset($this->interestByType[$row['referer_type']] )) $this->interestByType[$row['referer_type']] = $archiveProcessing->getNewInterestRow();
$archiveProcessing->updateInterestStats($row, $this->interestByType[$row['referer_type']]);

$refererType = $row['referer_type'];

if (!isset($this->interestByType[$refererType]))
$this->interestByType[$refererType] = $archiveProcessing->getNewInterestRow();

$archiveProcessing->updateInterestStatsByRef($row, $this->interestByType[$refererType]);
}
}

/**
* Daily Goal archiving: processes reports of Goal conversions by Keyword,
* Goal conversions by Referer Websites, etc.
Expand Down