In [2]:
import pandas as pd
import numpy as np

In [4]:
# Read data, grab only the first table, not the date table.
# The UA export is read with row 6 as the header. The first row that contains
# no values at all is treated as the end of the page table; `.index[0]` raises
# IndexError if the file has no such row.
ua_file = pd.read_csv('Acura_UA_Pages.csv', header=6, thousands=',')
n_values = ua_file.notna().sum(axis=1)
cut_off = n_values[n_values == 0].index[0]
# Keep rows strictly before (cut_off - 1); presumably the row just above the
# empty one is a totals/summary row that should not be kept -- TODO confirm.
# Clamp at 0 so a very early empty row cannot turn into a negative slice bound
# (which would silently keep almost the whole file). `.copy()` makes the result
# an independent frame so the column assignments below do not trigger
# SettingWithCopyWarning.
ua_file = ua_file.iloc[:max(cut_off - 1, 0)].copy()
# 'Users' still holds text with thousands separators here (presumably because
# the rows below the page table keep read_csv from parsing the column as
# numbers -- TODO confirm). Strip the commas AND convert to a numeric dtype so
# the combined column is not a mix of strings and integers.
ua_file['Users'] = pd.to_numeric(
    ua_file['Users'].astype(str).str.replace(',', '', regex=False)
)

# GA4 export: keep only the three columns needed and align their names with UA.
g4_file = (
    pd.read_csv('Acura_GA4_Pages.csv', header=6, thousands=',')
    [['Page path + query string', 'Total users', 'Sessions']]
    .rename(columns={'Page path + query string': 'Page', 'Total users': 'Users'})
)

#Add data source column

ua_file['data_source'] = 'UA'
g4_file['data_source'] = 'G4'

#Append data, reset index
# DataFrame.append was removed in pandas 2.0; pd.concat with ignore_index=True
# stacks the two frames and renumbers the rows 0..n-1 in one step.
data = pd.concat([ua_file, g4_file], ignore_index=True)
data

Unnamed: 0,Page,Users,Sessions,data_source
0,/,28273,32604.0,UA
1,/buildyouracura,19486,8758.0,UA
2,/mdx,16009,13302.0,UA
3,/buildyouracura/colours,15318,96.0,UA
4,/rdx,12694,9230.0,UA
...,...,...,...,...
52256,/mdx?gclid=EAIaIQobChMIsbTixt-98gIVyR-tBh2pnAa...,1,1.0,G4
52257,/mdx?gclid=EAIaIQobChMIsba-5-y_8gIVuz6tBh0Zkgv...,1,1.0,G4
52258,/mdx?gclid=EAIaIQobChMIsfDctsHD8gIV2CCtBh3Rjwc...,1,1.0,G4
52259,/mdx?gclid=EAIaIQobChMIsfKcsK7A8gIVUBmtBh1Z2Qc...,1,0.0,G4


In [5]:
#Create pivot table
# One row per Page, with Users and Sessions summed separately for each data
# source. 'Users' is coerced to numeric first: if it arrives as text, "sum"
# would concatenate the strings for any page that appears more than once
# instead of adding them.
pivot_input = data.assign(Users=pd.to_numeric(data['Users']))
piv = pd.pivot_table(
    pivot_input,
    values=['Users', 'Sessions'],
    index='Page',
    columns='data_source',
    # String aliases instead of the builtin `sum`: passing the builtin is
    # deprecated in recent pandas and emits a FutureWarning.
    aggfunc={'Users': 'sum', 'Sessions': 'sum'},
)
# Flatten the (metric, source) column MultiIndex into "<source> <metric>".
piv.columns = [f'{source} {metric}' for metric, source in piv.columns]
piv = piv.astype({'G4 Users': 'float', 'UA Users': 'float'})

#Add Difference, Variance Columns
# The "Variance (%)" columns hold the relative change as a fraction of the UA
# value (e.g. -0.276 means -27.6%), not a value already multiplied by 100.
# NOTE(review): a UA value of 0 yields inf (or NaN for 0/0) here -- confirm
# that is acceptable in the exported sheet.
piv['Session Difference'] = piv['G4 Sessions'] - piv['UA Sessions']
piv['Session Variance (%)'] = piv['Session Difference'] / piv['UA Sessions']
piv['Users Difference'] = piv['G4 Users'] - piv['UA Users']
piv['Users Variance (%)'] = piv['Users Difference'] / piv['UA Users']

# Final column order, then sort by GA4 users (largest first). Chained rather
# than sorted in place, so no column-subset frame is mutated.
report_columns = [
    'G4 Users', 'UA Users', 'Users Difference', 'Users Variance (%)',
    'G4 Sessions', 'UA Sessions', 'Session Difference', 'Session Variance (%)',
]
piv = piv[report_columns].sort_values(by='G4 Users', ascending=False)

In [6]:
# Show the pivot table (sorted by 'G4 Users', descending) as this cell's output.
piv

Unnamed: 0_level_0,G4 Users,UA Users,Users Difference,Users Variance (%),G4 Sessions,UA Sessions,Session Difference,Session Variance (%)
Page,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
/,20457.0,28273.0,-7816.0,-0.276447,21883.0,32604.0,-10721.0,-0.328825
/buildyouracura/colours,15621.0,15318.0,303.0,0.019781,27.0,96.0,-69.0,-0.718750
/buildyouracura/accessories,12710.0,12551.0,159.0,0.012668,26.0,32.0,-6.0,-0.187500
/buildyouracura/trims,12634.0,12398.0,236.0,0.019035,48.0,502.0,-454.0,-0.904382
/buildyouracura/models,11996.0,11888.0,108.0,0.009085,71.0,73.0,-2.0,-0.027397
...,...,...,...,...,...,...,...,...
/zh/zh/zh/mdx/accessories,,1.0,,,,0.0,,
/zh/zh/zh/newsdetails/nca/en/news/release/Acura-NSX-GT3-Racecar-Prepares-for-Competition,,1.0,,,,1.0,,
/zh/zh/zh/sitemap,,1.0,,,,0.0,,
/zh/zh/zh/tlx/packages,,1.0,,,,0.0,,


In [7]:
# Write the pivot table to an Excel workbook; the path is relative, so the
# file lands in the current working directory.
excel_path = 'Acura_Pivot_Page.xlsx'
piv.to_excel(excel_path)