In [2]:
import pandas as pd
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import chart_studio.plotly as py
import plotly.offline as pyoff
import plotly.graph_objs as go
from plotly.subplots import make_subplots

In [85]:
pd.set_option('display.max_columns', None)

In [181]:
orders = pd.read_csv('./Data/orders_export.csv')


Columns (31,41,44,69,70) have mixed types.Specify dtype option on import or set low_memory=False.



In [182]:
orders['Financial Status'].unique()

array(['paid', nan, 'partially_refunded', 'refunded', 'pending'],
      dtype=object)

In [183]:
orders_1 = pd.read_csv('./Data/orders_export_1.csv')
orders_2 = pd.read_csv('./Data/orders_export_2.csv')

In [184]:
orders_com = orders_1.append(orders_2, ignore_index=True)

In [185]:
# orders = orders[~orders['Paid at'].isnull()]
# orders_com = orders_com[~orders_com['Paid at'].isnull()]

In [186]:
orders_com['Financial Status'].unique()

array(['paid', nan, 'refunded', 'partially_refunded', 'pending', 'voided'],
      dtype=object)

In [188]:
orders['Order_Date'] = pd.to_datetime(orders['Created at'], format="%Y-%m-%d %H:%M:%S %z", utc=True)
orders['Date'] = pd.to_datetime(orders['Order_Date'].dt.date)
orders['Date_YM'] = orders['Date'].apply(lambda x: x.strftime('%Y-%m'))

In [189]:
orders_com['Order_Date'] = pd.to_datetime(orders_com['Created at'], format="%Y-%m-%d %H:%M:%S %z", utc=True)
orders_com['Date'] = pd.to_datetime(orders_com['Order_Date'].dt.date)
orders_com['Date_YM'] = orders_com['Date'].apply(lambda x: x.strftime('%Y-%m'))

In [190]:
summary_de = orders.groupby(['Date_YM', 'Billing Country']).agg({'Subtotal':pd.Series.sum, 'Shipping':pd.Series.sum, 'Taxes':pd.Series.sum, 'Total':pd.Series.sum, 'Discount Amount':pd.Series.sum, 'Refunded Amount':pd.Series.sum}).reset_index()
summary_com = orders_com.groupby(['Date_YM', 'Billing Country']).agg({'Subtotal':pd.Series.sum, 'Shipping':pd.Series.sum, 'Taxes':pd.Series.sum, 'Total':pd.Series.sum, 'Discount Amount':pd.Series.sum, 'Refunded Amount':pd.Series.sum}).reset_index()

In [191]:
summary_de['TO_EXSHIP_EXVAT'] = summary_de['Subtotal'] - summary_de['Taxes']
summary_com['TO_EXSHIP_EXVAT'] = summary_com['Subtotal'] - summary_com['Taxes']

In [194]:
summary_de['TO_EXSHIP_EXVAT_EXRET'] = summary_de['TO_EXSHIP_EXVAT'] - summary_de['Refunded Amount']
summary_com['TO_EXSHIP_EXVAT_EXRET'] = summary_com['TO_EXSHIP_EXVAT'] - summary_com['Refunded Amount']

In [195]:
summary_de.to_clipboard(decimal=',', index=False)

In [196]:
summary_com.to_clipboard(decimal=',', index=False)

### GA Transactions

In [107]:
ga = pd.read_excel('./Data/ga_koi_dach.xlsx')

In [125]:
ga['Transaction ID'] = ga['Transaction ID'].astype(str)

In [127]:
ga['tx_id_length'] = ga['Transaction ID'].apply(lambda x: len(x))

In [130]:
ga.tx_id_length.unique()

array([8, 4, 9, 5], dtype=int64)

In [138]:
ga['transaction_id'] = ""

In [144]:
mask1 = (ga.tx_id_length==4)
mask2 = (ga.tx_id_length==5)
mask3 = (ga.tx_id_length==8)
mask4 = (ga.tx_id_length==9)

In [148]:
ga.loc[mask1,['transaction_id']] = "2000" + ga['Transaction ID']
ga.loc[mask2,['transaction_id']] = "1000" + ga['Transaction ID']
ga.loc[mask3,['transaction_id']] = ga['Transaction ID']
ga.loc[mask4,['transaction_id']] = ga['Transaction ID']

In [180]:
ga.Revenue.sum()

986853.5900000001

In [256]:
orders['Name'] = orders['Name'].apply(lambda x: str(x))
orders_com['Name'] = orders_com['Name'].apply(lambda x: str(x))

In [257]:
all_orders = orders.append(orders_com)

In [304]:
merge = all_orders.loc[:,['Name','Date','Billing Country','Subtotal','Shipping','Taxes','Total','Refunded Amount']]

In [305]:
merge.columns = ['transaction_id','Date','Billing_Country','Subtotal','Shipping','Taxes','Total','Refunded_Amount']

In [306]:
merge.columns = merge.columns.str.lower()

In [307]:
ga.columns = ga.columns.str.lower()

In [308]:
merge = merge.set_index('transaction_id')

In [309]:
merge = merge.add_prefix('be_')

In [310]:
merge = merge.reset_index()

In [311]:
merge

Unnamed: 0,transaction_id,be_date,be_billing_country,be_subtotal,be_shipping,be_taxes,be_total,be_refunded_amount
0,20005269,2021-02-23,DE,139.99,5.50,25.25,145.49,0.0
1,20005268,2021-02-23,DE,119.99,5.50,21.77,125.49,0.0
2,20005267,2021-02-23,DE,104.98,5.50,19.17,110.48,0.0
3,20005267,2021-02-23,,,,,,
4,20005266,2021-02-23,DE,219.98,0.00,38.17,219.98,0.0
...,...,...,...,...,...,...,...,...
51851,KOI1005,2018-08-16,NL,399.76,0.00,69.38,399.76,0.0
51852,KOI1004,2018-08-14,NL,49.97,3.95,8.67,53.92,0.0
51853,KOI1003,2018-08-14,NL,184.92,0.00,32.09,184.92,0.0
51854,KOI1003,2018-08-14,,,,,,


In [312]:
merge['transaction_id'] = merge['transaction_id'].apply(lambda x: str(x))
ga['transaction_id'] = ga['transaction_id'].apply(lambda x: str(x))

In [313]:
ga_df = ga.merge(merge,on='transaction_id', how='left')

In [314]:
ga_df = ga_df[~ga_df.be_subtotal.isnull()]

In [315]:
ga_df['date_ym'] = ga_df['be_date'].apply(lambda x: x.strftime('%Y-%m'))

In [316]:
ga_df

Unnamed: 0,transaction id,revenue,tax,delivery,refund amount,quantity,tx_id_length,transaction_id,be_date,be_billing_country,be_subtotal,be_shipping,be_taxes,be_total,be_refunded_amount,date_ym
0,20001980,1361.80,236.34,0.0,0,20,8,20001980,2020-03-01,DE,1361.80,0.0,236.34,1361.80,962.87,2020-03
18,2382,1259.73,218.61,0.0,0,15,4,20002382,2020-05-02,DE,419.91,0.0,72.87,419.91,0.00,2020-05
23,3290,1194.90,207.38,0.0,0,12,4,20003290,2020-09-01,DE,1194.90,0.0,207.38,1194.90,1059.90,2020-09
32,2889,1189.75,206.48,0.0,0,11,4,20002889,2020-07-25,DE,1189.75,0.0,206.48,1189.75,409.96,2020-07
43,3815,1139.92,197.83,0.0,0,8,4,20003815,2020-10-14,DE,1139.92,0.0,197.83,1139.92,309.98,2020-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11589,16868,7.49,0.35,5.5,0,1,5,100016868,2020-08-11,DE,1.99,5.5,0.35,7.49,0.00,2020-08
11590,20002315,5.50,0.00,5.5,0,1,8,20002315,2020-04-23,DE,0.00,5.5,0.00,5.50,0.00,2020-04
11591,20002342,5.50,0.00,5.5,0,1,8,20002342,2020-04-27,DE,0.00,5.5,0.00,5.50,0.00,2020-04
11592,2315,5.50,0.00,5.5,0,1,4,20002315,2020-04-23,DE,0.00,5.5,0.00,5.50,0.00,2020-04


In [317]:
ga_summary = ga_df.groupby(['date_ym','be_billing_country']).agg({'revenue':pd.Series.sum, 'tax':pd.Series.sum, 'be_subtotal':pd.Series.sum, 'be_shipping':pd.Series.sum, 'be_taxes':pd.Series.sum, 'be_total':pd.Series.sum, 'be_refunded_amount':pd.Series.sum}).reset_index()

In [321]:
ga_summary[~ga_summary.be_billing_country.isin(['AT','CH','DE'])].sum()

date_ym               2020-012020-012020-012020-012020-022020-022020...
be_billing_country    ESFRGBNLBEFRGBNLROBEFRNLROUSFRCZFRNLUSLINLUSFR...
revenue                                                         9452.24
tax                                                             1573.76
be_subtotal                                                     8901.79
be_shipping                                                      415.00
be_taxes                                                        1551.21
be_total                                                        9316.79
be_refunded_amount                                              1533.65
dtype: object