In [1]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
# Load the raw sales-transaction CSV from the working directory into `df`.
df = pd.read_csv(filepath_or_buffer='Sales Transaction v.4a.csv')

In [2]:
# Print column names, non-null counts and dtypes to spot missing values
# (the output below shows CustomerNo has fewer non-null entries than rows).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 536350 entries, 0 to 536349
Data columns (total 8 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   TransactionNo  536350 non-null  object 
 1   Date           536350 non-null  object 
 2   ProductNo      536350 non-null  object 
 3   ProductName    536350 non-null  object 
 4   Price          536350 non-null  float64
 5   Quantity       536350 non-null  int64  
 6   CustomerNo     536295 non-null  float64
 7   Country        536350 non-null  object 
dtypes: float64(2), int64(1), object(5)
memory usage: 32.7+ MB


In [3]:
# Parse the textual Date column into datetime64 so it can be grouped and plotted as a time axis.
df['Date'] = df['Date'].pipe(pd.to_datetime)

In [4]:
# Display the frame; the rendered output is truncated to the first and last rows.
df

Unnamed: 0,TransactionNo,Date,ProductNo,ProductName,Price,Quantity,CustomerNo,Country
0,581482,2019-12-09,22485,Set Of 2 Wooden Market Crates,21.47,12,17490.0,United Kingdom
1,581475,2019-12-09,22596,Christmas Star Wish List Chalkboard,10.65,36,13069.0,United Kingdom
2,581475,2019-12-09,23235,Storage Tin Vintage Leaf,11.53,12,13069.0,United Kingdom
3,581475,2019-12-09,23272,Tree T-Light Holder Willie Winkie,10.65,12,13069.0,United Kingdom
4,581475,2019-12-09,23239,Set Of 4 Knick Knack Tins Poppies,11.94,6,13069.0,United Kingdom
...,...,...,...,...,...,...,...,...
536345,C536548,2018-12-01,22168,Organiser Wood Antique White,18.96,-2,12472.0,Germany
536346,C536548,2018-12-01,21218,Red Spotty Biscuit Tin,14.09,-3,12472.0,Germany
536347,C536548,2018-12-01,20957,Porcelain Hanging Bell Small,11.74,-1,12472.0,Germany
536348,C536548,2018-12-01,22580,Advent Calendar Gingham Sack,16.35,-4,12472.0,Germany


In [5]:
# Sum Quantity per product (negative-quantity rows are included in the sum).
product_sell = df.groupby('ProductName', as_index=False)['Quantity'].sum()

In [6]:
# Line-level amount: unit price times quantity (negative when Quantity is negative).
df['TotalAmount'] = df['Price'].mul(df['Quantity'])

In [7]:
import plotly.express as px
# Products whose summed Quantity exceeds 20,000 units.
product_sell_top = product_sell[product_sell['Quantity'] > 20000]

# Pass column names (not Series) so plotly express reads them from the frame.
# text_auto takes True or a d3-format string; 'position' is not a number
# format, so use ',' to label each bar with its value and thousands separators.
fig = px.bar(
    product_sell_top,
    x='ProductName',
    y='Quantity',
    text_auto=',',
    title='Products with Total Sell Over 20k Quantities',
)
fig.update_layout(xaxis_title='Product Name', yaxis_title='Total Sell')
fig.show()

In [62]:
# Products whose summed Quantity is negative, i.e. the negative-quantity rows
# outweigh the positive ones for that product.
product_sell_low = product_sell[product_sell['Quantity'] < 0]

# Pass column names (not Series) so plotly express reads them from the frame.
# text_auto takes True or a d3-format string; 'position' is not a number
# format, so use ',' to label each bar with its value and thousands separators.
fig = px.bar(
    product_sell_low,
    x='ProductName',
    y='Quantity',
    text_auto=',',
    title='Products with Bad Sell',
)
fig.update_layout(xaxis_title='Product Name', yaxis_title='Total Sell')
fig.show()

In [8]:
# Sum TotalAmount per product (negative line amounts are included in the sum).
product_revenue = df.groupby('ProductName', as_index=False)['TotalAmount'].sum()

In [9]:
# Products whose summed TotalAmount exceeds 200,000, rounded to 2 decimals for display.
top_total_product_revenue = product_revenue[product_revenue['TotalAmount'] > 200000].round(2)

# Pass column names (not Series) so plotly express reads them from the frame.
# text_auto takes True or a d3-format string; 'position' is not a number
# format, so use ',.2f' (thousands separators, two decimals) for the bar labels.
fig = px.bar(
    top_total_product_revenue,
    x='ProductName',
    y='TotalAmount',
    text_auto=',.2f',
    title='Products with Total Revenue Over 200k',
)
fig.update_layout(xaxis_title='Product Name', yaxis_title='Total Revenue')
fig.show()

In [10]:
# Sum amount and quantity for every (Country, ProductName) pair.
product_revenue_by_country = (
    df.groupby(['Country', 'ProductName'], as_index=False)[['TotalAmount', 'Quantity']]
    .sum()
)
import numpy as np
# Hard-coded product names to keep in the per-country breakdown.
# NOTE(review): presumably the products with revenue over 200k (per the treemap
# title further down) - confirm against product_revenue.
top_product_names = [
    'Antique Silver T-Light Glass',
    'Assorted Colour Bird Ornament',
    'Assorted Colours Silk Fan',
    'Brocade Ring Purse',
    'Charlotte Bag Suki Design',
    'Cream Hanging Heart T-Light Holder',
    'Heart Of Wicker Small',
    'Jumbo Bag Red Retrospot',
    'Mini Paint Set Vintage',
    'Pack Of 12 London Tissues',
    'Pack Of 60 Pink Paisley Cake Cases',
    'Pack Of 72 Retrospot Cake Case',
    "Paper Chain Kit 50'S Christmas",
    'Paper Craft Little Birdie',
    'Party Bunting',
    'Popcorn Holder',
    'Rabbit Night Light',
    'Red Harmonica In Box',
    'Regency Cakestand 3 Tier',
    'Victorian Glass Hanging T-Light',
    'World War 2 Gliders Asstd Designs',
]

# The inner join keeps only the per-country rows for the listed products.
top_product_revenue_by_country = pd.DataFrame({'ProductName': top_product_names}).merge(
    product_revenue_by_country, on='ProductName', how='inner'
)

In [11]:
# Re-aggregate by (Country, ProductName); this also orders the rows by those keys.
top_product_revenue_by_country = (
    top_product_revenue_by_country
    .groupby(['Country', 'ProductName'], as_index=False)[['TotalAmount', 'Quantity']]
    .sum()
)

In [12]:
# Treemap: root -> country -> product, tile area driven by Quantity,
# with TotalAmount added to the hover box.
fig = px.treemap(
    top_product_revenue_by_country,
    path=[px.Constant('Countries'), 'Country', 'ProductName'],
    values='Quantity',
    hover_data='TotalAmount',
)
fig.update_traces(marker=dict(cornerradius=5))
fig.update_layout(
    margin=dict(t=50, l=25, r=25, b=25),
    title="Top Product's(Revenue over 200k) with Net-Quantity Sell and Net-Profit in High Selling Countries",
)
fig.show()

In [13]:
# Negative-quantity rows, aggregated per (date, transaction, customer, product, price),
# ordered by price descending, keeping only groups whose amount is below -1000.
cancelled_orders = (
    df.loc[df['Quantity'] < 0]
    .groupby(['Date', 'TransactionNo', 'CustomerNo', 'ProductName', 'Price'])[['Quantity', 'TotalAmount']]
    .sum()
    .reset_index()
    .sort_values(by=['Price'], ascending=False)
    .loc[lambda orders: orders['TotalAmount'] < -1000]
)

In [14]:
# Positive-quantity rows, aggregated per (date, transaction, customer, product, price)
# and ordered by price descending.
non_cancelled_orders = (
    df.loc[df['Quantity'] > 0]
    .groupby(['Date', 'TransactionNo', 'CustomerNo', 'ProductName', 'Price'])[['Quantity', 'TotalAmount']]
    .sum()
    .reset_index()
    .sort_values(by=['Price'], ascending=False)
)

In [15]:
# Pair each large cancellation with same-day purchases of the same product by the
# same customer. The `_x` columns come from cancelled_orders and the `_y` columns
# from non_cancelled_orders; the new columns hold their sums.
order_cancel = (
    cancelled_orders
    .merge(non_cancelled_orders, on=['Date', 'CustomerNo', 'ProductName'], how='inner')
    .assign(
        TotalAmount=lambda merged: merged['TotalAmount_x'] + merged['TotalAmount_y'],
        TotalQuantity=lambda merged: merged['Quantity_x'] + merged['Quantity_y'],
    )
)


In [16]:
# Collapse to one row per (product, cancelled amount, purchased amount) and
# order by the cancelled amount, most negative first.
order_cancel = (
    order_cancel
    .groupby(['ProductName', 'TotalAmount_x', 'TotalAmount_y'], as_index=False)[['TotalAmount', 'TotalQuantity']]
    .sum()
    .sort_values(by='TotalAmount_x')
)
# Keep only rows whose cancelled amount is below -10000.
most_effected_orders = order_cancel.loc[order_cancel['TotalAmount_x'].lt(-10000)]

In [17]:
# One line per amount column for the most affected products:
#   TotalAmount   = cancelled + purchased amount (sum of the two columns below)
#   TotalAmount_x = amount from the cancelled (negative-quantity) rows
#   TotalAmount_y = amount from the matching positive-quantity rows
# Fixes the legend typo 'Proft Amount' -> 'Profit Amount'.
fig = go.Figure()
for amount_column, legend_label in [
    ('TotalAmount', 'Profit Amount'),
    ('TotalAmount_x', 'Loss Amount'),
    ('TotalAmount_y', 'Total Amount'),
]:
    fig.add_trace(go.Scatter(
        x=most_effected_orders['ProductName'],
        y=most_effected_orders[amount_column],
        name=legend_label,
    ))
fig.update_layout(xaxis_title='Product Name', yaxis_title='Total Amount', title='Product with High Sell Deflection Caused by Order Cancellation')
fig.show()

In [18]:
# Sum TotalAmount per customer. groupby drops rows whose CustomerNo is missing,
# so every remaining key is a real number.
customer_expanse = df.groupby('CustomerNo')['TotalAmount'].sum().reset_index()
# CustomerNo is float64 only because the raw column contains missing values;
# cast to int before str so labels read '17490' instead of '17490.0'.
customer_expanse['CustomerNo'] = customer_expanse['CustomerNo'].astype('int64').astype(str)

In [19]:
# Customers whose summed TotalAmount exceeds 500,000, rounded to 2 decimals for display.
top_total_customer_expanse = customer_expanse[customer_expanse['TotalAmount'] > 500000].round(2)

# Pass column names (not Series) so plotly express reads them from the frame.
# text_auto takes True or a d3-format string; 'position' is not a number
# format, so use ',.2f' (thousands separators, two decimals) for the bar labels.
fig = px.bar(
    top_total_customer_expanse,
    x='CustomerNo',
    y='TotalAmount',
    text_auto=',.2f',
)
fig.update_layout(xaxis_title='Customer No.', yaxis_title='Total Amount', title='Top Buyers(Spent More than 500k)')
fig.show()

In [63]:
# Rows with a negative line amount, summed per customer.
dff = df[df['TotalAmount'] < 0]
low_total_customer_expanse = dff.groupby('CustomerNo')['TotalAmount'].sum().reset_index()
# groupby drops missing CustomerNo keys, so the int cast is safe; it makes the
# labels read '17490' instead of the float rendering '17490.0'.
low_total_customer_expanse['CustomerNo'] = low_total_customer_expanse['CustomerNo'].astype('int64').astype(str)
# Keep customers whose negative amounts sum to below -10,000.
low_total_customer_expanse = low_total_customer_expanse[low_total_customer_expanse['TotalAmount'] < -10000].round(2)

# Pass column names (not Series) so plotly express reads them from the frame.
# text_auto takes True or a d3-format string; 'position' is not a number
# format, so use ',.2f' (thousands separators, two decimals) for the bar labels.
fig = px.bar(
    low_total_customer_expanse,
    x='CustomerNo',
    y='TotalAmount',
    text_auto=',.2f',
)
fig.update_layout(xaxis_title='Customer No.', yaxis_title='Total Amount', title='Customer with High Amount of Order Cancellation(Min Order Amount 10k)')
fig.show()

In [24]:
# Daily summed amount per country, then split into the UK series and a single
# combined series for every other country.
product_revenue_overall = df.groupby(['Date', 'Country'], as_index=False)['TotalAmount'].sum()
product_revenue_uk = product_revenue_overall.loc[
    lambda daily: daily['Country'] == 'United Kingdom'
]
product_revenue_other_countries = (
    product_revenue_overall
    .loc[lambda daily: daily['Country'] != 'United Kingdom']
    .groupby('Date', as_index=False)['TotalAmount']
    .sum()
)

In [25]:

# Two time series on one figure: UK daily amount vs. all other countries combined.
fig = go.Figure(data=[
    go.Scatter(
        x=product_revenue_uk['Date'],
        y=product_revenue_uk['TotalAmount'],
        name='United Kingdom',
    ),
    go.Scatter(
        x=product_revenue_other_countries['Date'],
        y=product_revenue_other_countries['TotalAmount'],
        name='Other Countries',
    ),
])
fig.update_layout(
    title='Product Revenue in United Kingdom vs Other Countries Over the Time',
    xaxis_title='Date',
    yaxis_title='Net Amount',
)
fig.show()

In [26]:
# Daily summed quantity per country, then split into the UK series and a single
# combined series for every other country.
product_sell_overall = df.groupby(['Date', 'Country'], as_index=False)['Quantity'].sum()
product_sell_uk = product_sell_overall.loc[
    lambda daily: daily['Country'] == 'United Kingdom'
]
product_sell_other_countries = (
    product_sell_overall
    .loc[lambda daily: daily['Country'] != 'United Kingdom']
    .groupby('Date', as_index=False)['Quantity']
    .sum()
)

In [27]:
# Two time series on one figure: UK daily quantity vs. all other countries combined.
fig = go.Figure(data=[
    go.Scatter(
        x=product_sell_uk['Date'],
        y=product_sell_uk['Quantity'],
        name='United Kingdom',
    ),
    go.Scatter(
        x=product_sell_other_countries['Date'],
        y=product_sell_other_countries['Quantity'],
        name='Other Countries',
    ),
])
fig.update_layout(
    title='Product Sell in United Kingdom vs Other Countries Over the Time',
    xaxis_title='Date',
    yaxis_title='Net Quantity',
)
fig.show()

In [28]:
# Number of DISTINCT customers per day and country.
# .count() would count every transaction line with a non-null CustomerNo, so a
# customer buying 50 items in a day would be counted 50 times; the chart built
# from these frames reports a number of customers, which needs .nunique().
customer_overall = df.groupby(['Date', 'Country'])['CustomerNo'].nunique().reset_index()
customer_uk = customer_overall[customer_overall['Country'] == 'United Kingdom']
# Distinct customers per day across all non-UK rows, computed from the raw rows
# so a customer is counted once per day even if listed under several countries.
customer_other_countries = (
    df[df['Country'] != 'United Kingdom']
    .groupby('Date')['CustomerNo']
    .nunique()
    .reset_index()
)

In [29]:
# Two time series on one figure: daily CustomerNo figure for the UK vs. other countries.
fig = go.Figure(data=[
    go.Scatter(
        x=customer_uk['Date'],
        y=customer_uk['CustomerNo'],
        name='United Kingdom',
    ),
    go.Scatter(
        x=customer_other_countries['Date'],
        y=customer_other_countries['CustomerNo'],
        name='Other Countries',
    ),
])
fig.update_layout(
    title='Total Number of Customer in United Kingdom vs Other Countries Over the Time',
    xaxis_title='Date',
    yaxis_title='Total Customer',
)
fig.show()

In [30]:
# Split transaction lines by the sign of their amount: negative lines are
# treated as "loss", positive lines as "profit" (zero-amount lines fall in neither).
total_loss = df.loc[df['TotalAmount'].lt(0)]
total_profit = df.loc[df['TotalAmount'].gt(0)]

In [31]:
# Daily sum of the negative line amounts.
# The former `total_loss.sort_values(...)` statement was dropped: its result was
# never assigned or displayed, so it had no effect.
# The amount column is renamed to 'TotalAmountt' (sic) so it does not collide
# with the profit column when the two daily frames are merged on Date; the
# rename is chained instead of done with inplace=True.
total_loss_by_date = (
    total_loss.groupby('Date')['TotalAmount']
    .sum()
    .reset_index()
    .rename(columns={'TotalAmount': 'TotalAmountt'})
)

In [32]:
# Daily sum of the positive line amounts.
# The former `total_profit.sort_values(...)` statement was dropped: its result
# was never assigned or displayed, so it had no effect.
total_profit_by_date = total_profit.groupby('Date')['TotalAmount'].sum().reset_index()

In [33]:
# Net amount per day = positive amounts + negative amounts.
# The merge is an outer join, so a date present on only one side has NaN on the
# other. Plain `+` would turn the whole day into NaN (a gap in the chart), so add
# with fill_value=0 to treat the missing side as zero.
total_revenue_by_date = total_profit_by_date.merge(total_loss_by_date, on=['Date'], how='outer')
total_revenue_by_date['TotalAmount'] = total_revenue_by_date['TotalAmount'].add(
    total_revenue_by_date['TotalAmountt'], fill_value=0
)
# `columns=` already selects the axis; the extra axis argument was redundant.
total_revenue_by_date = total_revenue_by_date.drop(columns='TotalAmountt')

In [34]:
# Three daily series on one figure: positive amounts, negative amounts, and their net.
fig = go.Figure(data=[
    go.Scatter(
        x=total_profit_by_date['Date'],
        y=total_profit_by_date['TotalAmount'],
        name='Total Profit',
    ),
    go.Scatter(
        x=total_loss_by_date['Date'],
        y=total_loss_by_date['TotalAmountt'],
        name='Total Loss',
    ),
    go.Scatter(
        x=total_revenue_by_date['Date'],
        y=total_revenue_by_date['TotalAmount'],
        name='Total Revenue',
    ),
])
fig.update_layout(
    title='Total Revenue vs Total Profit vs Total Loss(Loss Refer the Cancellation of Orders) Over the Time',
    xaxis_title='Date',
    yaxis_title='Total Profit',
)
fig.show()

In [35]:
# Negative amounts summed per country, most negative first; show the top five.
total_loss_by_country = (
    total_loss.groupby('Country', as_index=False)['TotalAmount']
    .sum()
    .sort_values(by='TotalAmount')
)
total_loss_by_country.head()

Unnamed: 0,Country,TotalAmount
24,United Kingdom,-2530628.3
7,EIRE,-52765.88
11,Germany,-21277.87
23,USA,-15418.27
20,Spain,-15273.47


In [36]:
# Positive amounts summed per country, largest first; show the top five.
total_profit_by_country = (
    total_profit.groupby('Country', as_index=False)['TotalAmount']
    .sum()
    .sort_values(by='TotalAmount', ascending=False)
)
total_profit_by_country.head()

Unnamed: 0,Country,TotalAmount
36,United Kingdom,52524658.47
24,Netherlands,2151553.59
10,EIRE,1713410.95
14,Germany,1371543.27
13,France,1330652.89


In [37]:
# UK rows stay as one-row frames (they are merged on Country later);
# every other country is collapsed into a single scalar total.
uk_total_profit = total_profit_by_country.loc[
    total_profit_by_country['Country'].eq('United Kingdom')
]
other_countries_total_profit = total_profit_by_country.loc[
    total_profit_by_country['Country'].ne('United Kingdom'), 'TotalAmount'
].sum()
uk_total_loss = total_loss_by_country.loc[
    total_loss_by_country['Country'].eq('United Kingdom')
]
other_countries_total_loss = total_loss_by_country.loc[
    total_loss_by_country['Country'].ne('United Kingdom'), 'TotalAmount'
].sum()

In [38]:
# Join UK profit (TotalAmount_x) and UK loss (TotalAmount_y) on Country and add
# them to get the net figure.
uk_revenue = pd.merge(uk_total_profit, uk_total_loss, on='Country').assign(
    Revenue=lambda merged: merged['TotalAmount_x'] + merged['TotalAmount_y']
)

In [39]:
import numpy as np
# One-row frames for the combined non-UK totals, shaped like the UK frames
# (Country, TotalAmount) so the same merge-and-add logic applies.
# Built from dicts rather than a mixed str/float np.array: the array coerced the
# totals to strings, which then had to be cast back to float column by column.
other_countries_total_profitt = pd.DataFrame(
    {'Country': ['Other Countries'], 'TotalAmount': [other_countries_total_profit]}
)
other_countries_total_losss = pd.DataFrame(
    {'Country': ['Other Countries'], 'TotalAmount': [other_countries_total_loss]}
)
# After the merge, TotalAmount_x is the profit total and TotalAmount_y the loss total.
other_countries_revenue = other_countries_total_profitt.merge(other_countries_total_losss, on='Country')
other_countries_revenue['Revenue'] = (
    other_countries_revenue['TotalAmount_x'] + other_countries_revenue['TotalAmount_y']
)

In [40]:
# Stack the UK row on top of the "Other Countries" row (row-wise concat, original indexes kept).
total_revenue = pd.concat(objs=[uk_revenue, other_countries_revenue])

In [41]:
# Bars per region: profit (TotalAmount_x), loss (TotalAmount_y) and net revenue.
fig = go.Figure(data=[
    go.Bar(x=total_revenue['Country'], y=total_revenue[amount_column], name=legend_label)
    for amount_column, legend_label in (
        ('TotalAmount_x', 'Total Profit'),
        ('TotalAmount_y', 'Total Loss'),
        ('Revenue', 'Total Revenue'),
    )
])
fig.update_layout(
    title='Profit, Loss, and Revenue by United Kingdom vs Other Countries',
    xaxis_title='Country',
    yaxis_title='Total Amount',
)
fig.show()
    

In [42]:
# Daily UK totals of the positive and negative line amounts.
# These are computed straight from total_profit / total_loss instead of
# rebinding total_profit_by_date / total_loss_by_date: those names already hold
# the date-only frames (with the 'TotalAmountt' column) that the earlier
# merge and chart cells read, and overwriting them with a different schema made
# those cells fail when re-run.
uk_profit_by_date = (
    total_profit[total_profit['Country'] == 'United Kingdom']
    .groupby(['Date', 'Country'])['TotalAmount']
    .sum()
    .reset_index()
)
uk_loss_by_date = (
    total_loss[total_loss['Country'] == 'United Kingdom']
    .groupby(['Date', 'Country'])['TotalAmount']
    .sum()
    .reset_index()
)

In [43]:
# Net UK amount per day = profit (TotalAmount_x) + loss (TotalAmount_y).
# The merge is an outer join, so a date present on only one side has NaN on the
# other. Plain `+` would make the whole day NaN (a gap in the chart), so add
# with fill_value=0 to treat the missing side as zero.
uk_revenue_by_date = uk_profit_by_date.merge(uk_loss_by_date, on=['Date'], how='outer')
uk_revenue_by_date['TotalAmount'] = uk_revenue_by_date['TotalAmount_x'].add(
    uk_revenue_by_date['TotalAmount_y'], fill_value=0
)

In [44]:
# Three daily UK series on one figure: positive amounts, negative amounts, and their net.
# Fixes the legend typo 'Reveue' -> 'Revenue'.
fig = go.Figure()

fig.add_trace(go.Scatter(x=uk_profit_by_date['Date'], y=uk_profit_by_date['TotalAmount'], name='Profit'))
fig.add_trace(go.Scatter(x=uk_loss_by_date['Date'], y=uk_loss_by_date['TotalAmount'], name='Loss'))
fig.add_trace(go.Scatter(x=uk_revenue_by_date['Date'], y=uk_revenue_by_date['TotalAmount'], name='Revenue'))
fig.update_layout(xaxis_title='Date', yaxis_title='Total Amount', title='Revenue vs Profit vs Loss in United Kingdom by Date')

fig.show()