# Importing Libraries

In [None]:
#data processing
import pandas as pd
#linear algebra
import numpy as np
#data visualisation
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns


# Data importing

In [None]:
# Read the three raw CSV tables (order lines, product catalogue, property info)
# in one pass; the unpacking order matches the order of the paths below.
orders, products, props = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/retail-store-analysis/TR_OrderDetails.csv',
        '../input/retail-store-analysis/TR_Products.csv',
        '../input/retail-store-analysis/TR_PropertyInfo.csv',
    )
)

In [None]:
# Report the (rows, columns) shape of each raw table, in the order
# orders, products, props.
print(
    'Orders, Products and Props datasets have',
    *(table.shape for table in (orders, products, props)),
    'rows and columns respectively',
)

In [None]:
# Attach product attributes to every order line. A left join keeps all
# order rows even when a ProductID has no match in the products table.
df = pd.merge(orders, products, on='ProductID', how='left')
df.head()

In [None]:
# Attach property (store) attributes to every order line. The key is named
# differently on each side: 'PropertyID' in the orders, 'Prop ID' in props.
data = pd.merge(
    df,
    props,
    how='left',
    left_on='PropertyID',
    right_on='Prop ID',
)
data.head()

In [None]:
# Calculating Total Sales: units sold multiplied by the unit price.
# The previous version multiplied by 'Prop ID', which is the property
# identifier used as the merge key, so the resulting figures were meaningless.
# NOTE(review): assumes the products table contributes a per-unit 'Price'
# column - confirm the exact column name against TR_Products.csv.
data['Total Sales'] = data['Quantity'] * data['Price']
data.head()

In [None]:
# Dimensions of the merged frame as a (rows, columns) tuple
data.shape

In [None]:
# Print a concise summary of the merged frame (columns, non-null counts, dtypes)
data.info()

In [None]:
# Descriptive statistics of the merged frame (pandas summarises numeric columns by default)
data.describe()

In [None]:
# Count missing values per column of the merged frame
data.isna().sum()

In [None]:
#1. Maximum quantity sold in any transaction
# (largest single value in the 'Quantity' column; presumably one row per
# order line - verify against the orders table)
data['Quantity'].max()

In [None]:
# 2. Number of distinct product names appearing across all transactions
unique_product_count = data['ProductName'].nunique()
print("There are", unique_product_count, "unique products in the store")

In [None]:
# List the distinct product names. `unique` must be called: without the
# parentheses the cell only displays the bound method, not the values.
data['ProductName'].unique()

In [None]:
# Total sales of each product, ranked by units sold.
# 'Quantity' is selected *before* aggregating so only that column is summed.
# Summing the whole frame and then picking one column also aggregates every
# text column, which is wasteful and can raise on recent pandas versions.
Top_by_quant = (
    data.groupby('ProductName')['Quantity']
    .sum()
    .to_frame()
    .sort_values(by='Quantity', ascending=False)
    .reset_index()
)

# Plotting Top 10
plt.figure(figsize=(12, 3))
graph1 = sns.barplot(
    data=Top_by_quant.head(10),
    x='ProductName',
    y='Quantity',
    palette='rocket',
)
graph1.set(title='Top 10 Products by Quantity')
plt.xticks(rotation=90)

# Write each bar's value just above it: the anchor is the top-centre of the
# bar and the text is shifted up by 9 points.
for bar in graph1.patches:
    graph1.annotate(
        format(bar.get_height(), '1.0f'),
        (bar.get_x() + bar.get_width() / 2., bar.get_height()),
        ha='center', va='center',
        xytext=(0, 9),
        textcoords='offset points',
    )

In [None]:
# Total sales of each product, ranked by sales value.
# 'Total Sales' is selected *before* aggregating so only that column is
# summed. Summing the whole frame first also aggregates every text column,
# which is wasteful and can raise on recent pandas versions.
Top_by_sales = (
    data.groupby('ProductName')['Total Sales']
    .sum()
    .to_frame()
    .sort_values(by='Total Sales', ascending=False)
    .reset_index()
)

# Plotting Top 10
plt.figure(figsize=(12, 3))
graph2 = sns.barplot(
    data=Top_by_sales.head(10),
    x='ProductName',
    y='Total Sales',
    palette='mako',
)
graph2.set(title='Top 10 Products by Dollar Sales')
plt.xticks(rotation=90)

# Write each bar's value just above it: the anchor is the top-centre of the
# bar and the text is shifted up by 9 points.
for bar in graph2.patches:
    graph2.annotate(
        format(bar.get_height(), '1.0f'),
        (bar.get_x() + bar.get_width() / 2, bar.get_height()),
        ha="center", va="center",
        xytext=(0, 9),
        textcoords='offset points',
    )

In [None]:
# Products distribution by category: units sold per (category, product).
# 'Quantity' is selected before aggregating so only that column is summed,
# instead of summing every column of the frame and discarding the rest.
prod_cat = (
    data.groupby(['ProductCategory', 'ProductName'])['Quantity']
    .sum()
    .to_frame()
)
# Order by category, then by quantity, both descending, so that the best
# sellers come first within each category.
prod_cat = prod_cat.reset_index().sort_values(
    by=['ProductCategory', 'Quantity'],
    ascending=False,
)
# Top 5 products in each category by quantity sold (relies on the sort above)
Top_5_prod_cat = prod_cat.groupby('ProductCategory').head(5)

In [None]:
# Bar chart of the per-category top sellers, coloured by category.
# dodge=False keeps one full-width bar per product instead of reserving a
# slot for every category at each x position.
plt.figure(figsize=(16, 5))
graph3 = sns.barplot(
    data=Top_5_prod_cat,
    x='ProductName',
    y='Quantity',
    hue='ProductCategory',
    dodge=False,
    palette='magma',
)
graph3.set(title='Top 5 Products in each category by quantity sold')
plt.xticks(rotation=90)

# Label every bar with its height, anchored at the bar's top-centre and
# shifted up by 9 points.
for bar in graph3.patches:
    label_text = format(bar.get_height(), '1.0f')
    anchor = (bar.get_x() + bar.get_width() / 2, bar.get_height())
    graph3.annotate(
        label_text,
        anchor,
        ha="center",
        va="center",
        xytext=(0, 9),
        textcoords='offset points',
    )

In [None]:
# Distribution of stores amongst states: number of distinct cities per state.
# 'PropertyCity' is selected before calling nunique() so the distinct count is
# computed for that column only, not for every column of the frame.
# NOTE(review): the chart title calls these "stores"; that presumably relies
# on one store per city - confirm, otherwise count distinct 'Prop ID' instead.
stores_by_city = (
    data.groupby('PropertyState')['PropertyCity']
    .nunique()
    .to_frame()
)
stores_by_city = stores_by_city.reset_index().sort_values(
    by='PropertyCity',
    ascending=False,
)

plt.figure(figsize=(12, 5))
graph4 = sns.barplot(
    data=stores_by_city,
    x='PropertyState',
    y='PropertyCity',
    color='teal',
)
graph4.set(title='Number of Stores by State')
plt.xticks(rotation=90)

# Write each bar's value just above it: the anchor is the top-centre of the
# bar and the text is shifted up by 9 points.
for bar in graph4.patches:
    graph4.annotate(
        format(bar.get_height(), '1.0f'),
        (bar.get_x() + bar.get_width() / 2, bar.get_height()),
        ha="center", va="center",
        xytext=(0, 9),
        textcoords='offset points',
    )


In [None]:
# Top 5 properties that registered highest sales.
# 'Total Sales' is selected before aggregating so only that column is summed,
# instead of summing every column of the frame and discarding the rest.
# NOTE(review): sales are grouped by 'PropertyCity', not by property id;
# presumably each city hosts a single property - confirm.
highest_sales = (
    data.groupby('PropertyCity')['Total Sales']
    .sum()
    .to_frame()
)
highest_sales = highest_sales.reset_index().sort_values(
    by='Total Sales',
    ascending=False,
)
highest_sales.head(5)