In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1. Business situation analysis

In [None]:
# import related lib
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the semicolon-separated daily sales export and give its columns short names.
DAY_SELL_CSV = '../input/total-sale-2018-yearly-data-of-grocery-shop/Day_sell_24_12_18.csv'
columns = ['date', 'net_purchase', 'gross_sale', 'tax', 'margin']

day_sell_data = pd.read_csv(DAY_SELL_CSV, sep=';')
day_sell_data.columns = columns  # replaces the header names read from the file
day_sell_data.head()

In [None]:
# change the datatype
# NOTE(review): no explicit format/dayfirst is given — confirm the CSV's date
# layout is parsed as intended.
day_sell_data['date'] = pd.to_datetime(day_sell_data['date'])
day_sell_data['dayofweek'] = day_sell_data['date'].dt.dayofweek

# Build the list of columns to convert without mutating `columns`, so that
# re-running this cell does not fail on a second `columns.remove('date')`.
numeric_columns = [name for name in columns if name != 'date']
for col in numeric_columns:
    # Values are written with a decimal comma; normalise to a dot, then parse.
    # Going through str keeps the cell re-runnable on already-converted floats.
    day_sell_data[col] = (
        day_sell_data[col]
        .astype(str)
        .str.replace(',', '.', regex=False)
        .astype(float)
    )


In [None]:
# Add two derived columns:
#   pay_back      = gross_sale - net_purchase
#   rateofpayback = pay_back as a percentage of gross_sale
day_sell_data = day_sell_data.assign(
    pay_back=lambda frame: frame['gross_sale'] - frame['net_purchase'],
    rateofpayback=lambda frame: frame['pay_back'] / frame['gross_sale'] * 100,
)

In [None]:
# draw lineplot
# lineplot function
def lineplot(colname):
    """Show a 32x18-inch line chart of `colname` against 'date' from `day_sell_data`.

    The x ticks are removed, so only the axis titles and the y scale are drawn.
    The figure is closed after it has been shown.
    """
    sns.set(style='whitegrid')
    fig, axes = plt.subplots(figsize=(32, 18))
    sns.lineplot(x='date', y=colname, data=day_sell_data, ax=axes)
    axes.set_xticks([])
    axes.set_xlabel('date', fontsize=30)
    axes.set_ylabel(colname, fontsize=30)
    plt.xticks(fontsize=30)
    plt.yticks(fontsize=30)
    plt.show()
    plt.close(fig)

# Draw one line chart per column, skipping 'date' and 'dayofweek'.
columns = day_sell_data.columns.drop(['date', 'dayofweek']).tolist()
for col in columns:
    lineplot(col)

These plots show that every column except the last one, 'rateofpayback', **looks relatively stable**. They also give a rough idea of each column's range, but not a precise one, so to understand the ranges better we draw **box plots** next.

In [None]:
# drop the last data which is error
# The trailing row of the file has no date and holds these values:
#   date:None
#   net_purchase:5414124,75
#   gross_sale:1218719,16
#   tax:1220682,59
#   margin:365027,61
# Only drop when the last row really has no date, so that re-running this cell
# cannot delete a valid day. `index[-1]` is used instead of `len(...) - 1` so
# the drop does not depend on the index being a default 0..n-1 range.
if day_sell_data['date'].iloc[-1:].isna().any():
    day_sell_data.drop(day_sell_data.index[-1], inplace=True)

# figure_plot function
def figure_plot(type, colname, data=None):
    """Draw a 32x18-inch line or box plot of one column and show it.

    Parameters
    ----------
    type : str
        'line' plots `colname` against the 'date' column (x ticks hidden);
        'box' draws a box plot of `colname`. The parameter name shadows the
        builtin but is kept so existing callers keep working.
    colname : str
        Name of the column to plot.
    data : pandas.DataFrame, optional
        Frame to read from. Defaults to the notebook-level `day_sell_data`.

    Raises
    ------
    ValueError
        If `type` is neither 'line' nor 'box'.
    """
    # Validate first so an unsupported type fails loudly instead of silently
    # rendering an empty figure.
    if type not in ('line', 'box'):
        raise ValueError(
            "unsupported plot type {!r}; expected 'line' or 'box'".format(type)
        )
    if data is None:
        data = day_sell_data

    sns.set(style='whitegrid')
    if type == 'line':
        ax = sns.lineplot(x='date', y=colname, data=data)
        ax.set_xticks([])
        ax.set_xlabel('date', fontsize=30)
    else:
        ax = sns.boxplot(y=data[colname])
    ax.set_ylabel(colname, fontsize=30)
    plt.gcf().set_size_inches(32, 18)  # increase the size of the figure
    plt.xticks(fontsize=30)
    plt.yticks(fontsize=30)
    plt.show()
    plt.close()

# Derive the columns to plot from the frame itself rather than relying on the
# `columns` list left behind by an earlier cell (that name is rebound for other
# datasets elsewhere in the notebook).
box_columns = [name for name in day_sell_data.columns if name not in ('date', 'dayofweek')]
for col in box_columns:
    figure_plot('box', col)

From these box plots we can read off the range of each column, which gives us a better understanding of this shop's business condition.

# 2. Commodity analysis

**Problem:** A significant share of the store's assortment consists of poorly rotating goods, and there is also a group of goods that generates significant losses. Should the owners change the profile of the shop or limit some of the product groups?

To address this problem, let us start by analysing the commodities in this shop.

In [None]:
# Load the per-product sales export (semicolon-separated, ISO-8859-1 encoded).
product_sell = pd.read_csv(
    '../input/total-sale-2018-yearly-data-of-grocery-shop/SELL_1.csv',
    sep=';',
    encoding='ISO-8859-1',
)

# Rewrite every column except 'Date' as text with commas replaced by dots.
# NOTE(review): str() also turns any missing value into the literal string
# 'nan', and commas inside free-text columns are replaced too — confirm that
# this is intended.
columns = product_sell.columns.tolist()
columns.remove('Date')
for col in columns:
    product_sell[col] = product_sell[col].map(lambda value: str(value).replace(',', '.'))
product_sell.head()

OK, first let us get a global understanding of this shop's commodities.

In [None]:
# Count how many rows fall into each product group (most frequent first).
Pgroup = product_sell.loc[:, 'Pgroup']
Pgroup.value_counts()

In [None]:
# Parse 'pwa_sn' as a number, then total it per product group, largest total first.
product_sell['pwa_sn'] = product_sell['pwa_sn'].astype('float64')
pwa_sn_by_group = product_sell.groupby('Pgroup')['pwa_sn'].sum()
pwa_sn_by_group.sort_values(ascending=False)

# 3. Commodity rotation analysis

In [None]:
# Load the product rotation export (semicolon-separated, ISO-8859-1 encoded) and preview it.
ROTATION_CSV = '../input/total-sale-2018-yearly-data-of-grocery-shop/ROTATION_of_products01.01.2018-09.01.2019.csv'
rotation = pd.read_csv(ROTATION_CSV, sep=';', encoding='ISO-8859-1')
rotation.head()

We can see that the format of this data is not convenient for analysis with pandas, so I imported it into Excel and analysed it there. After an overall look at the data, I found that only the product name and product sales are credible and mostly complete; the other fields have many missing values and low credibility, especially the most important features: rotation in days and rotation in times. Therefore, I do not currently know how to analyse commodity rotation, and more data may be needed.
Finally, I would like to thank the data provider for providing this dataset.