In [1]:
import os
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import pendulum
import importlib
import re

%matplotlib inline  

In [2]:
import data
import color
import categories
import regression
import echarts
import charts
from cytoolz import *
from cytoolz.itertoolz import accumulate
from operator import add

In [3]:
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline

In [4]:
echarts.init_echarts()

In [5]:
sales =  [
    data.read_store_sales('F01451', begin='2017-07-01', end='2018-12-31'),
    data.read_store_sales('F01450', begin='2017-07-01', end='2018-12-31')
]

In [6]:
# receipt = [
#     data.read_receipt_sales('F01451', begin='2018-01-01', end='2018-12-31'),
#     data.read_receipt_sales('F01450', begin='2018-01-01', end='2018-12-31')
# ]

In [7]:
sales[0]['margin'] = sales[0]['profit'] / sales[0]['sales'] * 100
sales[1]['margin'] = sales[1]['profit'] / sales[1]['sales'] * 100
sales[0] = sales[0].replace([np.inf, -np.inf], np.nan).dropna()
sales[1] = sales[1].replace([np.inf, -np.inf], np.nan).dropna()

In [8]:
importlib.reload(charts)

<module 'charts' from '/home/ribelo/code/python/trennwand/charts.py'>

In [9]:
charts.scatter_plot(sales[0], sales[1], begin_date='2017-09-01', column='sales', title='obrót', reg="poly")

In [10]:
charts.scatter_plot(sales[0], sales[1], begin_date='2017-09-01', column='profit', title='zysk', reg='poly')

In [11]:
charts.scatter_plot(sales[0], sales[1], min_y=0, begin_date='2017-09-01', column='sales', category_id='12', reg='poly')

In [12]:
charts.margin_scatter_plot(sales[0], sales[1], begin_date='2017-10-01', min_y=0)

In [13]:
charts.calendar_plot(sales[0], column='sales', begin_date='2017-09-01', end_date='2018-09-30')

In [15]:
charts.calendar_plot(sales[1], column='sales', begin_date='2017-09-01', end_date='2018-09-30', min_val=0)

In [16]:
d1 = pendulum.now().set(day=1).format('YYYY-MM-DD')
d2 = pendulum.now().subtract(days=1).format('YYYY-MM-DD')
d3 = pendulum.now().set(day=1).subtract(months=1).format('YYYY-MM-DD')
d4 = pendulum.now().subtract(months=1, days=1).format('YYYY-MM-DD')
print(d1, d2, d3, d4)

t1 = sales[0].loc[(sales[0]['date'] >= d1) & (sales[0]['date'] <= d2)]['sales'].sum().round(0)
t2 = sales[0].loc[(sales[0]['date'] >= d3) & (sales[0]['date'] <= d4)]['sales'].sum().round(0)
print("s1 ", t1, t2, t1 - t2, round(t1 / (pendulum.now().day - 1) * pendulum.now().days_in_month))
t1 = sales[1].loc[(sales[1]['date'] >= d1) & (sales[1]['date'] <= d2)]['sales'].sum().round(0)
t2 = sales[1].loc[(sales[1]['date'] >= d3) & (sales[1]['date'] <= d4)]['sales'].sum().round(0)
print("s3 ", t1, t2, t1 - t2, round(t1 / (pendulum.now().day - 1) * pendulum.now().days_in_month))

2018-09-01 2018-09-02 2018-08-01 2018-08-02
s1  14104.0 16458.0 -2354.0 211560.0
s3  7980.0 10730.0 -2750.0 119700.0


In [27]:
s1 = sales[0].loc[(sales[0]['date'] > '2018-06-01')].groupby('name').agg({'sales':np.sum})
s1 = s1.sort_values('sales', ascending=False)
s1_sum = s1.sum()['sales']
p1 = partition_all(s1.shape[0] // 99, s1['sales'])
s1_percent = list(accumulate(add, [sum(part) / s1_sum for part in p1]))
s1_percent.insert(0, 0)
s1_percent.append(100)

s3 = sales[1].loc[(sales[1]['date'] >  '2018-06-01')].groupby('name').agg({'sales':np.sum})
s3 = s3.sort_values('sales', ascending=False)
s3_sum = s3.sum()['sales']
p3 = partition_all(s3.shape[0] // 99, s3['sales'])
s3_percent = list(accumulate(add, [sum(part) / s3_sum for part in p3]))
s3_percent.insert(0, 0)
s3_percent.append(100)

opts = {'title': {'text': 'obrót skumulowany'},
        'toolbox': {'feature': {'saveAsImage': {'pixelRatio': 2}}},
        'legend': {'align': 'right'},
        'tooltip': {},
        'xAxis': {'axisPointer': {'show': True},
                  'data': np.linspace(0, 1, 101).tolist()},
        'yAxis': {'min': 0, 'max': 1},
        'series': [
            {'type': 'line',
             'name': 's1',
             'data': s1_percent},
            {'type': 'line',
             'name': 's3',
             'data': s3_percent}]}
echarts.plot(opts)

In [28]:
s1 = s1_sales.loc[s1_sales['date'] > pendulum.parse('2017-10-01')]
s1 = s1.groupby('date').agg({'sales': np.sum, 'profit': np.sum})
s1_series = [{'value': [row[1], row[2]],
              'itemStyle': {'color': color.day_of_week(row[0].weekday())}}
             for row in s1.itertuples()]
lr = LinearRegression()
X = np.linspace(0, s1['sales'].max(), 10)
lr.fit(s1['sales'][:,np.newaxis], s1['profit'])
s1_reg = lr.predict(X[:,np.newaxis]).tolist()
s1_reg = list(zip(X, s1_reg))



s3 = s3_sales.loc[s3_sales['date'] > pendulum.parse('2017-10-01')]
s3 = s3.groupby('date').agg({'sales': np.sum, 'profit': np.sum})
s3_series = [{'value': [row[1], row[2]],
              'itemStyle': {'color': color.day_of_week(row[0].weekday())}}
             for row in s3.itertuples()]
lr = LinearRegression()
X = np.linspace(0, s1['sales'].max(), 10)
lr.fit(s3['sales'][:,np.newaxis], s3['profit'])
s3_reg = lr.predict(X[:,np.newaxis]).tolist()
s3_reg = list(zip(X, s3_reg))

opts = {'title': {'text': 'obrót a zysk'},
        'toolbox': {'feature': {'saveAsImage': {'pixelRatio': 2}}},
        'legend': {'align': 'right',
                   'data': ['s1', 's3']},
        'data-zoom': [{'type': 'inside',
                       'start': 0,
                       'end': 100},
                      {'start': 0,
                       'end': 100}],
        'tooltip': {},
        'xAxis': {'type': 'value',
                  'axisPointer': {'show': True}},
        'yAxis': {},
        'series': [
            {'type': 'scatter',
             'name': 's1',
             'symbolSize': 8,
             'data': s1_series},
            {'type': 'line',
             'name': 's1-reg',
             'data': s1_reg},
            {'type': 'scatter',
             'name': 's3',
             'symbolSize': 8,
             'data': s3_series},
            {'type': 'line',
             'name': 's3-reg',
             'data': s3_reg}]}
echarts.plot(opts)


NameError: name 's1_sales' is not defined

In [6]:
import datetime
datetime.datetime.strptime('1808015', '%y%m%d%H')

datetime.datetime(2018, 8, 1, 5, 0)

In [14]:
receipt = data.read_receipt_sales('F01450', begin='2018-08-01', end='2018-08-31')
sales = data.read_store_sales('F01450', begin='2018-08-01', end='2018-08-31')
# df = pd.merge(df, df1[['id', 'name', 'category_id']], on='id', how='left')
# df['hour'] = df['datetime'].apply(lambda x: x.hour)
# df['weekday'] = df['datetime'].apply(lambda x: x.weekday)
# df = df.groupby(['weekday', 'hour']).agg({'sales': 'sum'})

In [15]:
tmp = sales[['id', 'category_id']].drop_duplicates()
tmp = tmp.set_index('id')
tmp = tmp.to_dict()

In [16]:
receipt['category_id'] = receipt['id'].apply(lambda x: tmp['category_id'][x])

In [17]:
receipt.head()

Unnamed: 0,datetime,ean,id,qty,sales,vat,person,price,profit,category_id
2,2018-08-01 05:00:00,5900014002180,204401,1.0,2.09,23,3,2.09,0.39,10301
3,2018-08-01 05:00:00,5900014014091,208692,1.0,13.09,23,3,13.09,2.45,10302
4,2018-08-01 05:00:00,5908238696338,19009,0.316,1.26,5,3,3.987342,0.06,90704
7,2018-08-01 05:00:00,5900014002180,204401,1.0,2.09,23,3,2.09,0.39,10301
8,2018-08-01 05:00:00,5900014014091,208692,1.0,13.09,23,3,13.09,2.45,10302


In [31]:
datetime.datetime.now().hour

TypeError: 'int' object is not callable

In [34]:
receipt['datetime'].dt.hour

2        5
3        5
4        5
7        5
8        5
9        5
10       5
11       6
12       6
13       6
14       6
15       6
16       6
17       6
18       6
19       6
20       6
21       6
22       6
23       6
24       6
25       6
26       6
27       6
28       6
29       6
30       6
31       6
32       6
33       6
        ..
1195    20
1196    20
1199    20
1200    20
1201    20
1202    20
1203    20
1204    20
1205    20
1206    20
1207    20
1208    20
1209    20
1210    20
1211    20
1212    20
1215    20
1216    20
1217    20
1218    20
1219    20
1220    20
1221    20
1222    20
1223    20
1224    20
1225    20
1226    20
1227    20
1228    20
Name: datetime, Length: 32579, dtype: int64

In [39]:
df = receipt.copy()
df = df.loc[df['category_id'].apply(lambda x: x.startswith('04'))]
df = df.groupby('datetime').agg({'sales': 'sum'})
df = df.reset_index()
df['hour'] = df['datetime'].dt.hour
df['weekday'] = df['datetime'].dt.weekday
df = df.groupby(['weekday', 'hour']).agg({'sales': 'mean'})
df = df.reset_index()
df['sales'] = np.round(df['sales'])


In [40]:
viridis = [
    '#440154',
    '#481567',
    '#482677',
    '#453781',
    '#404788',
    '#39568C',
    '#33638D',
    '#2D708E',
    '#287D8E',
    '#238A8D',
    '#1F968B',
    '#20A387',
    '#29AF7F',
    '#3CBB75',
    '#55C667',
    '#73D055',
    '#95D840',
    '#B8DE29',
    '#DCE319',
    '#FDE725'
]
viridis.reverse()

days = 'pn wt sr cz pt so nd'.split()
hours = [str(x) for x in range(5, 22)]

series = [[row[1] - 5, row[2], row[3]] for row in df[['hour', 'weekday', 'sales']].itertuples()]
opts = {'title': {'text': 'heatmap'},
        'toolbox': {'feature': {'saveAsImage': {'pixelRatio': 2}}},
        'tooltip': {},
        'visualMap': {
            'min': 0,
            'max': df['sales'].max(),
            'itemHeight': 250,
            'color': viridis,
            'calculable': True,
            'orient': 'vertical',
            'left': 'right',
            'top': 'center'
        },
        'xAxis': {
            'data': hours,
            'splitArea': {'show': True}
        },
        'yAxis': {
            'data': days,
            'splitArea': {'show': True}
        },
        'series': [{
            'name': 'heatmap',
            'type': 'heatmap',
            'data': series
        }]}
echarts.plot(opts, width=900, height=500)