In [1]:
# -*- coding: utf-8 -*-
# %matplotlib inline


import pandas as pd
import geohash
import sqlite3 as sql
import datetime
import sys
import sqlalchemy
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from itertools import *
from pandas import ExcelWriter
from geopy.distance import vincenty


# Use chinese for MPL
mpl.rcParams['font.family'] = 'Hiragino Sans GB'
mpl.rcParams['font.sans-serif'] = 'Hiragino Sans GB'
mpl.rcParams['text.latex.unicode']=True

In [9]:
database_path = '../database/'
database_files = ['龙茗路顾戴路(2016-10-10)-data.db']
database_locations = ['龙茗路顾戴路']

In [10]:
# load database
def load_database(db_name):
    engine = sqlalchemy.create_engine('sqlite:///' + database_path + db_name)
    restaurants_db = pd.read_sql_table('restaurants', engine)
    menus_db = pd.read_sql_table('menus', engine)
    
    # 计算营业额
    menus_db['revenue'] = menus_db['price'] * menus_db['month_sales']
        
    # 合并到主数据库
    revenue_db = menus_db.loc[:, ['restaurant_id', 'revenue']].groupby('restaurant_id').sum().reset_index(drop=False)
    restaurants_db = pd.merge(restaurants_db, revenue_db, left_on='id', right_on='restaurant_id',how='left')
    
    # 计算菜单平均价
    mean_db = menus_db.loc[:, ['restaurant_id', 'price']].groupby('restaurant_id').mean().reset_index(
            drop=False).rename(columns={'price': 'mean_price'})
    restaurants_db = pd.merge(restaurants_db, mean_db, on='restaurant_id')

    # 计算平均价格
    restaurants_db['average_price'] = restaurants_db['revenue'] / restaurants_db['month_sales']

    restaurants_db['revenue'] = restaurants_db['revenue'].fillna(0)
#     del restaurants_db['restaurant_id']
    
    return restaurants_db
    

database_list =[]
for db_file in database_files:
    database_list.append(load_database(db_file))

In [11]:
num_records = 0
for db in database_list:
    num_records += db.shape[0]
print (num_records)

1646


In [12]:
for db in database_list:
    print (db[db.name.str.contains('瓜牛')].loc[:,['name','restaurant_id','revenue']])

Empty DataFrame
Columns: [name, restaurant_id, revenue]
Index: []


In [16]:
# test_db_1 = database_list[0]
# print (test_db_1.shape[0], test_db_1.drop_duplicates('id').shape[0])
# test_db_2 = database_list[1]
# print (test_db_2.shape[0], test_db_2.drop_duplicates('id').shape[0])
# test_db_3 = database_list[2]
# print (test_db_3.shape[0], test_db_3.drop_duplicates('id').shape[0])

# print ('num_duplicates', test_db_1[test_db_1['id'].isin(test_db_2['id']) == False].shape[0])

In [13]:
# Merge
full_db = database_list[0]
for db in database_list[1:]:
    full_db = full_db.append(db)

    
full_db = full_db.sort_values('revenue',ascending=False).drop_duplicates(subset='id')
print (full_db.shape[0])

1646


In [6]:
new_db = full_db.copy()
new_db.loc[2232,'revenue'] = 1500000
full_db = new_db

full_db.sort_values('revenue', ascending=False)

Unnamed: 0,id,name,name_for_url,rating,rating_count,month_sales,phone,latitude,longitude,is_free_delivery,delivery_fee,minimum_order_amount,minimum_free_delivery_amount,promotion_info,address,revenue,mean_price,average_price
8089,974824,帝王小龙虾,bzizja,4,492,36461,18221740380,31.139233,121.500491,False,4.0,88.0,0.0,你好欢迎光临我们帝王小龙虾餐厅，有什么需要请与我们联系，用餐高峰期请你提前预定，以免耽误你的...,上海市浦东新区三林路,6.540442e+06,55.335385,179.381866
7826,983095,新香小龙虾&鸿福港式烧腊,rnnjbi,4,419,34322,13681095720,31.139259,121.500373,False,5.0,128.0,0.0,,上海市闵行区普乐路325号-1、327号-1、327号-2,3.855211e+06,53.890781,112.324781
2232,310846,盒马（陆家嘴店）,ljz-hm,4,11205,42678,021-68902565 021-50935087,31.229416,121.518057,True,0.0,10.0,0.0,饿了么强势推荐，盒马外卖，互联网品牌，五星级餐饮品质，准时送达！\n电话：021-68902...,南泉北路585号新大陆广场S03-08\S03-09a,1.500000e+06,1915.228571,5.374652
5653,685386,盒马（虹桥店）,xx-hm130,4,11693,41958,61158312 13761245867,31.207171,121.405083,True,0.0,10.0,0.0,饿了么强势推荐，盒马外卖，互联网品牌，五星级餐饮品质，准时送达！电话：61158312、13...,上海市长宁区紫云西路130号一层,1.198075e+06,1018.750000,28.554142
5797,739158,A梦Eamonns(通北店),eamonnstbd,4,12165,31435,65686690 65686691 65686693 17301704541,31.264891,121.515211,False,3.0,25.0,0.0,"A梦Eamonn's 杨浦（通北店）通北路888号,以时下最流行的创意菜为主线,食材新鲜，用...",上海市杨浦区通北路888号,9.283630e+05,41.026906,29.532782
1112,92979,A梦Eamonns(共新店),zb-eamonn,4,9758,28245,66075363 66078657 66075516,31.251631,121.463253,False,3.0,65.0,0.0,A梦餐廳Eamonn's 闸北(共新店)位于共和新路480号，晚市及夜宵堂吃爆款优惠活动，每...,上海市闸北区共和新路480号,8.614790e+05,41.056180,30.500230
3887,461360,A梦Eamonns(枣阳店),ptam,4,10561,29035,62608397 62608827 13052042718 62608387,31.230939,121.401111,False,3.0,50.0,0.0,A梦餐廳Eamonn's 普陀（枣阳店)枣阳路259号，晚市及夜宵堂吃爆款优惠活动，每桌满8...,上海市普陀区枣阳路259号,8.576010e+05,41.139269,29.536800
4993,685371,盒马（五角场店）,yp-he53,4,8673,31420,021-55890397,31.308682,121.518831,True,0.0,10.0,0.0,饿了么强势推荐，盒马外卖，互联网品牌，五星级餐饮品质，准时送达！\n电话：021- 5589...,上海市杨浦区政德东路53号Z18广场项目209室商铺,8.177006e+05,1043.148718,26.024844
2176,281523,久喜创作便当（延安路）,jhrl,4,3603,12340,18930730080,31.216710,121.434350,True,0.0,0.0,0.0,请亲们正确详细的填写送餐地址,延安西路626号,7.021210e+05,29.708861,56.897974
3915,443740,OneWayCATE小龙虾盖浇饭（南京西路店）,onewaycate001,4,3024,13564,021-31136600 14721064003,31.234529,121.449762,False,10.0,0.0,0.0,Darling..世界辣么大，果然十分精彩。虽然很留恋。但我们更期待能即刻为您提供超有幸福感...,上海市静安区康定路359号1幢201室,6.890100e+05,24.555556,50.796963


In [8]:
# full_db.loc[2232]

In [16]:
# 嘉里中心
targets = { 
#     '嘉里中心':(31.224325,121.45036), 
#     '浦东国金':(31.2365804, 121.5010703), 
#     '新大陆广场':(31.2291802, 121.5180393),
#     '淮海香港广场':(31.2229601, 121.4752205),
#     '春城汉堡王':(31.1053198, 121.4114296)
    '龙茗路顾戴路':(31.13629, 121.38919)
}

with ExcelWriter('2000_metre.xlsx') as writer:
    for key,value in targets.items():
        full_db['distance'] = full_db.apply(lambda row: vincenty((row.latitude, row.longitude), value).km, axis=1)
        db = full_db[full_db.distance < 2]
        db = db[(db.name.str.contains('麻辣烫') 
                 | db.name.str.contains('香锅') 
                 | db.name.str.contains('冒菜')
                 | db.name.str.contains('烧烤')) == False]

        db = db.sort_values('revenue',ascending=False)
        print (key,'2KM内店家数量', db.shape[0])
        
        print (key,'营业额', db.revenue.sum())
        print (key,'平均单店', db.revenue.sum() / db.shape[0])
        
        head_db = db.head(50)
        head_db.to_excel(writer, key + '-revenue')
        
        db = db.sort_values('month_sales',ascending=False)
        print (key,'销量', db.month_sales.sum())
        print (key,'每单价值', db.revenue.sum() / db.month_sales.sum())
        
        
        head_db = db.head(50)
        head_db.to_excel(writer, key + '-monthsales')

        
    
    
        




    


龙茗路顾戴路 2KM内店家数量 109
龙茗路顾戴路 营业额 3712240.1566666663
龙茗路顾戴路 平均单店 34057.249143730885
龙茗路顾戴路 销量 85924
龙茗路顾戴路 每单价值 43.20376328693574


In [91]:
db = 

Unnamed: 0,name,revenue
6811,瓜牛（新天地店）,609620.0
753,瓜牛（陆家嘴店）,549171.333333
5320,瓜牛（南京西路店）,534243.0
7932,瓜牛（愚园路店）,107083.0


In [15]:
group = full_db['revenue'].groupby(full_db['geohash'])
values = group.sum().sort_values(ascending=False).reset_index()
print(values.shape[0])

KeyError: 'geohash'

In [242]:
group = full_db['revenue'].groupby(full_db['geohash'])
values = group.sum().sort_values(ascending=False).head(10).reset_index()

with ExcelWriter('all_geohash.xlsx') as writer:
    for value in values['geohash']:
        records = full_db[full_db['geohash'] == value]
        records.to_excel(writer,value)

In [243]:
mean_db = full_db.loc[full_db['mean_price'] > 30].copy()
print(mean_db.shape[0])

mean_db.head(1)


2148


Unnamed: 0,id,name,name_for_url,rating,rating_count,month_sales,phone,latitude,longitude,is_free_delivery,delivery_fee,minimum_order_amount,minimum_free_delivery_amount,promotion_info,address,revenue,mean_price,average_price,geohash
3,389,行运港式茶餐厅（斜徐路店）,hp-xygscct,4,222,733,63022877,31.206646,121.472554,False,5.0,20.0,0.0,由于今日运力紧张，起送价临时调整到30元，运力恢复时起送价将恢复到原先价格，此举为了保证服务...,斜徐路583号行运港式餐厅,10795.0,34.759184,14.727149,wtw3s0


In [244]:
group = mean_db['revenue'].groupby(mean_db['geohash'])
values = group.sum().sort_values(ascending=False).head(10).reset_index()

with ExcelWriter('30_geohash.xlsx') as writer:
    for value in values['geohash']:
        records = mean_db[mean_db['geohash'] == value]
        records.to_excel(writer,value)

    

#     print (records['name','address'])



In [245]:
mean_db = full_db.loc[full_db['mean_price'] > 50].copy()
print(mean_db.shape[0])

mean_db.head(1)



743


Unnamed: 0,id,name,name_for_url,rating,rating_count,month_sales,phone,latitude,longitude,is_free_delivery,delivery_fee,minimum_order_amount,minimum_free_delivery_amount,promotion_info,address,revenue,mean_price,average_price,geohash
53,5874,大胡子特色炒饭,xh-ahztssj,4,3575,14820,13681930519 13024105871,31.160329,121.421669,False,5.0,20.0,0.0,各位小主，本店优先配送饿了么，遇到恶劣天气送餐较慢，请多多担待哈！如果您的美食凉了，或者口味...,桂林路114号,161079.0,87.65,10.869028,wtw370


In [246]:
group = mean_db['revenue'].groupby(mean_db['geohash'])
values = group.sum().sort_values(ascending=False).head(10).reset_index()

with ExcelWriter('50_geohash.xlsx') as writer:
    for value in values['geohash']:
        records = mean_db[mean_db['geohash'] == value]
        records.to_excel(writer,value)