In [64]:
import pandas as pd
import geopandas as gpd
import os
from oauth2client.service_account import ServiceAccountCredentials
import gspread

## Opening Performance Data

In [65]:
# Authenticate with Google using a service-account JSON key file.
# The key file location is machine-specific (absolute Windows path).
file_name = r'C:\Users\User\OneDrive - Migo (1)\Documents\Migo\Data Project\Masterfile\Credentials\update-mds-d3d8ff0bb675.json'

# OAuth scopes requested for the service account.
scope = ['https://www.googleapis.com/auth/drive',
         'https://www.googleapis.com/auth/drive.file']

# Build credentials from the key file, then create the gspread client
# that every later cell uses to open spreadsheets.
creds = ServiceAccountCredentials.from_json_keyfile_name(file_name, scope)
client = gspread.authorize(creds)

In [66]:
# Open the performance workbook by its title and select its first tab (index 0).
sheet = client.open('Wargo Monthly Performance')
sheet_instance = sheet.get_worksheet(0)

# Fetch every record from the tab and load them into a DataFrame.
records_data = sheet_instance.get_all_records()
performance = pd.DataFrame(records_data)

# Display the frame as the cell output.
performance

Unnamed: 0,calendar_month_utc7,store_id,store_name,watchers_all_time_first,watchers_monthly_first,watchers_quarterly_first,gross_revenue
0,2022-02-01,C01203,Kedai Om Dedz Rica,633,634,633,60214000
1,2022-03-01,C01203,Kedai Om Dedz Rica,1748,1749,1748,67290000
2,2022-06-01,C00905,Warteg Jaya Ampera 76,871,878,871,27225000
3,2022-02-01,C01519,Krajan Cell,375,380,377,11436000
4,2022-03-01,C01000,Kedai Wenaks,272,301,272,8530000
...,...,...,...,...,...,...,...
30502,2023-02-01,C02425,The Blues Cell,0,0,0,0
30503,2023-02-01,C03094,Abot Cell,0,0,0,0
30504,2023-02-01,C03098,DD Cell,0,0,0,0
30505,2023-01-01,C02476,Novi Cake Dukupuntang,0,0,0,0


In [67]:
# Aggregate the monthly performance rows into one row per store (sum of each metric).
metric_cols = ['watchers_all_time_first', 'watchers_monthly_first',
               'watchers_quarterly_first', 'gross_revenue']

# The sheet is loaded via get_all_records(), which returns '' for blank cells.
# A single blank turns a metric column into object dtype and makes 'sum' fail,
# so coerce the metrics to numeric first. Blank / non-numeric cells become NaN
# and are skipped by the sum. `performance` itself is left untouched.
performance_numeric = performance.assign(
    **{col: pd.to_numeric(performance[col], errors='coerce') for col in metric_cols}
)

performance_pivot = performance_numeric.pivot_table(
    index='store_id',
    values=metric_cols,
    aggfunc={col: 'sum' for col in metric_cols},
).reset_index()

## Preprocessing

In [68]:
# Input locations. The two shapefile folders live under a common root on the Z: drive;
# the masterfile workbook lives in the local OneDrive folder.
shapefile_root = r'Z:\Masterfile\1. Shapefile'

path_adm = shapefile_root + r'\1. Population and Administrative Boundaries\3. New Area'
path_zmp = shapefile_root + r'\8. ZMP'
path_masterfile = r'C:\Users\User\OneDrive - Migo (1)\Documents\Migo\Data Project\Masterfile'

In [69]:
# Load the three inputs: administrative regions, the store masterfile, and the ZMP layer.
region = gpd.read_file(os.path.join(path_adm,'new_area.gpkg'))

masterfile = pd.read_excel(os.path.join(path_masterfile, 'Store Score Masterfile.xlsx'), sheet_name='Masterfile New')
# Normalised lookup key for the ZMP join: lower-case, spaces removed, ends trimmed.
masterfile['area_lookup'] = masterfile['Area_Wilayah'].str.lower().str.replace(" ","").str.strip()

zmp = gpd.read_file(os.path.join(path_zmp, 'zmp_masterfile.gpkg'))
# Keep only the two columns needed for the join. The explicit .copy() makes this an
# independent frame, so the column assignment below does not write into a slice of
# the original layer (which raises SettingWithCopyWarning).
zmp = zmp[['ZMP_NAME', 'zmp_category']].copy()
# Same normalisation as masterfile['area_lookup'] so the two keys match.
zmp['ZMP_NAME'] = zmp['ZMP_NAME'].str.lower().str.replace(" ","").str.strip()

In [76]:
# Spot-check: show the ZMP row(s) whose normalised name matches this value.
zmp.loc[zmp['ZMP_NAME'] == r'pasarsawotanjunglengkong']

Unnamed: 0,ZMP_NAME,zmp_category
3766,pasarsawotanjunglengkong,


In [70]:
# Show the region layer's column names, e.g. to see which fields are available for the join.
region.columns

Index(['OBJECTID', 'PROVINCE', 'CITY', 'KECA', 'PCK', 'SUM_AREA', 'SUM_POP',
       'SUM_DENSIT', 'Shape_Leng', 'Shape_Area', 'Area', 'geometry'],
      dtype='object')

### Join Masterfile with Region, Performance, and ZMP

In [71]:
# Build a shared "<city> <kecamatan>" key on both frames: both parts lower-cased
# and trimmed, with spaces removed from the kecamatan part.
region_city = region['CITY'].str.lower().str.strip()
region_keca = region['KECA'].str.lower().str.strip().str.replace(" ","")
region['new_id'] = region_city + " " + region_keca

store_city = masterfile['City'].str.lower().str.strip()
store_keca = masterfile['Kecamatan'].str.lower().str.strip().str.replace(" ","")
masterfile['new_id'] = store_city + " " + store_keca

# Left-join the masterfile to each lookup in turn so every masterfile row is kept:
#   1. region attributes, matched on the key built above
#   2. per-store performance totals, matched on the store id
#   3. ZMP category, matched on the normalised area name
# NOTE(review): a left join repeats a masterfile row when the right-hand key is not
# unique — confirm new_id and ZMP_NAME are unique in their lookup tables.
with_region = masterfile.merge(region, how='left', on='new_id')
with_performance = with_region.merge(
    performance_pivot, how='left', left_on='Store_ID', right_on='store_id'
)
with_zmp = with_performance.merge(
    zmp, how='left', left_on='area_lookup', right_on='ZMP_NAME'
)

# Map source column names to the names used in the published sheet.
# rename() ignores keys that are not present in the frame.
column_renames = {
    'Kecamatan_x': 'Kecamatan',
    'City_x': 'City',
    'SUM_POP': 'population',
    'status3': 'status',
}
masterfile_join = with_zmp.rename(columns=column_renames)

In [72]:
# Prepare the frame that is uploaded to the sheet.
# Columns to publish, in sheet order.
upload_columns = ['Area_Wilayah','Store_ID', 'Nama_Toko','Lintang', 'Bujur', 'Kecamatan', 'City', 'Province',
                  'status','new_id','sitting_type', 'population','gross_revenue', 'watchers_all_time_first',
                  'watchers_monthly_first','watchers_quarterly_first','zmp_category','Area']

# Replace every missing value with 0. One pass is enough: the row filter below
# cannot introduce new missing values, so a second fillna(0) would be a no-op.
# NOTE(review): this also writes 0 into text columns such as zmp_category — confirm
# that is the desired placeholder in the sheet.
masterfile_join = masterfile_join[upload_columns].fillna(0)

# Rows whose Store_ID was missing now hold 0; drop them.
masterfile_join = masterfile_join[masterfile_join['Store_ID']!=0]

## Updating to Google Sheets

In [73]:
# Open the destination spreadsheet by key and empty the target tab before re-uploading.
spreadsheetId = "1P3mo9DnaZ9Kv3slU7AWSQCBuOJZgKPUMA5WAMhHXypY"
# NOTE(review): sheetId is not used in this cell; the tab is selected by position
# below — confirm both refer to the same tab.
sheetId = "1821994154"

sh = client.open_by_key(spreadsheetId)
# Select the tab at index 2 (index 0 is the first tab).
worksheet = sh.get_worksheet(2)
# Destructive: clears the tab's existing cell values before the new data is written.
worksheet.clear()

{'spreadsheetId': '1P3mo9DnaZ9Kv3slU7AWSQCBuOJZgKPUMA5WAMhHXypY',
 'clearedRange': 'store_masterfile!A1:R13892'}

In [74]:
# Upload the frame to the worksheet: one header row followed by all data rows.
header_row = masterfile_join.columns.values.tolist()
data_rows = masterfile_join.values.tolist()
worksheet.update([header_row] + data_rows)

{'spreadsheetId': '1P3mo9DnaZ9Kv3slU7AWSQCBuOJZgKPUMA5WAMhHXypY',
 'updatedRange': 'store_masterfile!A1:R2698',
 'updatedRows': 2698,
 'updatedColumns': 18,
 'updatedCells': 48564}