In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

In [2]:
# Load the three inputs: the measurement samples (CSV), the analysis grid and
# the boundary polygons (both GeoJSON).
cellrebel = pd.read_csv(
    filepath_or_buffer='Source Data/Bogor_Bekasi_RSRP_Throughput.csv',
    low_memory=False,
)
grid = gpd.read_file(filename='Grid Folder/grid_bogorbekasi_250x250.geojson')
boundary = gpd.read_file(filename='Polygon Kecamatan_Yunan/boundary.geojson')

In [3]:
# Keep only the two operators being compared; na=False makes a missing
# operator name count as "no match" instead of propagating NaN into the mask.
cellrebel = cellrebel.loc[
    cellrebel['mobile_operator'].str.contains('Indosat Ooredoo|XL Axiata', na=False)
]
# Discard samples that have no downlink throughput value.
cellrebel = cellrebel.dropna(subset=['dl_throughput'])

In [4]:
# Turn every sample into a point geometry built from its longitude/latitude,
# tagged with CRS EPSG:4326.
cellrebel_gpd = gpd.GeoDataFrame(
    cellrebel,
    geometry=gpd.points_from_xy(x=cellrebel['longitude'], y=cellrebel['latitude']),
    crs=4326,
)
# Reproject both polygon layers to the same CRS as the points so the spatial
# joins compare like with like.
grid = grid.to_crs(crs=4326)
boundary = boundary.to_crs(crs=4326)

In [5]:
# Keep a second copy of the grid polygons under a different column name; the
# joined frame later carries both the point 'geometry' and this 'geometry1'.
grid['geometry1'] = grid.geometry

In [6]:
# 1) Attach the attributes of the grid cell that contains each sample. The
#    join's 'index_right' bookkeeping column is dropped before the second join
#    adds its own column of the same name.
points_within = gpd.sjoin(
    cellrebel_gpd, grid, how='left', predicate='within'
).drop(columns=['index_right'])
# 2) Attach the attributes of the boundary polygon that contains each sample.
#    how='left' keeps samples that fall outside every polygon (with NaN attributes).
points_within = gpd.sjoin(points_within, boundary, how='left', predicate='within')

In [7]:
# Inspect the columns available after the grid and boundary spatial joins.
points_within.columns

Index(['date', 'mobile_operator', 'network_mnc', 'longitude', 'latitude',
       'dl_throughput', 'ul_throughput', 'latency',
       'reference_signal_received_power', 'geometry', 'id', 'left', 'top',
       'right', 'bottom', 'geometry1', 'index_right', 'NO_PROV', 'NO_KAB',
       'NO_KEC', 'NO_DES', 'PROV', 'KAB_KOT', 'KEC', 'KEL_DES', 'ID', 'ID_Kec',
       'Kepadatan_Penduduk', 'Elektrifikasi', 'Aksesibilitas_Jalan',
       'Ekonomi_Desa', 'WEIGHT', 'Kabkota_no', 'Kab_kota', 'Prop_no',
       'Propinsi', 'Pendd', 'Kode_pos', 'POC', 'Luas_kab', 'Perimeter_Kab',
       'BPS_Kab_Kota', 'BPS_Provinsi', 'XL_Region'],
      dtype='object')

In [8]:
# Mean downlink throughput per (grid id, KAB_KOT, KEC) group, one column per operator.
# The aggregation is named by string: passing the np.mean callable produces the
# same result today but emits a FutureWarning on pandas >= 2.1.
# `values` stays a list so the result keeps a two-level column header.
pivot = pd.pivot_table(
    points_within,
    index=['id', 'KAB_KOT', 'KEC'],
    columns=['mobile_operator'],
    values=['dl_throughput'],
    aggfunc='mean',
)

In [9]:
# Move the index levels into ordinary columns, placing their names on the
# lower header level (col_level=1), then drop the upper header level so that
# only flat column names remain.
pivot = (
    pivot
    .reset_index(col_level=1, allow_duplicates=False)
    .droplevel(level=0, axis=1)
)

In [10]:
# Method 1: replace NaN in the sample with 0 to increase the number of grid cells covered

# pivot['XL Axiata'].replace(np.nan, 0, inplace=True)
# pivot['Indosat Ooredoo'].replace(np.nan, 0, inplace=True)

In [11]:
# Method 2: drop grid rows where either operator has no sample.
# Impact: the number of grid cells that can be compared decreases.

pivot = pivot[pivot['Indosat Ooredoo'].notna() & pivot['XL Axiata'].notna()]
# Each comparison needs its own parentheses: `&` binds tighter than `>`, so
# `a > 0 & (b > 0)` is parsed as `a > (0 & (b > 0))` and the XL Axiata
# condition would never be applied.
# .copy() makes the result an independent frame, so adding the flag column
# below does not write into a slice of the previous frame.
pivot = pivot[(pivot['Indosat Ooredoo'] > 0) & (pivot['XL Axiata'] > 0)].copy()
# Mark every remaining row as comparable one-to-one between the two operators.
pivot['1-1-IOH-XL'] = 'Yes'

In [12]:
# Keep only the samples that lie in a grid cell flagged as comparable.
# `pivot` has one row per (id, KAB_KOT, KEC), so the same grid id can appear on
# several rows; de-duplicate on id first, otherwise the merge would repeat each
# sample once per matching row and inflate later counts.
points_within = points_within.merge(
    pivot[['id', '1-1-IOH-XL']].drop_duplicates(subset='id'),
    how='left',
    on='id',
)
points_within = points_within[points_within['1-1-IOH-XL'] == 'Yes']

In [13]:
# Throughput gap per row: positive when XL Axiata's mean is higher than Indosat Ooredoo's.
pivot['compare'] = pivot['XL Axiata'].sub(pivot['Indosat Ooredoo'])

In [14]:
# Attach each grid cell's polygon (stored in 'geometry1') to its row;
# how='left' keeps every pivot row even if no grid id matches.
pivot = pd.merge(
    pivot,
    grid[['id', 'geometry1']],
    how='left',
    on='id',
)

In [15]:
# grid.to_csv('hasil.csv')
# Write the per-grid comparison table (Method 2) to CSV, index included.
pivot.to_csv(path_or_buf='bogorbekasi_throughput_250x250_methods2.csv')

In [16]:
# Write the samples that fall in comparable grid cells (Method 2) to CSV, index included.
points_within.to_csv(path_or_buf='bogorbekasi_throughput_processed_methods2.csv')

In [17]:
### Count the samples per operator in each grid cell

# aggfunc=len counts every row of a group, so each value is the number of
# samples an operator has in that (id, KAB_KOT, KEC) group.
pivot1 = pd.pivot_table(points_within, index=['id', 'KAB_KOT', 'KEC'],
                        columns=['mobile_operator'], values=['reference_signal_received_power'], aggfunc=len)

# Move the index levels into columns on the lower header level, then drop the
# upper header level so that only flat column names remain.
pivot1 = pivot1.reset_index(col_level=1, allow_duplicates=False)
pivot1 = pivot1.droplevel(level=0, axis=1)

pivot1 = pivot1[pivot1['Indosat Ooredoo'].notna() & pivot1['XL Axiata'].notna()]
# Each comparison needs its own parentheses: `&` binds tighter than `>`, so
# `a > 0 & (b > 0)` is parsed as `a > (0 & (b > 0))` and the XL Axiata
# condition would never be applied.
# .copy() gives an independent frame to append the totals row to.
pivot1 = pivot1[(pivot1['Indosat Ooredoo'] > 0) & (pivot1['XL Axiata'] > 0)].copy()

# Append a totals row holding the summed sample counts of both operators.
# Only the two count columns are summed: summing every column would also add
# up the grid ids and concatenate all KAB_KOT / KEC strings.
# NOTE(review): the row label "Indosat Ooredoo" is kept for compatibility with
# existing usage, although the row holds totals for both operators; consider
# renaming it to something like "Total".
pivot1.loc["Indosat Ooredoo", ['Indosat Ooredoo', 'XL Axiata']] = (
    pivot1[['Indosat Ooredoo', 'XL Axiata']].sum().to_numpy()
)

In [18]:
# Display the per-grid sample counts for both operators (the appended totals row is the last row).
pivot1

mobile_operator,id,KAB_KOT,KEC,Indosat Ooredoo,XL Axiata
0,2472.0,BOGOR,JASINGA,74.0,5.0
1,2845.0,BOGOR,JASINGA,1.0,2.0
2,5406.0,BOGOR,TENJO,15.0,7.0
3,5779.0,BOGOR,TENJO,16.0,13.0
4,5780.0,BOGOR,TENJO,7.0,8.0
...,...,...,...,...,...
10054,146714.0,BEKASI,PEBAYURAN,7.0,16.0
10055,147059.0,BEKASI,PEBAYURAN,12.0,7.0
10056,147087.0,BEKASI,PEBAYURAN,14.0,2.0
10057,147459.0,BEKASI,PEBAYURAN,107.0,6.0
