# 把網路圖型資料與地點位置結合

In [1]:
%matplotlib inline

In [2]:
import pandas as pd

## 1. 把資料放到 pandas 中

In [158]:
# Read the raw CSV without a header row (header=None); the file is decoded as Big5.
df = pd.read_csv("./../../small_set/TTS1.csv", header=None , encoding="big5")

In [159]:
# Name the six positional columns; "other" is dropped again a few cells later.
df.columns = ["Status_code", "Mail_num", "Mail_date", 
             "Mail_time", "OP_office", "other"]

In [160]:
# Join the date and time text columns and parse the result into one timestamp column.
df['Mail_datetime'] = pd.to_datetime(df.Mail_date + " " + df.Mail_time)

In [161]:
# The separate date/time columns and the unused "other" column are no longer needed.
df.drop(columns=['Mail_date', 'Mail_time', 'other'], inplace=True)

In [183]:
# Copy of df ordered by mail number, then by timestamp.
# NOTE(review): mail_df is not referenced again in the cells visible here.
mail_df = df.sort_values(["Mail_num", "Mail_datetime"])

## 2. indexing

In [134]:
# Move Mail_num and OP_office into a two-level index.
# inplace=True mutates df itself: from here on these two are index levels
# rather than columns, until df is rebuilt.
df.set_index(["Mail_num", "OP_office"], inplace=True)

In [136]:
# Display the rows ordered by mail number, then timestamp; the sorted frame is not stored.
df.sort_values(["Mail_num", "Mail_datetime"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Status_code,Mail_datetime
Mail_num,OP_office,Unnamed: 2_level_1,Unnamed: 3_level_1
00000000000000,330031,Y4,2018-01-01 09:49:04
00000000000000,330031,Y4,2018-01-01 09:58:08
00000000000000,330031,I4,2018-01-01 14:11:51
00000000000000,330031,Y4,2018-01-01 14:23:32
00000000000000,330031,I4,2018-01-01 14:52:09
00000000000000,330031,I4,2018-01-01 16:15:52
00000000000000,900030,Y4,2018-01-02 08:23:15
00000000000000,500055,Y4,2018-01-02 08:41:51
00000000000000,100029,G2,2018-01-02 09:20:00
00000000000000,900030,I4,2018-01-02 14:06:24


In [152]:
# Display df with the index levels turned back into columns.
# The result is not assigned, so df itself is left unchanged.
df.reset_index()

Unnamed: 0,Mail_num,OP_office,Status_code,Mail_datetime
0,00000000000000,330031,Y4,2018-01-01 09:49:04
1,00000000000000,330031,Y4,2018-01-01 09:58:08
2,00000000000000,330031,I4,2018-01-01 14:11:51
3,00000000000000,330031,Y4,2018-01-01 14:23:32
4,00000000000000,330031,I4,2018-01-01 14:52:09
5,00000000000000,330031,I4,2018-01-01 16:15:52
6,58668700100170,100250,Y4,2018-01-01 11:06:44
7,59928400100170,220014,I3,2018-01-01 21:10:45
8,75233300100170,704583,P5,2018-01-01 05:30:09
9,19491400101070,320008,A3,2018-01-01 21:40:46


## 3. 載入郵局地點資訊

In [102]:
# Load the post-office table (one row per office) with pandas' default CSV settings.
post_office = pd.read_csv("../Post_All_new.csv")

In [103]:
# Keep four columns, in this order: office code (郵務局號), office name (局名),
# latitude (緯度) and longitude (經度).
new_po = post_office[["郵務局號", "局名", "緯度", "經度"]]

In [104]:
# Rename the four selected columns positionally.
# NOTE(review): the labels are swapped relative to the source columns -- the
# third column is 緯度 (latitude) but is named "lon", and the fourth is 經度
# (longitude) but is named "lat". The sample lookup output
# ('竹東下公館郵局', 24.72617, 121.095...) confirms "lon" holds the latitude.
# The KML cell reads the pair as (element 2, element 1), which compensates, so
# renaming here must be done together with getPOInfo's callers.
new_po.columns=["post_code", "name", "lon", "lat"]

In [105]:
# Nested lookup keyed first by column, then by office code:
# po_dict['name'][post_code], po_dict['lon'][post_code], po_dict['lat'][post_code].
po_dict = new_po.set_index("post_code").to_dict()

In [227]:
def getPOInfo(post_code):
    """Look up a post office by its code.

    Returns a 3-tuple holding the ``'name'``, ``'lon'`` and ``'lat'`` entries
    of ``po_dict`` for ``post_code`` when ``isPOCode(post_code)`` is true.
    For any other code the tuple is the code formatted as a string followed
    by two empty strings.
    """
    # Unknown code: hand back a placeholder with no coordinates.
    if not isPOCode(post_code):
        return ("%s" % post_code, "", "")

    return tuple(po_dict[field][post_code] for field in ("name", "lon", "lat"))

In [112]:
# All office codes present in the lookup table, kept as a set for fast membership tests.
unique_po_code = set(po_dict['name'])
def isPOCode(post_code):
    """Return True when ``post_code`` is one of the codes in ``unique_po_code``."""
    return post_code in unique_po_code

In [229]:
getPOInfo(420584)

('420584', '', '')

## 4. 把郵件資料的郵號跟地點整合

In [189]:
# Build {mail number: [(office, timestamp, status), ...]} from the tracking rows.

# An earlier cell moves Mail_num / OP_office into the index in place. Restore
# them as columns when that has happened, so this cell also works when the
# notebook is run top to bottom.
mail_events = df if "Mail_num" in df.columns else df.reset_index()

all_mail = {}
# itertuples avoids building one Series per row, which iterrows does.
for row in mail_events.itertuples(index=False):
    mail_code = row.Mail_num.strip()
    all_mail.setdefault(mail_code, []).append(
        (row.OP_office, row.Mail_datetime, row.Status_code))

# Edges are later derived from neighbouring entries, so each mail's events must
# be in chronological order. list.sort is stable, so events sharing a timestamp
# keep their original row order.
for events in all_mail.values():
    events.sort(key=lambda event: event[1])

# Kept for any cell that still refers to the key set by name.
all_mail_key = set(all_mail)

In [194]:
def convert_2_edge(mail_status):
    """Turn a sequence of status records into edges between neighbouring records.

    Each record is indexed positionally: element 0 and element 2 are used.
    For every adjacent pair the result holds
    ``(first[0], second[0], first[2], second[2])``. Fewer than two records
    yield an empty list.
    """
    return [
        (current[0], following[0], current[2], following[2])
        for current, following in zip(mail_status, mail_status[1:])
    ]
# "Y4" -> "H4", 
# "H4" -> "Z2"

mail_status = all_mail['96410700000070']
convert_2_edge(mail_status)

[(540028, 540028, 'Y4', 'Y4'),
 (540028, 540028, 'Y4', 'I4'),
 (540028, 540028, 'I4', 'I4')]

In [195]:
# Flatten the per-mail edge lists into one list; mails with a single status
# record contribute nothing.
all_edges = []
for mail_code, events in all_mail.items():
    status_num = len(events)
    if status_num <= 1:
        continue

    mail_status = events
    all_edges += convert_2_edge(mail_status)

In [205]:
# Distinct offices seen as an edge source, as an edge target, and in either role.
src_nodes = {edge[0] for edge in all_edges}
tar_nodes = {edge[1] for edge in all_edges}
all_nodes = list(src_nodes.union(tar_nodes))

In [206]:
print(len(src_nodes), len(tar_nodes), len(all_nodes))

358 312 385


In [210]:
getPOInfo(all_nodes[12])

('竹東下公館郵局', 24.72617, 121.09508799999999)

In [225]:
src, target, src_state, target_state = all_edges[16]

In [228]:
getPOInfo(src), getPOInfo(target), src_state, target_state

(('220600', '', ''), ('220600', '', ''), 'Y4', 'I4')

In [295]:
import simplekml
kml = simplekml.Kml()

In [298]:
import collections

In [299]:
# Count identical (src, target, src_state, target_state) edges, most frequent first.
# NOTE(review): most_common(100000) keeps at most 100000 distinct edges;
# most_common() with no argument returns all of them -- confirm the cap is intended.
cnt = collections.Counter(all_edges).most_common(100000)

In [312]:
# Start from an empty KML document so that re-running this cell does not append
# a second copy of every line to the document built by a previous run.
kml = simplekml.Kml()

for (x, val) in cnt:
    src, target, src_state, target_state = x

    # Transitions that end in state 'I4' are left out entirely.
    if target_state == 'I4':
        continue
    print(x, val)

    # A line needs two different endpoints, both present in the post-office table.
    if src == target or not (isPOCode(src) and isPOCode(target)):
        continue

    src_pnt = getPOInfo(src)
    tar_pnt = getPOInfo(target)

    # KML coordinates are (longitude, latitude). The 'lon'/'lat' labels in the
    # post-office table are swapped relative to its source columns, so element 2
    # of the getPOInfo tuple is the longitude and element 1 the latitude.
    kml.newlinestring(
        name="%s to %s"%(src,target),
        description="%s to %s"%(src_pnt[0],tar_pnt[0]),
        coords=[(src_pnt[2],src_pnt[1]),
                (tar_pnt[2],tar_pnt[1])])

(400003, 400672, 'A1', 'Z4') 183
(100012, 91813, 'A1', 'Z4') 95
(400008, 400672, 'A1', 'Z4') 80
(400006, 400672, 'A1', 'Z4') 78
(830000, 800672, 'A1', 'Z4') 73
(704586, 704586, 'Y4', 'H4') 65
(91814, 100587, 'Z4', 'Y4') 50
(400009, 400672, 'A1', 'Z4') 45
(704584, 704584, 'Y4', 'H4') 27
(100581, 100581, 'Y7', 'I7') 25
(800007, 800672, 'A1', 'Z4') 21
(500027, 500027, 'H4', 'W2') 20
(800451, 800672, 'A1', 'Z4') 17
(704585, 704585, 'Y4', 'H4') 17
(800070, 800672, 'A1', 'Z4') 16
(400008, 400008, 'Z1', 'G1') 15
(400008, 400008, 'G1', 'I1') 15
(704586, 704586, 'H4', 'Z4') 14
(300450, 400672, 'A1', 'Z4') 14
(704586, 704586, 'X2', 'H4') 13
(100010, 91813, 'A1', 'Z4') 12
(100591, 100591, 'Y4', 'H4') 11
(704600, 704600, 'Y4', 'H4') 10
(100589, 100589, 'Y4', 'H4') 9
(830038, 800672, 'A1', 'Z4') 9
(950010, 950010, 'A1', 'Z4') 9
(730039, 730039, 'Y4', 'H4') 9
(100599, 100599, 'Y4', 'H4') 8
(830016, 800672, 'A1', 'Z4') 8
(220600, 220600, 'Y4', 'H4') 8
(800054, 800672, 'A1', 'Z4') 8
(950580, 950580, '

In [297]:
# Write the KML document to test.kml.
# NOTE(review): this cell's execution count (297) is lower than that of the
# line-building loop above (312), so the saved file may predate the last run of
# that loop -- run this cell again after the loop.
kml.save("test.kml")