In [51]:
import csv
import json
import os
import re
import time

import numpy as np
import pandas as pd

需要对主流向的数据进行坐标上的转换

坐标转换的函数如下

In [52]:
import math
x_pi = 3.14159265358979324 * 3000.0 / 180.0
pi = 3.1415926535897932384626  # π
a = 6378245.0  # 长半轴
ee = 0.00669342162296594323  # 偏心率平方

def wgs84_to_gcj02(lng, lat):
    """
    Convert a WGS84 coordinate to GCJ02 (the "Mars" coordinate system).

    :param lng: longitude in the WGS84 coordinate system
    :param lat: latitude in the WGS84 coordinate system
    :return: ``[lng, lat]`` list with the shifted coordinate; the input pair
        is returned unchanged when ``out_of_china`` reports it as outside
        the bounding box.
    """
    # Points outside the bounding box receive no offset.
    if out_of_china(lng, lat):
        return [lng, lat]

    # The raw offsets are evaluated relative to the reference point (105, 35).
    rel_lng = lng - 105.0
    rel_lat = lat - 35.0
    raw_dlat = _transformlat(rel_lng, rel_lat)
    raw_dlng = _transformlng(rel_lng, rel_lat)

    lat_rad = lat / 180.0 * pi
    sin_lat = math.sin(lat_rad)
    magic = 1 - ee * sin_lat * sin_lat
    sqrt_magic = math.sqrt(magic)

    # Scale the raw offsets using the ellipsoid constants ``a`` and ``ee``.
    lat_offset = (raw_dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrt_magic) * pi)
    lng_offset = (raw_dlng * 180.0) / (a / sqrt_magic * math.cos(lat_rad) * pi)

    shifted_lat = lat + lat_offset
    shifted_lng = lng + lng_offset
    return [shifted_lng, shifted_lat]


def out_of_china(lng, lat):
    """
    Tell whether a coordinate lies outside the bounding box used for China.

    Coordinates outside the box are not shifted by the conversion.

    :param lng: longitude
    :param lat: latitude
    :return: ``True`` when the point is outside the box (bounds exclusive),
        ``False`` when it is strictly inside.
    """
    lng_inside = 73.66 < lng < 135.05
    lat_inside = 3.86 < lat < 53.55
    return not (lng_inside and lat_inside)

def _transformlat(lng, lat):
    ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \
          0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lat * pi) + 40.0 *
            math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 *
            math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return ret


def _transformlng(lng, lat):
    ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \
          0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lng * pi) + 40.0 *
            math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 *
            math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return ret

将数据整理成以下四个字段：

`routeTime`、`route`、`longitude`、`latitude`

总体代码

In [72]:
def write_file(date, data):
    """
    Append one row to ``csv/<date>.csv``.

    :param date: day stamp used as the file name, formatted like ``20170201``
    :param data: dict keyed by the four field names, e.g.
        ``{'routeTime': '2017-02-01', 'route': 0, 'longitude': lng, 'latitude': lat}``
    :raises ValueError: raised by ``csv.DictWriter`` when ``data`` contains a
        key that is not one of the four field names
    """
    # newline='' is required by the csv module; without it the writer's own
    # '\r\n' terminator is translated again on Windows, producing blank rows.
    with open('csv/' + date + '.csv', 'a', newline='') as csvfile:
        fieldnames = ['routeTime', 'route', 'longitude', 'latitude']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writerow(data)

In [77]:
# Convert every per-day JSON dump under res/ into csv/<YYYYMMDD>.csv with the
# columns routeTime, route, longitude, latitude.

# Raw string so that \d is a regex class rather than a string escape.
pattern = re.compile(r'\d{4}\d{2}\d{2}')
# Sorted for a deterministic processing order; files whose name carries no
# YYYYMMDD stamp are skipped instead of raising IndexError.
filenames = sorted(name for name in os.listdir('res') if pattern.search(name))
days = [pattern.search(filename).group(0) for filename in filenames]
pro_days = [day[:4] + '-' + day[4:6] + '-' + day[6:] for day in days]

fieldnames = ['routeTime', 'route', 'longitude', 'latitude']
os.makedirs('csv', exist_ok=True)  # make sure the output directory exists

for filename, day, routeTime in zip(filenames, days, pro_days):
    # json.load() does not accept an `encoding` argument on current Python
    # (removed in 3.9), so the GBK decoding is applied when opening the file;
    # the `with` block also closes the handle.
    with open('res/' + filename, encoding='gbk') as json_file:
        routes = json.load(json_file)

    # Open the output once per day: write the header, then every row.
    # newline='' is required by the csv module to avoid blank rows on Windows.
    with open('csv/' + day + '.csv', 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for route, lngs_lats in routes.items():
            for lng, lat in lngs_lats:
                # Convert the GPS (WGS84) point to GCJ02 on the way through.
                lng, lat = wgs84_to_gcj02(lng, lat)
                writer.writerow({'routeTime': routeTime, 'route': route,
                                 'longitude': lng, 'latitude': lat})

In [88]:
# For each generated CSV, print the file name followed by the number of
# rows recorded for each route value.
yohu = os.listdir('csv')
for file in yohu:
    df = pd.read_csv('csv/' + file)
    route_counts = dict(df['route'].value_counts())
    print(file, route_counts, sep='')

20170201.csv{0: 2577, 2: 802, 4: 511, 3: 209, 1: 107}
20170202.csv{0: 3577, 1: 423, 3: 92, 2: 82}
20170203.csv{0: 3424, 3: 410, 1: 235, 2: 118}
20170204.csv{0: 6158, 4: 440, 2: 431, 3: 253, 6: 252, 1: 35, 7: 7, 5: 1}
20170205.csv{0: 4185, 1: 153, 3: 61, 4: 43, 2: 30, 5: 3}
20170206.csv{0: 5450, 1: 580, 4: 188, 2: 101}
20170207.csv{0: 5144, 3: 537, 4: 395, 2: 294, 1: 273, 5: 231}
20170208.csv{0: 3352, 1: 66, 2: 56}
20170209.csv{0: 3522, 1: 367, 2: 144, 3: 108}
20170210.csv{0: 3895, 2: 465, 3: 401, 1: 367}
20170211.csv{0: 3311, 1: 416, 2: 153, 3: 68}
20170212.csv{0: 4651, 1: 486, 3: 315, 5: 144, 2: 97}
20170213.csv{0: 3972, 1: 369, 3: 272, 2: 158}
20170214.csv{0: 3720, 2: 84, 4: 73}
20170215.csv{0: 3835, 1: 535, 2: 262, 3: 126}
20170216.csv{0: 3639, 1: 1472, 3: 459, 2: 239}
20170217.csv{0: 2980, 1: 430, 2: 177, 4: 80, 3: 50}
20170218.csv{0: 2183, 3: 257}
20170219.csv{0: 3784, 1: 373, 4: 222}
20170220.csv{0: 3134, 3: 341, 1: 246, 4: 125}
20170221.csv{0: 3103, 1: 584}
20170222.csv{0: 3518,