In [1]:
import json
import os
from datetime import datetime

import numpy as np
import pandas as pd
import sqlalchemy

In [8]:
import time
import logging


# Bit widths of the fields packed into the 64-bit ID
WORKER_ID_BITS = 5
DATACENTER_ID_BITS = 5
SEQUENCE_BITS = 12

# Largest value each ID field can hold (all of its bits set)
MAX_WORKER_ID = (1 << WORKER_ID_BITS) - 1  # 2**5 - 1 == 0b11111
MAX_DATACENTER_ID = (1 << DATACENTER_ID_BITS) - 1

# Left-shift offsets: the sequence sits in the lowest bits, followed by the
# worker ID, the datacenter ID and finally the timestamp.
# NOTE: "WOKER" is a misspelling that is kept on purpose, IdWorker uses this exact name.
WOKER_ID_SHIFT = SEQUENCE_BITS
DATACENTER_ID_SHIFT = WOKER_ID_SHIFT + WORKER_ID_BITS
TIMESTAMP_LEFT_SHIFT = DATACENTER_ID_SHIFT + DATACENTER_ID_BITS

# Mask that makes the sequence counter wrap around to 0
SEQUENCE_MASK = (1 << SEQUENCE_BITS) - 1

# Custom epoch in milliseconds (the Twitter epoch)
TWEPOCH = 1288834974657


logger = logging.getLogger('flask.app')


class IdWorker(object):
    """
    Snowflake-style generator of integer IDs.

    Every ID is the bitwise OR of four fields, from the highest bits to the
    lowest: the milliseconds elapsed since ``TWEPOCH``, the datacenter ID,
    the worker ID and a sequence counter that distinguishes IDs generated
    within the same millisecond.
    """

    def __init__(self, datacenter_id, worker_id, sequence=0):
        """
        Validate the identifiers and initialise the generator state.

        :param datacenter_id: datacenter (machine region) ID, 0..MAX_DATACENTER_ID
        :param worker_id: machine ID, 0..MAX_WORKER_ID
        :param sequence: starting sequence number; note that ``get_id`` resets
            the counter to 0 whenever the timestamp differs from the previous one
        :raises ValueError: if ``worker_id`` or ``datacenter_id`` is out of range
        """
        # sanity check
        if worker_id > MAX_WORKER_ID or worker_id < 0:
            raise ValueError('worker_id值越界')

        if datacenter_id > MAX_DATACENTER_ID or datacenter_id < 0:
            raise ValueError('datacenter_id值越界')

        self.worker_id = worker_id
        self.datacenter_id = datacenter_id
        self.sequence = sequence

        self.last_timestamp = -1  # timestamp used for the previously generated ID

    def _gen_timestamp(self):
        """
        Return the current wall-clock time as integer milliseconds.

        :return: int timestamp
        """
        return int(time.time() * 1000)

    def get_id(self):
        """
        Generate a new ID.

        :return: int ID composed of timestamp, datacenter ID, worker ID and sequence
        :raises RuntimeError: if the clock reports a time earlier than the one
            used for the previous ID
        """
        timestamp = self._gen_timestamp()

        # Clock moved backwards: refuse to generate, since reusing an older
        # timestamp could repeat an ID that was already handed out.
        if timestamp < self.last_timestamp:
            message = 'clock is moving backwards. Rejecting requests until {}'.format(self.last_timestamp)
            logger.error(message)
            # Raise explicitly: a bare `raise` outside an `except` block only
            # produces a generic "No active exception to re-raise" RuntimeError.
            raise RuntimeError(message)

        if timestamp == self.last_timestamp:
            # Same millisecond as the previous ID: advance the sequence.
            self.sequence = (self.sequence + 1) & SEQUENCE_MASK
            if self.sequence == 0:
                # Sequence wrapped around, so this millisecond is exhausted.
                timestamp = self._til_next_millis(self.last_timestamp)
        else:
            self.sequence = 0

        self.last_timestamp = timestamp

        new_id = ((timestamp - TWEPOCH) << TIMESTAMP_LEFT_SHIFT) | (self.datacenter_id << DATACENTER_ID_SHIFT) | \
                 (self.worker_id << WOKER_ID_SHIFT) | self.sequence
        return new_id

    def _til_next_millis(self, last_timestamp):
        """
        Busy-wait until the clock moves past ``last_timestamp``.

        :param last_timestamp: millisecond timestamp that must be exceeded
        :return: the first timestamp read that is greater than ``last_timestamp``
        """
        timestamp = self._gen_timestamp()
        while timestamp <= last_timestamp:
            timestamp = self._gen_timestamp()
        return timestamp


class JsonEncoder(json.JSONEncoder):
    """JSON encoder that also understands numpy scalars and arrays."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        numpy arrays are converted with ``tolist()`` and numpy integer,
        floating and bool scalars with ``item()``; every other object is
        handed to the base class implementation.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()

        numpy_scalar_types = (np.integer, np.floating, np.bool_)
        if isinstance(obj, numpy_scalar_types):
            return obj.item()

        return super().default(obj)

In [3]:
# Database URLs are read from the environment so that usernames and passwords
# never live in the notebook, its saved outputs or version control.
# Each variable must hold a full SQLAlchemy URL of the form
#   mysql+pymysql://<user>:<password>@<host>:<port>/<database>
# NOTE(review): credentials that were previously committed in this cell should be rotated.
def _engine_from_env(var_name):
    """Create a SQLAlchemy engine from the URL stored in an environment variable.

    :param var_name: name of the environment variable holding the database URL
    :return: the engine returned by ``sqlalchemy.create_engine``
    :raises RuntimeError: if the variable is unset or empty
    """
    url = os.environ.get(var_name)
    if not url:
        raise RuntimeError(
            'Environment variable {} must be set to a SQLAlchemy database URL'.format(var_name)
        )
    return sqlalchemy.create_engine(url)


class_engine = _engine_from_env('BASICAI_CLASS_DB_URL')    # basicai_annotation database (class definitions)
result_engine = _engine_from_env('BASICAI_RESULT_DB_URL')  # basicai_dataset database read for annotation results
target_engine = _engine_from_env('BASICAI_TARGET_DB_URL')  # basicai_dataset database that receives the inserts

In [6]:
# Load every row of `dataset_class` that belongs to team 270077.
dataset_class_sql = '''
select * from dataset_class where team_id=270077
'''
df_class = pd.read_sql(sql=dataset_class_sql, con=class_engine)

In [7]:
# Distinct dataset ids referenced by the loaded classes, in ascending order.
sorted(set(df_class['dataset_id']))

[270267,
 270271,
 330271,
 330272,
 330273,
 330274,
 330275,
 330276,
 330277,
 330278,
 330279,
 330280,
 330281,
 330282,
 330284,
 330285,
 330286,
 330287,
 330288,
 330289,
 330290,
 330291,
 330292,
 330293,
 330294,
 330295,
 330296,
 360226,
 360227,
 360228,
 360229,
 360230,
 360231,
 360232,
 360233,
 360234,
 360235,
 360236,
 360237,
 360238,
 360239,
 360240,
 360241,
 360242,
 360244,
 360245,
 360246,
 360247,
 360248,
 360249,
 360250,
 360303,
 360464]

In [52]:
# Write the distinct, ascending dataset ids from position 296 onwards to a JSON file.
# NOTE(review): the id listing shown above has fewer than 296 entries, so this
# slice may be empty. Confirm that the offset is still the intended one.
file = r"D:\倍赛\data_ids.json"
unique_dataset_ids = sorted(set(df_class['dataset_id']))
j_data = {"ids": unique_dataset_ids[296:]}
with open(file, mode='w', encoding='utf-8') as idf:
    json.dump(j_data, fp=idf)

In [30]:
# Load every row of `data_annotation_result` that belongs to team 90030.
dataset_result_sql = '''
select * from data_annotation_result where team_id=90030
'''
df_result = pd.read_sql(sql=dataset_result_sql, con=result_engine)

In [31]:
# Display the annotation results loaded by the query above.
df_result

Unnamed: 0,id,team_id,dataset_id,data_id,version,source_type,source_id,validity,classification_values,objects,created_at,created_by,updated_at,updated_by


In [34]:
def _parse_class_attributes(raw_attributes):
    """Decode the JSON text of one `attributes` cell into a list of attribute dicts.

    A missing value (``None`` or NaN) and a JSON ``null`` are both treated as
    "no attributes" instead of raising.
    """
    is_nan = isinstance(raw_attributes, float) and pd.isna(raw_attributes)
    if raw_attributes is None or is_nan:
        return []
    return json.loads(raw_attributes) or []


def build_class_mappings(classes):
    """Build name-based lookup tables from a frame of class definitions.

    :param classes: DataFrame with ``name``, ``id`` and ``attributes`` columns,
        where ``attributes`` holds a JSON array of objects with ``name`` and ``id`` keys
    :return: tuple ``(name_to_id, name_to_attrs)``; ``name_to_id`` maps a class
        name to its id as a string, ``name_to_attrs`` maps a class name to an
        ``{attribute name: attribute id}`` dict. When a class name occurs more
        than once, the last row wins.
    """
    name_to_id = {}
    name_to_attrs = {}
    # Walk the three needed columns in lockstep instead of building a Series per row.
    rows = zip(classes['name'], classes['id'], classes['attributes'])
    for class_name, class_id, raw_attributes in rows:
        name_to_id[class_name] = str(class_id)
        name_to_attrs[class_name] = {
            attribute['name']: attribute['id']
            for attribute in _parse_class_attributes(raw_attributes)
        }
    return name_to_id, name_to_attrs


# Keeping the loop inside a function avoids rebinding the builtin `id` (and other
# loop variables) in the notebook's global namespace.
class_name_id_mapping, name_attr_mapping = build_class_mappings(df_class)

In [35]:
# Display the class name -> class id (as string) lookup built above.
class_name_id_mapping

{'Car': '201385',
 'Van': '201386',
 'Truck': '201387',
 'bus': '204514',
 'car': '411783',
 'person': '381867',
 'motorcycle': '411784'}

In [36]:
# Display the class name -> {attribute name: attribute id} lookup built above.
name_attr_mapping

{'Car': {'Occlusion': 'f359e99e-d98b-4306-8639-22a65ed2eab0',
  'Confidence': '64b4b2ad-c83f-4f7f-98bc-8627c0bd2cbe'},
 'Van': {'Occlusion': 'dcd302ef-2b88-492b-954f-88406067caa0',
  'Confidence': '624289f6-e260-41c8-b052-581b885020b2'},
 'Truck': {'Occlusion': '9c83bdc1-8014-490a-a4f0-65e798783529',
  'Confidence': 'b1ef630c-295e-4024-b88e-bf5d02e76bba'},
 'bus': {'a': '8feeac7b-18ad-44b7-91b9-c9e174069100'},
 'car': {'occlusion': '801f3717-3553-4a6f-a342-13bc7fc389aa'},
 'person': {},
 'motorcycle': {'occlusion': 'c3a3b2fa-e6c8-44d8-bc19-087e2478a8e8'}}

In [10]:
# Append one row to `upload_record`: only the team, a freshly generated serial
# number and the creation time are filled in; every other column is None.
worker = IdWorker(1, 2, 0)
upload_serial_number = worker.get_id()
upload_created_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

upload_record = {
    "id": None,
    "team_id": 120231,
    "serial_number": upload_serial_number,
    "file_url": None,
    "file_name": None,
    "error_message": None,
    "total_file_size": None,
    "downloaded_file_size": None,
    "total_data_num": None,
    "parsed_data_num": None,
    "status": None,
    "created_at": upload_created_at,
    "created_by": None,
    "updated_at": None,
    "updated_by": None,
}
dat = [upload_record]
data = pd.DataFrame(dat)

data.to_sql(name='upload_record', con=target_engine, index=False, if_exists='append')

In [None]:
# Append one row to `dataset_result_source` describing an external ground-truth
# result source. Scalar None values are broadcast to the single row defined by
# the one-element lists.
dataset_result_source_write = {
    "id": None,
    "team_id": [120231],
    "dataset_id": [990007],
    "result_name": ['zsh_test_fusion'],
    "source_id": [600003],
    "source_type": ['EXTERNAL_GROUND_TRUTH'],
    "created_at": [datetime.now().strftime('%Y-%m-%d %H:%M:%S')],
    "created_by": [600002],
    "updated_at": None,
    "updated_by": None
}
data_result_source = pd.DataFrame(dataset_result_source_write)

# Written through `target_engine`, the same engine used for the `upload_record`
# insert of this team; the notebook defines no plain `engine`.
# NOTE(review): confirm that `dataset_result_source` lives in the target database.
data_result_source.to_sql('dataset_result_source', con=target_engine, index=False, if_exists='append')