# 功能整合

类`ChangEDataManager`，包含以下功能：

- 本地数据文件夹设置、数据文件数统计、定制条件查询
- 远程数据查询和下载
- 载入pds文件并处理为图像数据（numpy？cv2？pyqt？）
- 将图像数据保存为PNG，可调缩放

还未考虑好的功能：

- 双目图像自动查找并返回图像数据
- 同批次图像自动查找并镶嵌拼合
- 全景图巡视

`ChangEDataManager`是在后端处理数据的类：前端的`ChangEViewer`发来请求后，`ChangEDataManager`会把处理后的数据（字符串、列表或图像数据）返回给`ChangEViewer`。


## 1. `ChangEDataManager`

由于嫦娥公开数据文件数量众多，文件名超长，所以这里专门设计了一个类`ChangEDataManager`，将数据的远程查询、下载、缓存、及本地标签查询等功能集成在一起，便于管理和访问。

In [3]:
import os
import re
import requests
import time
import atool

class ChangEDataManager:
    """Index locally cached Chang'E data files and query the remote archive.

    The manager keeps the list of file names found in a local cache folder
    and answers tag-based queries against those names.  Remote look-ups are
    sent to the search endpoint on moon.bao.ac.cn.
    """

    # File-name layouts, tried in this order by _parse_tags().
    # Layout with colour and sequence fields, e.g.
    #   CE4_GRAS_PCAML-C-000_SCI_N_20200227111928_20200227111928_0121_B.2BL
    _NAME_WITH_SEQ = re.compile(
        r'(.*)_(.*)_(.*)-(.*)-(.*)_(.*)_(.*)_([0-9]*)_([0-9]*)_([0-9]{4})_(.*)\.(.*)')
    # Layout without colour/sequence fields, e.g.
    #   CE4_GRAS_ASAN-SCI_SCI_N_20190111150201_20190112102100_0003_B.2B
    _NAME_WITHOUT_SEQ = re.compile(
        r'(.*)_(.*)_(.*)-(.*)_(.*)_(.*)_([0-9]*)_([0-9]*)_([0-9]{4})_(.*)\.(.*)')

    # Query start time used for each mission when date_start == 'default'.
    _DEFAULT_DATE_START = {
        'CE5': '2020-12-01 00:00:00',
        'CE4': '2019-01-04 00:00:00',
        'CE3': '2009-12-31 00:00:00',
        'CE2': '2010-10-01 00:00:00',
        'CE1': '2007-10-24 00:00:00',
    }
    # Fallback for an unknown or empty mission id: the earliest start above,
    # so that the placeholder string 'default' is never sent as a date.
    _EARLIEST_DATE_START = '2007-10-24 00:00:00'

    def __init__(self, path='./data'):
        """Store the cache folder, scan it, and print a short summary.

        Args:
            path: Folder that holds the downloaded data files.
        """
        self.__cache_path = path
        self.__data = []
        self.info()

    def scan_cache(self, with_hidden=False):
        """Re-read the cache folder and remember the file names found.

        Args:
            with_hidden: When false, names starting with '.' are skipped.

        Returns:
            The number of file names kept.

        Raises:
            OSError: Propagated from os.listdir(), e.g. when the folder does
                not exist.
        """
        self.__data = [
            name for name in os.listdir(self.__cache_path)
            if with_hidden or not name.startswith('.')
        ]
        return len(self.__data)

    def info(self):
        """Print the manager version, the cache folder and the file count.

        The file count comes from a fresh scan_cache() call.
        """
        print('嫦娥数据管理器 v0.2')
        print('数据文件目录 : %s' % (self.__cache_path))
        print('数据文件数目 ：%d' % (self.scan_cache()))

    def get_ChangE_download_link(self, pageNo, pageSize, task_id='CE4', data_type='SCI', payload='PCAM', data_grade='2B', date_start='default', date_end='today'):
        """Run one remote search via requests.get() and collect download links.

        Prefer calling query_remote() over calling this method directly.

        Args:
            pageNo: Page number to request.
            pageSize: Number of entries per page.
            task_id: Mission id, e.g. 'CE4'.
            data_type: Value sent as DM_DTYPE.
            payload: Value sent as DM_ZHNAME.
            data_grade: Value sent as DM_GRADE.
            date_start: Start time 'YYYY-mm-dd HH:MM:SS', or 'default' to use
                the per-mission start time (earliest known start for an
                unknown or empty mission id).
            date_end: End time 'YYYY-mm-dd HH:MM:SS', or 'today' for the
                current local time.

        Returns:
            A list whose first item is the 'totalCount' field of the response
            and whose remaining items are download URLs, or None when the
            HTTP response is not OK.
        """
        # By default the start of the query window depends on the mission.
        if date_start == 'default':
            date_start = self._DEFAULT_DATE_START.get(task_id, self._EARLIEST_DATE_START)

        # By default the query window ends now (local time).
        if date_end == 'today':
            date_end = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

        # Assemble the request parameters.
        url = 'https://' + 'moon.bao.ac.cn' + '/proSearch!proSearchList.search'
        inputJsonString = '{"globalLang":null,"DM_DTYPE":"%s","DM_TASKID":"%s","DM_ZHNAME":"%s","DM_GRADE":"%s","DM_CBTIMEStart":"%s","DM_CBTIMEend":"%s","order_by":"desc","qsList":"","qsStart":"","qsEnd":"","bianhao":""}' % (data_type, task_id, payload, data_grade, date_start, date_end)
        # NOTE(review): '_dc' looks like a fixed cache-busting timestamp - confirm.
        # NOTE(review): pageNo == 0 yields a negative 'start' - confirm whether
        # the server numbers pages from 0 or from 1.
        param = {
            '_dc': '1615623500055',
            'pageNo': str(pageNo),
            'pageSize': str(pageSize),
            'inputJsonString': inputJsonString,
            'page': str(pageNo),
            'start': str((pageNo-1)*pageSize),
            'limit': str(pageSize)}

        # Send the query.
        r = requests.get(url, params=param)
        if not r.ok:
            return None

        body = r.json()  # decode the response once and reuse it
        link = [body['totalCount']]
        for data in body['returndata']:
            # Turn the stored path into the real download URL.
            link.append(data['dmStorepath'].replace('/PUBDATA/', 'http://moon.bao.ac.cn/cedownload/'))
        return link

    def query_remote(self, qtask='CE4', qseq='*', qbatch='*', qcolor='*', qsite='*', qpayload='*', qdata_type='*', qdata_grade='*', qext='*'):
        """Thin wrapper around get_ChangE_download_link() (work in progress).

        Only qtask, qdata_type, qpayload and qdata_grade are forwarded to the
        server; '*' is sent as an empty string.  The remaining arguments are
        accepted for symmetry with query_local() but are not used yet.

        Returns:
            None.  At present the method only prints the total number of
            remote entries; building the result list is still a TODO.
        """
        # Map the wildcard to the empty string expected by the remote query.
        task_id = '' if qtask == '*' else qtask
        data_type = '' if qdata_type == '*' else qdata_type
        payload = '' if qpayload == '*' else qpayload
        data_grade = '' if qdata_grade == '*' else qdata_grade

        # Query page 0 first to learn the total number of entries.  The date
        # window is left to the defaults of get_ChangE_download_link().
        r = self.get_ChangE_download_link(0, 10, task_id, data_type, payload, data_grade)
        if r is None:
            return None
        # Total number of entries reported by the server.
        totalcount = int(r[0])
        print(totalcount)

        # TODO:
        # - query page by page and extract file names and download links
        # - drop results that do not satisfy the remaining query conditions
        # - return the list
        return None

    @classmethod
    def _parse_tags(cls, filename):
        """Split a data file name into its tags, or return None if unrecognised.

        Returns:
            [task, seq, batch, color, site, payload, data_type, data_grade, ext]
            in the same order as the query_local() arguments.  For the layout
            without colour/sequence fields, color is 'N/A' and seq is '-1'.
        """
        m = cls._NAME_WITH_SEQ.match(filename)
        if m is not None:
            # Groups 7-9 (unknown flag, start time, end time) are not queried.
            task, site, payload, color, seq, data_type = m.group(1, 2, 3, 4, 5, 6)
            batch, data_grade, ext = m.group(10, 11, 12)
        else:
            m = cls._NAME_WITHOUT_SEQ.match(filename)
            if m is None:
                return None
            # Groups 6-8 (unknown flag, start time, end time) are not queried.
            task, site, payload = m.group(1, 2, 3)
            color, seq = 'N/A', '-1'
            data_type = m.group(5)
            batch, data_grade, ext = m.group(9, 10, 11)
        return [task, seq, batch, color, site, payload, data_type, data_grade, ext]

    def query_local(self, qtask='CE4', qseq='*', qbatch='*', qcolor='*', qsite='*', qpayload='*', qdata_type='*', qdata_grade='*', qext='*'):
        """Return the cached file names whose tags satisfy the query.

        Each argument is either '*' (match anything) or the exact tag value
        required.  File names that match neither known layout are reported
        with a printed notice and skipped.

        Returns:
            A list of matching file names, in the order they were scanned.
        """
        wanted = [qtask, qseq, qbatch, qcolor, qsite, qpayload, qdata_type, qdata_grade, qext]
        result = []

        # Check every cached file name against the query.
        for name in self.__data:
            ftags = self._parse_tags(name)
            if ftags is None:
                print('注意：文件名格式无法匹配 %s' % (name))
                continue
            if all(want == '*' or want == have for want, have in zip(wanted, ftags)):
                result.append(name)

        return result

    def query(self, qtask='CE4', qseq='*', qbatch='*', qcolor='*', qsite='*', qpayload='*', qdata_type='*', qdata_grade='*', qext='*'):
        """Combined query: local first, then remote.

        Every remote entry is labelled 'c' when it is also in the local
        result, otherwise with the placeholder 'download_url'.  The labelled
        list is printed and returned.

        Returns:
            A list of [label, name] pairs; empty when the remote query
            yields nothing.
        """
        local_list = self.query_local(qtask, qseq, qbatch, qcolor, qsite, qpayload, qdata_type, qdata_grade, qext)
        # query_remote() may return None (request failed or not implemented yet).
        remote_list = self.query_remote(qtask, qseq, qbatch, qcolor, qsite, qpayload, qdata_type, qdata_grade, qext) or []
        cached = set(local_list)
        query_list = [['c' if f in cached else 'download_url', f] for f in remote_list]
        print(atool.pretty(query_list))
        return query_list
        
        

__发起本地查询__

创建一个`ChangEDataManager`，参数是保存数据文件的目录。然后用`query_local()`发起本地查询即可。

In [49]:
# Build a manager over the local cache folder, then look up the cached
# Chang'E-4 left-PCAM colour images with the .2BL extension.
change_manager = ChangEDataManager(path='./data')
q = change_manager.query_local(
    qtask='CE4',
    qseq='*',
    qbatch='*',
    qcolor='C',
    qpayload='PCAML',
    qext='2BL',
)
# Report the number of hits, then the sorted file names.
print('查询命中数量：%d\n' % len(q))
print(atool.pretty(sorted(q)))

嫦娥数据管理器 v0.2
数据文件目录 : ./data
数据文件数目 ：14459
查询命中数量：290

[
    CE4_GRAS_PCAML-C-000_SCI_N_20190104041518_20190104041518_0001_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190110211129_20190110211129_0002_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190112100836_20190112100836_0003_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190208204547_20190208204547_0007_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190228081519_20190228081519_0011_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190429143738_20190429143738_0033_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190508023258_20190508023258_0035_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190510100115_20190510100115_0038_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190708043358_20190708043358_0048_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190729011859_20190729011859_0050_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190806113452_20190806113452_0053_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190826032641_20190826032641_0056_B.2BL
    CE4_GRAS_PCAML-C-000_SCI_N_20190827051956_20190827051956_0059_B.2BL
    CE4

In [50]:
# Write the query result to a file, one file name per line.
# The with-statement closes the file even if a write raises.
with open('query.txt', 'w') as f:
    for i in q:
        f.write('%s\n' % (i))

## 2. 能否将本地查询和远程查询功能整合在一起

比如，查询参数为：

- 嫦娥4
- PCAML
- C

那么查询过程是这样：

- 先发起本地查询，获取本地文件名列表local_list
- 然后发起远程查询，获取远程文件名列表remote_list
- 以remote_list为准逐项比对local_list：本地也存在的，标注“已缓存”；本地不存在的，标注下载地址，形成新的列表query_list



In [24]:
# Sample data: file names already present in the local cache.
local_list = [
    'CE4_GRAS_PCAML-C-003_SCI_N_20200320092105_20200320092105_0123_B.2BL',
    'CE4_GRAS_PCAML-C-004_SCI_N_20200320092234_20200320092234_0123_B.2BL',
    'CE4_GRAS_PCAML-C-005_SCI_N_20200320092403_20200320092403_0123_B.2BL',
    'CE4_GRAS_PCAML-C-006_SCI_N_20200320092758_20200320092758_0123_B.2BL',
    'CE4_GRAS_PCAML-C-009_SCI_N_20200320093225_20200320093225_0123_B.2BL',
    'CE4_GRAS_PCAML-C-010_SCI_N_20200320093354_20200320093354_0123_B.2BL',
    'CE4_GRAS_PCAML-C-011_SCI_N_20200320093523_20200320093523_0123_B.2BL'
]
# Sample data: file names reported by the remote archive.
remote_list = [
    'CE4_GRAS_PCAML-C-000_SCI_N_20200320091638_20200320091638_0123_B.2BL',
    'CE4_GRAS_PCAML-C-001_SCI_N_20200320091807_20200320091807_0123_B.2BL',
    'CE4_GRAS_PCAML-C-002_SCI_N_20200320091936_20200320091936_0123_B.2BL',
    'CE4_GRAS_PCAML-C-003_SCI_N_20200320092105_20200320092105_0123_B.2BL',
    'CE4_GRAS_PCAML-C-004_SCI_N_20200320092234_20200320092234_0123_B.2BL',
    'CE4_GRAS_PCAML-C-005_SCI_N_20200320092403_20200320092403_0123_B.2BL',
    'CE4_GRAS_PCAML-C-006_SCI_N_20200320092758_20200320092758_0123_B.2BL',
    'CE4_GRAS_PCAML-C-007_SCI_N_20200320092927_20200320092927_0123_B.2BL',
    'CE4_GRAS_PCAML-C-008_SCI_N_20200320093056_20200320093056_0123_B.2BL',
    'CE4_GRAS_PCAML-C-009_SCI_N_20200320093225_20200320093225_0123_B.2BL',
    'CE4_GRAS_PCAML-C-010_SCI_N_20200320093354_20200320093354_0123_B.2BL',
    'CE4_GRAS_PCAML-C-011_SCI_N_20200320093523_20200320093523_0123_B.2BL'
]

# Walk the remote names in order; label each one that is also cached
# locally, and leave the label empty otherwise.
query_list = []
for f in remote_list:
    label = '已缓存' if f in local_list else ''
    query_list.append([label, f])
print(atool.pretty(query_list))

[
    [
        
        CE4_GRAS_PCAML-C-000_SCI_N_20200320091638_20200320091638_0123_B.2BL

    ]
    [
        
        CE4_GRAS_PCAML-C-001_SCI_N_20200320091807_20200320091807_0123_B.2BL

    ]
    [
        
        CE4_GRAS_PCAML-C-002_SCI_N_20200320091936_20200320091936_0123_B.2BL

    ]
    [
        已缓存
        CE4_GRAS_PCAML-C-003_SCI_N_20200320092105_20200320092105_0123_B.2BL

    ]
    [
        已缓存
        CE4_GRAS_PCAML-C-004_SCI_N_20200320092234_20200320092234_0123_B.2BL

    ]
    [
        已缓存
        CE4_GRAS_PCAML-C-005_SCI_N_20200320092403_20200320092403_0123_B.2BL

    ]
    [
        已缓存
        CE4_GRAS_PCAML-C-006_SCI_N_20200320092758_20200320092758_0123_B.2BL

    ]
    [
        
        CE4_GRAS_PCAML-C-007_SCI_N_20200320092927_20200320092927_0123_B.2BL

    ]
    [
        
        CE4_GRAS_PCAML-C-008_SCI_N_20200320093056_20200320093056_0123_B.2BL

    ]
    [
        已缓存
        CE4_GRAS_PCAML-C-009_SCI_N_20200320093225_20200320093225_0123_B.2BL

    ]
   

In [27]:
# Only a value that is exactly '*' maps to the empty string;
# anything else (here '*ab') is passed through unchanged.
qtask = '*ab'
if qtask == '*':
    task_id = ''
else:
    task_id = qtask
print(task_id)

*ab
