In [1]:
import os
import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def cartesian_product_transpose(*arrays):
    """Build the Cartesian product of several 1-D arrays.

    Parameters
    ----------
    *arrays : 1-D array-like
        The value sets to combine; one output column per input array.

    Returns
    -------
    numpy.ndarray
        Array with one row per combination and one column per input array.
        The dtype is ``np.result_type(*arrays)``. The first array varies
        slowest and the last array varies fastest.
    """
    grids = np.broadcast_arrays(*np.ix_(*arrays))
    n_rows = np.prod(grids[0].shape)
    n_cols = len(grids)

    # Store each broadcast grid as one contiguous row, then hand back the
    # transposed view so every combination becomes a row.
    stacked = np.empty((n_cols, n_rows), dtype=np.result_type(*arrays))
    for row, grid in zip(stacked, grids):
        row[...] = grid.reshape(-1)
    return stacked.T

def generate_weight(min_weight=0.1, interval=0.1, weight_num=5, column_names=None):
    """Enumerate every grid combination of ``weight_num`` weights that sums to 1.

    Parameters
    ----------
    min_weight : float
        Smallest value a single weight may take.
    interval : float
        Step between consecutive candidate weights.
    weight_num : int
        Number of weights in each combination (one column per weight).
    column_names : sequence of str, optional
        Column labels for the result; defaults to ``w1 .. w<weight_num>``.

    Returns
    -------
    pandas.DataFrame
        float32 frame with one row per valid combination. Each row is labelled
        by its weights expressed in tenths and joined with ``_``
        (e.g. ``'2_3_1_1_3'``), so the labels are only unambiguous when the
        weights are multiples of 0.1.
    """
    grid = np.arange(min_weight, 1 + interval, interval)
    all_weight = cartesian_product_transpose(*([grid] * weight_num))

    # Grid values carry floating-point error (e.g. 0.30000000000000004), so an
    # exact ``== 1`` comparison silently drops valid combinations. Compare
    # with a tolerance instead.
    sel_weight = all_weight[np.isclose(all_weight.sum(axis=1), 1.0)]

    if column_names is None:
        column_names = [f'w{i+1}' for i in range(weight_num)]
    data = pd.DataFrame(sel_weight, columns=column_names, dtype=np.float32)

    # Round before the integer cast: plain truncation would turn a float32
    # value such as 6.9999995 into 6 and mislabel the row.
    tenths = data.mul(10).round().astype(int).astype(str)
    data.index = ['_'.join(row) for row in tenths.to_numpy()]
    return data

In [3]:
# Read the NWP table, index it by (sta, gtime, ltime) and keep only the five
# variables listed in the trailing column selection.
# NOTE(review): 'time' is parsed as a date but is not among the kept columns
# or the index — confirm whether parsing it is still needed.
data = pd.read_csv('../data/NWP.csv', parse_dates=['gtime', 'time'], index_col=['sta', 'gtime', 'ltime'])[['pm25', 'rh2', 't2', 'u10', 'v10']]
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pm25,rh2,t2,u10,v10
sta,gtime,ltime,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1008A,2016-01-01 20:00:00,0,172.9,84.4,269.2,-0.3,-0.2
1008A,2016-01-01 20:00:00,1,183.4,82.0,269.8,-0.2,-0.8
1008A,2016-01-01 20:00:00,2,191.1,78.7,270.3,-0.2,-1.1
1008A,2016-01-01 20:00:00,3,121.7,68.6,271.2,-0.1,-0.8
1008A,2016-01-01 20:00:00,4,47.1,61.0,272.1,-1.2,-2.1


In [4]:
# Split on the gtime index level: rows strictly before test_date form the
# training set, rows exactly at test_date form the test set. Rows after
# test_date end up in neither frame.
test_date = '2019-01-01 20:00'
train = data.query('gtime<@test_date')
test = data.query('gtime==@test_date')
train.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pm25,rh2,t2,u10,v10
sta,gtime,ltime,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1008A,2016-01-01 20:00:00,0,172.9,84.4,269.2,-0.3,-0.2
1008A,2016-01-01 20:00:00,1,183.4,82.0,269.8,-0.2,-0.8
1008A,2016-01-01 20:00:00,2,191.1,78.7,270.3,-0.2,-1.1
1008A,2016-01-01 20:00:00,3,121.7,68.6,271.2,-0.1,-0.8
1008A,2016-01-01 20:00:00,4,47.1,61.0,272.1,-1.2,-2.1


# 变量权重

In [7]:
# MultiIndex.levels is a positional FrozenList, so a level cannot be reached
# by attribute name; look the station labels up by level name instead.
train.index.unique('sta')

In [85]:
def calc_weight_step_one(train: pd.DataFrame, test: pd.DataFrame, vweight=None):
    """Return ``train - test`` for every training row, aligned by station and ltime.

    Parameters
    ----------
    train : pandas.DataFrame
        Frame whose MultiIndex has levels named 'sta', 'gtime' and 'ltime'.
        The values are reshaped positionally to (sta, gtime, ltime, column),
        so the rows must form a complete grid stored in that nesting order.
    test : pandas.DataFrame
        Frame with the same columns holding exactly one row per (sta, ltime)
        pair, in the same station/ltime order as ``train``.
    vweight : optional
        Accepted for call compatibility only. This step returns unweighted
        differences and does not read the weight table.

    Returns
    -------
    pandas.DataFrame
        Differences ``train - test`` carrying ``train``'s index and columns;
        the single test block is broadcast across every gtime of ``train``.

    Raises
    ------
    ValueError
        If either frame cannot be reshaped to the expected grid.
    """
    train_index = train.index
    train_columns = train.columns

    n_sta = train_index.unique('sta').size
    n_gtime = train_index.unique('gtime').size
    n_ltime = train_index.unique('ltime').size
    n_columns = train_columns.size

    train_np = train.to_numpy().reshape(n_sta, n_gtime, n_ltime, n_columns)
    # The length-1 gtime axis lets the test block broadcast over all gtimes.
    test_np = test.to_numpy().reshape(n_sta, 1, n_ltime, n_columns)

    return pd.DataFrame(
        (train_np - test_np).reshape(-1, n_columns),
        index=train_index,
        columns=train_columns,
    )

def calc_train_test_delta(train:pd.DataFrame, test:pd.DataFrame):
    """Subtract the test block from every gtime slice of ``train``.

    Both frames are reshaped positionally: ``train`` to
    (sta, gtime, ltime, column) and ``test`` to (sta, 1, ltime, column), so
    the test values broadcast along the gtime axis. The level sizes are taken
    from the 'sta', 'gtime' and 'ltime' levels of ``train``'s index.

    Returns the differences, labelled with ``train``'s index and columns and
    converted with ``DataFrame.to_xarray()``.
    """
    idx, cols = train.index, train.columns
    grid_shape = tuple(idx.unique(level).size for level in ('sta', 'gtime', 'ltime'))
    n_sta, _, n_ltime = grid_shape
    n_cols = cols.size

    train_cube = train.to_numpy().reshape(*grid_shape, n_cols)
    test_cube = test.to_numpy().reshape(n_sta, 1, n_ltime, n_cols)
    delta = train_cube - test_cube

    return pd.DataFrame(delta.reshape(-1, n_cols), index=idx, columns=cols).to_xarray()
    


def calc_weight_delta(train:pd.DataFrame, test:pd.DataFrame, vweight=None):
    """Multiply the train-minus-test deltas by a table of per-variable weights.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Passed unchanged to ``calc_train_test_delta``.
    vweight : pandas.DataFrame or xarray object, optional
        Weight table with one column per column of ``train`` and one row per
        weight combination. A DataFrame is converted with ``to_xarray()``, so
        the resulting dimension takes the name of the frame's index. When
        omitted, a single all-ones row labelled ``'1_1_..._1'`` on an index
        named 'vweight' is used.

    Returns
    -------
    The product of the xarray deltas and the weight table.
    """
    train_test_delta = calc_train_test_delta(train, test)

    if vweight is None:
        # Derive the default from ``train`` itself. The earlier version read
        # ``ncolumns`` / ``train_columns``, which are not defined in this
        # function and only resolved when left over as notebook globals.
        columns = train.columns
        label = '_'.join(['1'] * columns.size)
        vweight = pd.DataFrame(
            np.ones((1, columns.size)),
            columns=columns,
            index=pd.Index([label], name='vweight'),
        )

    if isinstance(vweight, pd.DataFrame):
        vweight = vweight.to_xarray()

    return train_test_delta * vweight

In [87]:
# No vweight is passed, so calc_weight_delta falls back to its single
# all-ones weight row.
aa = calc_weight_delta(train, test)

In [92]:
# Convert the xarray result back to a DataFrame for inspection.
aa.to_dataframe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,pm25,rh2,t2,u10,v10
gtime,ltime,sta,vweight,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-01-01 20:00:00,0,1008A,1_1_1_1_1,129.4,51.1,0.1,-1.1,0.6
2016-01-01 20:00:00,1,1008A,1_1_1_1_1,146.6,29.3,4.8,1.7,0.1
2016-01-01 20:00:00,2,1008A,1_1_1_1_1,153.4,22.1,5.6,-0.8,-0.5
2016-01-01 20:00:00,3,1008A,1_1_1_1_1,87.3,11.2,6.5,-1.6,1.2
2016-01-01 20:00:00,4,1008A,1_1_1_1_1,19.4,3.4,7.8,-2.0,-0.3
...,...,...,...,...,...,...,...,...
2018-12-31 20:00:00,92,1008A,1_1_1_1_1,-16.2,14.0,0.2,-0.2,-2.5
2018-12-31 20:00:00,93,1008A,1_1_1_1_1,-14.9,9.1,0.8,0.6,-1.5
2018-12-31 20:00:00,94,1008A,1_1_1_1_1,-18.9,9.1,0.8,0.8,-1.6
2018-12-31 20:00:00,95,1008A,1_1_1_1_1,-9.8,11.1,0.5,0.6,-1.7


In [27]:
# NOTE(review): `vweight` is not assigned in any visible top-level cell —
# presumably a weight table such as generate_weight(...) returns; confirm
# and define it before this cell so the notebook runs on a fresh kernel.
windex=vweight.index

In [42]:
# Pair every entry of tindex with every entry of windex; meshgrid returns two
# aligned grids, one per input.
# NOTE(review): `tindex` is not defined in any visible cell — presumably
# train.index; confirm.
a,b = np.meshgrid(tindex.values, windex)

In [43]:
# Flatten both grids and stack them as two columns; only the resulting shape
# is displayed.
np.column_stack((a.ravel(), b.ravel())).shape

(212042, 2)

In [60]:
# np.recarray's first positional argument is the array *shape*, so passing the
# index values requested an array with one dimension per index entry and
# failed. Build the record array from the index itself instead, one field per
# index level.
# NOTE(review): assumes `tindex` is a pandas Index/MultiIndex (presumably
# train.index); it is not defined in any visible cell — confirm.
tindex.to_frame(index=False).to_records(index=False)

In [61]:
# Inspect the weight table as an xarray object.
# NOTE(review): relies on `vweight` existing in the kernel; no visible cell
# defines it.
vweight.to_xarray()

In [64]:
# Multiply the training data by the weight table after converting both to
# xarray objects; `end` is reused by the cells below.
# NOTE(review): relies on `vweight` existing in the kernel; no visible cell
# defines it.
end = train.to_xarray() * vweight.to_xarray()
end

In [72]:
# A rolling object only offers reductions (mean, sum, ...) and cannot be
# raised to a power, so square the data first and then take the rolling mean
# over 3 consecutive ltime steps (at least 2 valid values per window).
(end ** 2).rolling(dim={'ltime': 3}, min_periods=2).mean()

In [75]:
# Element-wise square of `end`.
end ** 2

# 时间权重