In [None]:
from IPython.display import clear_output
!pip install paddle-quantum
clear_output()

In [None]:
import time
import matplotlib
import pandas as pd
import numpy as np
import seaborn as sns
import paddle
import astropy
from numpy import pi as PI
from matplotlib import pyplot as plt

from paddle import matmul, transpose
from paddle_quantum.circuit import UAnsatz

import sklearn
from sklearn import svm
from sklearn.datasets import fetch_openml, make_moons, make_circles
from sklearn.model_selection import train_test_split

from IPython.display import clear_output
from tqdm import tqdm

import time
import matplotlib
from numpy import pi as PI

import multiprocessing
import warnings
from itertools import chain
sns.set_style('whitegrid')
warnings.simplefilter('ignore', FutureWarning)
warnings.simplefilter('ignore', RuntimeWarning)
from cesium.time_series import TimeSeries
import cesium.featurize as featurize
from gatspy.periodic import LombScargleMultiband, LombScargleMultibandFast
import pdb

# Data visualization

In [None]:
from astropy.table import Table

# Read the raw light-curve measurements with astropy for a first look at the data.
filename = '../input/training_set.csv'
data = Table.read(filename, format='csv')
# NOTE(review): `len(data)` is presumably the number of rows (measurements),
# not the number of distinct objects — confirm before using `nobjects`.
nobjects = len(data)
data

In [None]:
# Light-curve measurements and the per-object metadata (which carries `target`).
train_series = pd.read_csv('../input/training_set.csv')
train_metadata = pd.read_csv('../input/training_set_metadata.csv')


In [None]:
def view_target(n):
    """Plot the light curve of one randomly chosen object of class ``n``.

    One panel is drawn per passband (0-5); each panel shows the flux and the
    flux error against ``mjd`` as two scatter series.

    Args:
        n: value of ``target`` in ``train_metadata`` to sample an object from.

    Returns:
        The rows of ``train_series`` belonging to the sampled object.
    """
    candidate_ids = train_metadata.object_id[train_metadata.target == n].values
    chosen_id = np.random.choice(candidate_ids)
    obj_df = train_series[train_series.object_id == chosen_id]

    fig, axes = plt.subplots(6, 1, figsize=(10, 10))
    axes[0].set_title(f'Class_{n}')
    for passband, panel in enumerate(axes):
        in_band = obj_df.passband == passband
        epochs = obj_df.mjd[in_band].values
        panel.scatter(epochs, obj_df.flux[in_band].values, alpha=0.5)
        panel.scatter(epochs, obj_df.flux_err[in_band].values, alpha=0.5)
    return obj_df

In [None]:
# Draw one random example of each of the two classes studied in this notebook.
# Only the last call's return value is displayed as the cell output.
view_target(42)
view_target(90)

In [None]:
# Collect, for every (object_id, passband) pair, the array of observation times,
# fluxes, flux errors and detection flags; each array is stored in a `seq` column.
groups = train_series.groupby(['object_id', 'passband'])
times = groups.apply(
    lambda block: block['mjd'].values).reset_index().rename(columns={0: 'seq'})
flux = groups.apply(
    lambda block: block['flux'].values
).reset_index().rename(columns={0: 'seq'})
err = groups.apply(
    lambda block: block['flux_err'].values
).reset_index().rename(columns={0: 'seq'})
det = groups.apply(
    lambda block: block['detected'].astype(bool).values
).reset_index().rename(columns={0: 'seq'})
# Regroup by object: one entry per object, each entry being the list of that
# object's per-passband arrays.
times_list = times.groupby('object_id').apply(lambda x: x['seq'].tolist()).tolist()
flux_list = flux.groupby('object_id').apply(lambda x: x['seq'].tolist()).tolist()
err_list = err.groupby('object_id').apply(lambda x: x['seq'].tolist()).tolist()
det_list = det.groupby('object_id').apply(lambda x: x['seq'].tolist()).tolist()

We cannot make much sense of the data given the large observation gaps, the unsynchronised passband observations and the sparsity of the data. But we know that some objects have periodic behaviours, so we can attempt to fold them by period. Here we will examine which classes are more likely to be periodic, and what they typically look like.

In [None]:
def fit_multiband_freq(tup):
    """Fit a multiband Lomb-Scargle model to one object's light curve.

    Args:
        tup: ``(key, frame)`` pair as yielded by ``DataFrame.groupby``; the key
            is ignored, the frame must provide ``mjd``, ``flux``, ``flux_err``
            and ``passband`` columns.

    Returns:
        The fitted ``LombScargleMultiband`` model.
    """
    _, light_curve = tup
    mjd = light_curve['mjd']
    model = LombScargleMultiband(fit_period=True)
    # Search periods from 0.1 up to half of the observed time span (truncated to int).
    longest_period = int((mjd.max() - mjd.min()) / 2)
    model.optimizer.period_range = (0.1, longest_period)
    model.fit(mjd, light_curve['flux'], light_curve['flux_err'], light_curve['passband'])
    return model

In [None]:
def get_freq_features(N, subsetting_pos=None):
    """Compute cesium features and a best-fit period for up to ``N`` objects.

    Args:
        N: maximum number of objects to process.
        subsetting_pos: positions of the objects to consider. They are used both
            as positions in ``times_list`` / ``flux_list`` and as row positions in
            ``train_metadata``. ``None`` (the default) means "all objects".
            NOTE(review): this assumes ``times_list`` and ``train_metadata`` list
            the objects in the same order and that the positions are ascending —
            confirm.

    Returns:
        ``(feats, models)``: the cesium feature table, extended with an
        ``object_pos`` column and a ``freq1_freq`` column holding each model's
        ``best_period``, and the list of fitted multiband models.
    """
    if subsetting_pos is None:
        # The default previously crashed further down (`.iloc[None]`, `None[:N]`);
        # treat it as "every object".
        positions = np.arange(len(times_list))
        subset_times_list = times_list
        subset_flux_list = flux_list
    else:
        positions = subsetting_pos
        # Build the lookup set once instead of once per element.
        wanted = set(positions)
        subset_times_list = [v for i, v in enumerate(times_list) if i in wanted]
        subset_flux_list = [v for i, v in enumerate(flux_list) if i in wanted]

    feats = featurize.featurize_time_series(times=subset_times_list[:N],
                                            values=subset_flux_list[:N],
                                            features_to_use=['skew',
                                                            'percent_beyond_1_std',
                                                            'percent_difference_flux_percentile'
                                                            ],
                                            scheduler=None)
    selected_ids = train_metadata['object_id'].iloc[positions].iloc[:N]
    subset = train_series[train_series['object_id'].isin(selected_ids)]
    # One Lomb-Scargle fit per selected object.
    models = list(map(fit_multiband_freq, subset.groupby('object_id')))
    feats['object_pos'] = positions[:N]
    feats['freq1_freq'] = [model.best_period for model in models]
    return feats, models

In [None]:
# Distinct class labels present in the training metadata.
unique_classes = train_metadata['target'].unique()
unique_classes

In [None]:
def get_class_feats(label, N=10):
    """Return ``get_freq_features`` output for the objects whose target is ``label``.

    Args:
        label: class value to select in ``train_metadata['target']``.
        N: maximum number of objects of that class to process.

    Returns:
        The ``(features, models)`` pair produced by ``get_freq_features``.
    """
    is_label = train_metadata['target'] == label
    return get_freq_features(N, train_metadata[is_label].index)

In [None]:
def plot_phase_curves(feats, models, use_median_freq=False, hide_undetected=True, N=10):
    """Plot each object's light curve in time and folded at its best-fit period.

    For every object two panels are drawn: flux against ``mjd`` (left) and flux
    against phase ``(mjd / best_period) % 1`` (right), both with ``flux_err`` bars
    and coloured by passband. The right panel also shows the model prediction
    for each of the six passbands.

    Args:
        feats: feature table from ``get_freq_features`` (needs ``skew`` and
            ``object_pos``).
        models: fitted multiband models, aligned with the rows of ``feats``.
        use_median_freq: accepted for compatibility; currently not used.
        hide_undetected: accepted for compatibility; currently not used.
        N: maximum number of objects to plot.
    """
    colors = ['red', 'orange', 'yellow', 'green', 'blue', 'purple']

    def draw_points(axis, x_values, sample, point_colors):
        """Scatter the fluxes and draw their +/- flux_err bars on ``axis``."""
        axis.scatter(x=x_values,
                     y=sample['flux'],
                     c=point_colors,
                     s=8, alpha=0.8)
        axis.vlines(x_values,
                    sample['flux'] - sample['flux_err'],
                    sample['flux'] + sample['flux_err'],
                    colors=point_colors,
                    linewidth=1, alpha=0.8)

    # Never index past the objects that were actually fitted (a class may have
    # fewer than N members).
    for i in range(min(N, len(models))):
        model = models[i]
        skew = feats.loc[i, 'skew'].mean()
        # `.iloc[0]` takes the first entry explicitly by position.
        object_pos = int(feats.loc[i, 'object_pos'].iloc[0])
        f, ax = plt.subplots(1, 2, figsize=(14, 4))
        sample = train_series[train_series['object_id'] ==
                              train_metadata['object_id'].iloc[object_pos]].copy()
        point_colors = [colors[b] for b in sample['passband']]
        score = model.score(model.best_period)

        draw_points(ax[0], sample['mjd'], sample, point_colors)

        sample['phase'] = (sample['mjd'] / model.best_period) % 1
        draw_points(ax[1], sample['phase'], sample, point_colors)

        # Overlay the fitted model of every passband, sorted by phase so that
        # each curve is drawn left to right.
        x_range = np.linspace(sample['mjd'].min(), sample['mjd'].max(), 1000)
        for band in range(6):
            y = model.predict(x_range, band)
            xs = (x_range / model.best_period) % 1
            ords = np.argsort(xs)
            ax[1].plot(xs[ords], y[ords], c=colors[band], alpha=0.4)

        ax[0].set_title('time')
        ax[1].set_title('phase')
        f.suptitle(f'object: {sample["object_id"].iloc[0]}, '
                   f'class: {train_metadata["target"].iloc[object_pos]}\n'
                   f'period: {model.best_period: .4}, '
                   f'period score: {score: .4}, '
                   f'mean skew: {skew:.4}', y=1.1)
        plt.show()
        # Release the figure so a long loop does not accumulate open figures.
        plt.close(f)

In [None]:
# Silence UserWarning for the remainder of the session (affects all later cells).
warnings.simplefilter('ignore', UserWarning)

**Class 42**

In [None]:
%%capture capt
# Features and period fits for class 42 (default N=10 objects); output is captured in `capt`.
feats, models = get_class_feats(42)

In [None]:
# Time-domain and phase-folded curves for the class 42 objects fitted above.
plot_phase_curves(feats, models)

Class 42 appears to be mostly flat. It is extragalactic, so high uncertainty occurs in some cases. It is most likely a class of objects with "burst" events, although the actual burst is not always detected in the observation windows. When the burst happens, the object's magnitude increases dramatically across all bands and gradually falls back to normal levels within a few months. If the burst is indeed detected, it will be characterised by a relatively low frequency std between bands together with very long detected periods. Other features such as skewness can also be used to identify this class of objects, as a burst usually results in high skew in the light curve.

**Class 90**

In [None]:
%%capture capt
# Features and period fits for class 90 (default N=10 objects); overwrites the class 42 results.
feats, models = get_class_feats(90)

In [None]:
# Time-domain and phase-folded curves for the class 90 objects fitted above.
plot_phase_curves(feats, models)

Class 90 is similar to class 42 in that it is very likely a group of objects with "burst" behaviours, or objects with partially unobserved bursts. Like class 42, it is characterised by sudden peaks that usually last for a few months, high skew and often long fitted periods (or 1-day periods) with a low score. So far we are still unable to tell the difference between class 90 and class 42.

We choose classes 42 and 90 for the binary classification task below.

# Data Preprocessing

# Transform into a binary classification

In [None]:
from sklearn import metrics
from sklearn import preprocessing
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
import pandas as pd
# Reload the raw tables so this section can be run on its own.
train_series = pd.read_csv('../input/training_set.csv')
train_metadata = pd.read_csv('../input/training_set_metadata.csv')
# Pick one class-42 object with a fixed seed so the choice is reproducible.
obj_id_1 = np.random.RandomState(500).choice(train_metadata.object_id[train_metadata.target == 42].values)
obj_df_1 = train_series[train_series.object_id == obj_id_1]
# First 100 measurements for training, rows 101-120 for testing.
# NOTE(review): row 100 ends up in neither split — confirm this gap is intended.
obj_df_1_train = obj_df_1[:100]
x_test_1 = obj_df_1[101:121]
# Same procedure (and same seed) for one class-90 object.
obj_id_2 = np.random.RandomState(500).choice(train_metadata.object_id[train_metadata.target == 90].values)
obj_df_2 = train_series[train_series.object_id == obj_id_2]
obj_df_2_train = obj_df_2[:100]
x_test_2 = obj_df_2[101:121]
# Preview the class-42 test rows.
x_test_1[:5]

In [None]:
# Preview the first measurements of the selected class-90 object.
obj_df_2[:5]

mb_1 = MultiLabelBinarizer()
x_train_1 = mb_1.fit_transform(obj_df_1)
x_test_1 = mb_1.fit_transform(test_id_1)

mb_2 = MultiLabelBinarizer()
x_train_2 = mb_2.fit_transform(obj_df_2)
x_test_2 = mb_1.fit_transform(test_id_2)

# **Time Series Transformations**


transform each time series into the same set of derived quantities. 
These include: 
1. the number of measurements; 
1. the minimum, maximum, mean, median, standard deviation, and skew of flux;
1. the minimum, maximum, mean, median, standard deviation, and skew of flux error; 
1. the sum of the ratio between flux and flux error;
1. the skew of the ratio between flux and flux error;
1. the sum of the flux times squared flux ratio;
1. the skew of the flux times squared flux ratio;
1. the mean time between measurements; 
1. the maximum time between measurements;
1. spectroscopic redshifts for the host galaxy; 
1. photometric redshifts for the host galaxy;
1. the position of each object in the sky; 
1. the first two Fourier coefficients for each band, 
1. as well as kurtosis and skewness. 

In [None]:
def agg_func(x):
    """Summarise one group of measurements into normalised flux statistics.

    Args:
        x: frame-like object with ``flux`` and ``flux_err`` columns.

    Returns:
        ``pd.Series`` indexed by ``flux_mean``, ``flux_std``, ``flux_amp``,
        ``flux_mad``, ``flux_beyond`` and ``flux_skew``. ``flux_mean`` is the mean
        of the fluxes weighted by ``(flux / flux_err) ** 2``; the std, amplitude
        and MAD are divided by that weighted mean.
    """
    # `skew` was never imported at notebook level; import it where it is used.
    from scipy.stats import skew

    flux, dflux = x["flux"], x["flux_err"]
    weights = np.square(flux / dflux)
    flux_mean = np.sum(flux * weights) / np.sum(weights)

    d = {}
    d["flux_mean"] = flux_mean
    d["flux_std"] = np.std(flux / flux_mean, ddof=1)
    d["flux_amp"] = (np.max(flux) - np.min(flux)) / flux_mean
    # Fraction of points further than one sample std from the weighted mean.
    d["flux_beyond"] = np.sum(np.abs(flux - flux_mean) > np.std(flux, ddof=1)) / flux.shape[0]
    d["flux_mad"] = np.median(np.abs((flux - np.median(flux)) / flux_mean))
    d["flux_skew"] = skew(flux)
    colnames = ["flux_mean", "flux_std", "flux_amp", "flux_mad", "flux_beyond", "flux_skew"]
    return pd.Series(d, index=colnames)

In [None]:
def _finalize(self):
    '''Store individual passband fluxes as object attributes'''
    # in this example, we'll use the weighted mean to normalize the features
    weighted_mean = lambda flux, dflux: np.sum(flux*(flux/dflux)**2)/np.sum((flux/dflux)**2)

    # define some functions to compute simple descriptive statistics
    normalized_flux_std = lambda flux, wMeanFlux: np.std(flux/wMeanFlux, ddof = 1)
    normalized_amplitude = lambda flux, wMeanFlux: (np.max(flux) - np.min(flux))/wMeanFlux
    normalized_MAD = lambda flux, wMeanFlux: np.median(np.abs((flux - np.median(flux))/wMeanFlux))
    beyond_1std = lambda flux, wMeanFlux: sum(np.abs(flux - wMeanFlux) > np.std(flux, ddof = 1))/len(flux)

    for pb in self._passbands:
        ind = self.DFlc['passband'] == pb
        pbname = self._pbnames[pb]

        if len(self.DFlc[ind]) == 0:
            setattr(self, f'{pbname}Std', np.nan)
            setattr(self, f'{pbname}Amp', np.nan)
            setattr(self, f'{pbname}MAD', np.nan)
            setattr(self, f'{pbname}Beyond', np.nan)
            setattr(self, f'{pbname}Skew', np.nan)
            continue

        f  = self.DFlc['flux'][ind]
        df = self.DFlc['flux_err'][ind]
        m  = weighted_mean(f, df)

        # we'll save the measurements in each passband to simplify access.
        setattr(self, f'{pbname}Flux', f)
        setattr(self, f'{pbname}FluxUnc', df)
        setattr(self, f'{pbname}Mean', m)

        # compute the features
        std = normalized_flux_std(f, df)
        amp = normalized_amplitude(f, m)
        mad = normalized_MAD(f, m)
        beyond = beyond_1std(f, m)
        skew = spstat.skew(f) 

        # and save the features
        setattr(self, f'{pbname}Std', std)
        setattr(self, f'{pbname}Amp', amp)
        setattr(self, f'{pbname}MAD', mad)
        setattr(self, f'{pbname}Beyond', beyond)
        setattr(self, f'{pbname}Skew', skew)

In [None]:
def get_features(self):
    '''Collect the per-passband features of this object into one flat list.

    The list is ordered passband by passband (following ``self._passbands``) and,
    within a passband, as Std, Amp, MAD, Beyond, Skew. Attributes that were never
    set are reported as NaN.
    '''
    suffixes = ('Std', 'Amp', 'MAD', 'Beyond', 'Skew')
    return [
        getattr(self, f'{self._pbnames[pb]}{suffix}', np.nan)
        for pb in self._passbands
        for suffix in suffixes
    ]

In [None]:
def add_features_to_agg(df):
    """Add flux-range features to an aggregated frame, in place.

    Expects the columns ``flux_max``, ``flux_min``, ``flux_mean``,
    ``flux_by_flux_ratio_sq_sum`` and ``flux_ratio_sq_sum``. Adds ``flux_diff``
    (max - min), ``flux_dif2`` (range / mean), ``flux_w_mean`` (weighted mean)
    and ``flux_dif3`` (range / weighted mean).

    Returns:
        The same ``df`` object, for chaining.
    """
    # An `mjd_diff = mjd_max - mjd_min` feature is deliberately left out, see
    # https://www.kaggle.com/c/PLAsTiCC-2018/discussion/69696
    flux_span = df['flux_max'] - df['flux_min']

    df['flux_diff'] = flux_span
    df['flux_dif2'] = flux_span / df['flux_mean']
    df['flux_w_mean'] = df['flux_by_flux_ratio_sq_sum'] / df['flux_ratio_sq_sum']
    df['flux_dif3'] = flux_span / df['flux_w_mean']

    return df

**Logscale transformation**

from sklearn.preprocessing import FunctionTransformer

transformer = FunctionTransformer(np.log10, validate=True)

**MinMaxScaler Normalization/scaling and outliers**

In [None]:
from sklearn.preprocessing import MinMaxScaler
# FunctionTransformer is not imported by any earlier code cell, so bring it in here.
from sklearn.preprocessing import FunctionTransformer

# Sign-preserving log scale: sign(x) * log10(1 + |x|).
# A plain log10 yields NaN for negative values (e.g. negative fluxes) and -inf
# for zeros (e.g. passband 0, detected == 0); nan_to_num then maps those to 0 and
# to about -1.8e308, which squashes every other value onto the top of the scaled range.
transformer = FunctionTransformer(lambda values: np.sign(values) * np.log10(1.0 + np.abs(values)))
Logscale_data = transformer.transform(obj_df_1)
# Missing inputs stay NaN through the transform; replace them with 0 as before.
Logscale_data = np.nan_to_num(Logscale_data)
print(Logscale_data[:5])

# MinMaxScaler computes, per column:
# X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
# X_scaled = X_std * (max - min) + min
scaler = MinMaxScaler(feature_range=(-(np.pi/2),(np.pi/2)))

# transform data: every column is mapped onto [-pi/2, pi/2] so that the values
# can be used as rotation angles
Normal_data = scaler.fit_transform(Logscale_data)
Normal_data = np.array(Normal_data)
print(Normal_data[:5])

In [None]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import MinMaxScaler
def data_preprocessing(origin_data):
    """Log-scale a 2-D numeric array and rescale each column to [-pi/2, pi/2].

    The log scale is sign-preserving, ``sign(x) * log10(1 + |x|)``. A plain
    ``log10`` returns NaN for negative values and -inf for zeros; after
    ``nan_to_num`` those became 0 and about -1.8e308, which collapsed the
    min-max scaling of any column containing a zero.

    Args:
        origin_data: array-like of shape (n_samples, n_features).

    Returns:
        ``numpy.ndarray`` of the same shape. Each column is scaled independently
        with a scaler fitted on ``origin_data`` itself.
    """
    transformer = FunctionTransformer(
        lambda values: np.sign(values) * np.log10(1.0 + np.abs(values)))
    Logscale_data = transformer.transform(origin_data)
    # NaN inputs stay NaN through the transform; replace them with 0.
    Logscale_data = np.nan_to_num(Logscale_data)

    # MinMaxScaler computes, per column:
    # X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    # X_scaled = X_std * (max - min) + min
    scaler = MinMaxScaler(feature_range=(-(np.pi/2),(np.pi/2)))

    # transform data
    Normal_data = scaler.fit_transform(Logscale_data)
    return Normal_data

# Quantum Circuit test

**iswapgate^0.5 Test**

In [None]:
# iswap^0.5 test: build the two-qubit block on qubits (0, 1) from sdg, h, rz
# and cnot gates, then print the resulting circuit.
n = 2
# Initialise the circuit
circuit_t = UAnsatz(n)
# The two fixed rotation angles used by the block: pi/2 and 7*pi/4.
theta1 = paddle.to_tensor(np.array([np.pi/2], np.float64))
theta2 = paddle.to_tensor(np.array([(7*np.pi)/4], np.float64))
circuit_t.sdg(0)
circuit_t.h(0)
circuit_t.sdg(0)
circuit_t.rz(theta1[0], 0)
circuit_t.cnot([0, 1])
circuit_t.sdg(0)
circuit_t.h(0)
circuit_t.sdg(0)
circuit_t.sdg(1)
circuit_t.h(1)
circuit_t.sdg(1)
circuit_t.rz(theta2[0], 0)
circuit_t.rz(theta2[0], 1)
circuit_t.sdg(0)
circuit_t.h(0)
circuit_t.sdg(0)
circuit_t.rz(theta1[0], 0)
circuit_t.cnot([0, 1])
circuit_t.sdg(0)
circuit_t.h(0)
circuit_t.sdg(0)
circuit_t.sdg(1)
circuit_t.h(1)
circuit_t.sdg(1)
print(circuit_t)

**Circuit Encode Test**

In [None]:
# Number of qubits
n = 2
# Initialise the circuit
circuit = UAnsatz(n)
# x1, x2, x3 hold the classical information to encode (only the first n
# entries of each are used below)
x1 = paddle.to_tensor([-1.45, 3, 2, -0.05], 'float64')
x2 = paddle.to_tensor([-1.45, 3, 2, -0.05], 'float64')
x3 = paddle.to_tensor([-1.45, 3, 2, -0.05], 'float64')

for i in range(n):
    # Add a layer of Hadamard gates
    circuit.superposition_layer()
    # Then, on every qubit j, apply Rz(x1[j]), Ry(x2[j]) and Rz(x3[j])
    for j in range(n):
        circuit.rz(x1[j] ,j)
    # Ry rotation
        circuit.ry(x2[j] ,j)
    # Rz rotation
        circuit.rz(x3[j] ,j)
print(circuit)

# 导入训练集和测试集

In [None]:
# Scale each 100-row training slice and each 20-row test slice independently
# and flatten it (100 rows x 6 columns = 600 values, 20 x 6 = 120).
# NOTE(review): `y_train` / `y_test` hold the scaled measurements of the
# class-90 object, not class labels — confirm how they are meant to be used
# as targets further down.
X_train = data_preprocessing(np.array(obj_df_1_train))
X_train = np.reshape(X_train, 600)
y_train = data_preprocessing(np.array(obj_df_2_train))
y_train = np.reshape(y_train, 600)
X_test = data_preprocessing(np.array(x_test_1))
X_test = np.reshape(X_test, 120)
y_test = data_preprocessing(np.array(x_test_2))
y_test = np.reshape(y_test, 120)
# Quick look at the first flattened values.
print(X_train[:20])
print(y_train[:5])

In [None]:
# Initialise the progress bar: define the shared bar format, create a bar once
# and close it immediately so that the global `pbar` exists.
bar_format_string = '{l_bar}{bar}|[{elapsed}<{remaining}, ' '{rate_fmt}{postfix}]'
pbar = tqdm(total=100, bar_format=bar_format_string)
pbar.close()
clear_output()

**Quantum Circuit**

In [None]:
# Build the 6-qubit kernel circuit once at top level, using the first six
# values of the flattened class-42 (x1) and class-90 (x2) training data.
x1 = paddle.to_tensor(np.reshape(X_train, 600), 'float64')
x2 = paddle.to_tensor(np.reshape(y_train, 600), 'float64')
theta = paddle.to_tensor(np.array([np.pi], np.float64))
# Number of qubits and number of repetitions of the whole pattern
n = 6
r = 20
theta1 = paddle.to_tensor(np.array([np.pi/2], np.float64))
theta2 = paddle.to_tensor(np.array([(7*np.pi)/4], np.float64))
# Initialise the circuit
circuit = UAnsatz(n)


def _shs(qubit):
    """Append sdg, h, sdg on ``qubit``."""
    circuit.sdg(qubit)
    circuit.h(qubit)
    circuit.sdg(qubit)


def _sqrt_iswap_chain():
    """Append the iswap^0.5 block on every neighbouring pair (j, j+1)."""
    for j in range(n - 1):
        for _ in range(2):
            _shs(j)
            circuit.rz(theta1[0], j)
            circuit.cnot([j, j + 1])
            _shs(j)
            _shs(j + 1)
            if _ == 0:
                circuit.rz(theta2[0], j)
                circuit.rz(theta2[0], j + 1)


for q in range(r):
    # Layer of Hadamard gates
    circuit.superposition_layer()
    # x1 half
    for i in range(n):
        for j in range(n):
            circuit.rz(x1[j], j)
            circuit.ry(x1[j], j)
            circuit.rz(x1[j], j)
        _sqrt_iswap_chain()
        for j in range(n):
            circuit.ry(x1[j], j)
            circuit.rz(x1[j], j)
            circuit.h(j)
    # x2 half ("invert")
    for i in range(n):
        for j in range(n):
            circuit.h(j)
            circuit.rz(x2[j], j)
            circuit.ry(x2[j], j)
        _sqrt_iswap_chain()
        for j in range(n):
            circuit.rz(x2[j], j)
            circuit.ry(x2[j], j)
            circuit.rz(x2[j], j)
            circuit.h(j)

In [None]:
# Run the circuit in state-vector mode and print the amplitudes rounded to 5 decimals.
fin_state = circuit.run_state_vector()
print([np.round(i, 5) for i in fin_state.numpy()])

In [None]:
# Probability of obtaining the measurement outcome 0...0: squared modulus of
# the first amplitude of the final state.
pro = (fin_state[0].conj() * fin_state[0]).real().numpy()[0]
pro

In [None]:
def _qke_as_angles(x, n_qubits):
    """Return ``x`` as a float64 paddle tensor holding at least ``n_qubits`` angles."""
    if isinstance(x, paddle.Tensor):
        return x
    angles = np.asarray(x, dtype=np.float64).reshape(-1)
    if angles.size < n_qubits:
        raise ValueError(
            f'expected at least {n_qubits} values per sample, got {angles.size}')
    return paddle.to_tensor(angles)


def _qke_add_sqrt_iswap(circuit, a, b, theta1, theta2):
    """Append the notebook's iswap^0.5 block (sdg/h/rz/cnot based) on qubits (a, b)."""
    def shs(qubit):
        circuit.sdg(qubit)
        circuit.h(qubit)
        circuit.sdg(qubit)

    shs(a)
    circuit.rz(theta1, a)
    circuit.cnot([a, b])
    shs(a)
    shs(b)
    circuit.rz(theta2, a)
    circuit.rz(theta2, b)
    shs(a)
    circuit.rz(theta1, a)
    circuit.cnot([a, b])
    shs(a)
    shs(b)


def q_kernel_estimator(x1, x2):
    """Estimate one quantum-kernel entry for the samples ``x1`` and ``x2``.

    A 6-qubit circuit is built from 20 repetitions of: a Hadamard layer, six
    layers driven by ``x1`` and six layers driven by ``x2`` (single-qubit
    rotations interleaved with iswap^0.5 blocks on neighbouring qubits). The
    circuit is run in state-vector mode and the squared modulus of the first
    amplitude is returned.

    NOTE(review): the ``x2`` half is not the exact adjoint of the ``x1`` half
    (same-sign angles, different gate order), so k(x, x) is not guaranteed to
    be 1 — confirm the intended feature map.

    Args:
        x1, x2: 1-D sequences (or paddle tensors) with at least 6 rotation
            angles each; only the first 6 entries are used.

    Returns:
        The probability of the 0...0 measurement outcome.

    Raises:
        ValueError: if a non-tensor input holds fewer than 6 values.

    Side effects:
        Advances the global progress bar ``pbar`` by ``100 / N``.
    """
    # Number of qubits equals the number of classical features that are encoded
    n = 6
    r = 20
    # Gate parameters must be paddle tensors; samples arrive as numpy rows when
    # this function is called through scikit-learn.
    x1 = _qke_as_angles(x1, n)
    x2 = _qke_as_angles(x2, n)
    theta1 = paddle.to_tensor(np.array([np.pi/2], np.float64))
    theta2 = paddle.to_tensor(np.array([(7*np.pi)/4], np.float64))
    # Initialise the circuit
    circuit = UAnsatz(n)
    for q in range(r):
        # Layer of Hadamard gates
        circuit.superposition_layer()
        # x1 half
        for i in range(n):
            for j in range(n):
                circuit.rz(x1[j], j)
                circuit.ry(x1[j], j)
                circuit.rz(x1[j], j)
            for j in range(n - 1):
                _qke_add_sqrt_iswap(circuit, j, j + 1, theta1[0], theta2[0])
            for j in range(n):
                circuit.ry(x1[j], j)
                circuit.rz(x1[j], j)
                circuit.h(j)
        # x2 half ("invert")
        for i in range(n):
            for j in range(n):
                circuit.h(j)
                circuit.rz(x2[j], j)
                circuit.ry(x2[j], j)
            for j in range(n - 1):
                _qke_add_sqrt_iswap(circuit, j, j + 1, theta1[0], theta2[0])
            for j in range(n):
                circuit.rz(x2[j], j)
                circuit.ry(x2[j], j)
                circuit.rz(x2[j], j)
                circuit.h(j)
    # Run the circuit in state-vector mode
    fin_state = circuit.run_state_vector()
    # Update the progress bar
    global pbar
    global N
    pbar.update(100/N)
    # Probability of the measurement outcome 0...0
    return (fin_state[0].conj() * fin_state[0]).real().numpy()[0]

In [None]:
def q_kernel_matrix(X1, X2):
    """Return the kernel matrix whose (i, j) entry is ``q_kernel_estimator(X1[i], X2[j])``."""
    return np.array([[q_kernel_estimator(x1, x2) for x2 in X2] for x1 in X1])


def _stack_labelled_samples(class_42_values, class_90_values, n_features=6):
    """Stack the two classes' samples into a 2-D feature matrix plus a label vector."""
    rows_42 = np.reshape(class_42_values, (-1, n_features))
    rows_90 = np.reshape(class_90_values, (-1, n_features))
    features = np.vstack([rows_42, rows_90])
    labels = np.concatenate([np.full(len(rows_42), 42), np.full(len(rows_90), 90)])
    return features, labels


# `X_train` / `X_test` hold the flattened, scaled measurements of the class-42
# object and `y_train` / `y_test` those of the class-90 object. SVC needs a 2-D
# sample matrix and discrete labels, so every 6-value measurement row becomes
# one sample labelled with its class.
# NOTE(review): this reading of the data layout follows the "binary
# classification of class 42 vs 90" description — confirm it is the intended setup.
train_features, train_labels = _stack_labelled_samples(X_train, y_train)
test_features, test_labels = _stack_labelled_samples(X_test, y_test)

# Create the progress bar and set the number N of kernel evaluations needed:
# fit (train x train), predict on train (train x train), predict on test
# (test x train).
pbar = tqdm(total=100,
            desc='训练 QKE-SVM 并分类中',
            bar_format=bar_format_string)
N = len(train_features) ** 2 + len(train_features) ** 2 + len(train_features) * len(test_features)

# Support vector machine with the quantum kernel
svm_qke = svm.SVC(kernel=q_kernel_matrix)

# Fit the decision boundary on the training samples
svm_qke.fit(train_features, train_labels)

# Predictions on the training and on the test samples
predict_svm_qke_train = svm_qke.predict(train_features)
predict_svm_qke_test = svm_qke.predict(test_features)

# Accuracy: fraction of correctly classified samples
accuracy_train = np.array(predict_svm_qke_train == train_labels, dtype=int).sum()/len(train_labels)
accuracy_test = np.array(predict_svm_qke_test == test_labels, dtype=int).sum()/len(test_labels)

Transformed

In [None]:
from sklearn import metrics
from sklearn import preprocessing
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
import pandas as pd
# Reload the raw tables so the "Transformed" section starts from clean frames.
train_series = pd.read_csv('../input/training_set.csv')
train_metadata = pd.read_csv('../input/training_set_metadata.csv')
# One class-42 object, chosen with a fixed seed.
obj_id_1 = np.random.RandomState(500).choice(train_metadata.object_id[train_metadata.target == 42].values)
obj_df_1 = train_series[train_series.object_id == obj_id_1]
# First 100 measurements for training, rows 101-120 for testing.
# NOTE(review): row 100 ends up in neither split — confirm this gap is intended.
obj_df_1_train = obj_df_1[:100]
x_test_1 = obj_df_1[101:121]
# One class-90 object, chosen with the same seed.
obj_id_2 = np.random.RandomState(500).choice(train_metadata.object_id[train_metadata.target == 90].values)
obj_df_2 = train_series[train_series.object_id == obj_id_2]
obj_df_2_train = obj_df_2[:100]
x_test_2 = obj_df_2[101:121]
# Preview the class-42 test rows.
x_test_1[:5]

In [None]:
# NOTE(review): this cell repeats the loading and selection of the cell just
# before it (minus the imports); one of the two can probably be removed.
train_series = pd.read_csv('../input/training_set.csv')
train_metadata = pd.read_csv('../input/training_set_metadata.csv')
# One class-42 object, chosen with a fixed seed.
obj_id_1 = np.random.RandomState(500).choice(train_metadata.object_id[train_metadata.target == 42].values)
obj_df_1 = train_series[train_series.object_id == obj_id_1]
# First 100 measurements for training, rows 101-120 for testing.
obj_df_1_train = obj_df_1[:100]
x_test_1 = obj_df_1[101:121]
# One class-90 object, chosen with the same seed.
obj_id_2 = np.random.RandomState(500).choice(train_metadata.object_id[train_metadata.target == 90].values)
obj_df_2 = train_series[train_series.object_id == obj_id_2]
obj_df_2_train = obj_df_2[:100]
x_test_2 = obj_df_2[101:121]
# Preview the class-42 test rows.
x_test_1[:5]

In [None]:
# `obj_df_1_train` is a slice of a filtered frame; take an explicit copy so the
# new columns are written to an independent frame rather than to a slice
# (avoids SettingWithCopyWarning and ambiguous writes).
obj_df_1_train = obj_df_1_train.copy()
# Squared signal-to-noise ratio of every measurement, and the flux weighted by it;
# their sums give the weighted mean flux.
obj_df_1_train['flux_ratio_sq'] = np.power(obj_df_1_train['flux'] / obj_df_1_train['flux_err'], 2.0)
obj_df_1_train['flux_by_flux_ratio_sq'] = obj_df_1_train['flux'] * obj_df_1_train['flux_ratio_sq']

In [None]:
# Every column except the first one.
# NOTE(review): assumes the first column is `object_id` — confirm.
data_features = obj_df_1_train.columns[1:]

In [None]:
groupObjects = obj_df_1_train.groupby('object_id')[data_features]

# Start from the constant per-object metadata (without the label).
features = train_metadata.drop(['target'], axis=1)

# (message, aggregation, column suffix) for every statistic to attach.
aggregations = [
    ("Add sum of mutable object features", 'sum', '_sum'),
    ("Add mean of mutable object features", 'mean', '_mean'),
    ("Add median of mutable features", 'median', '_median'),
    ("Add minimum of mutable features", 'min', '_min'),
    ("Add maximum of mutable features", 'max', '_max'),
    ("Add range of mutable features", lambda x: max(x) - min(x), '_range'),
    ("Add standard deviation of mutable features", 'std', '_stddev'),
    ("Add skew of mutable features", 'skew', '_skew'),
]

for message, aggregation, suffix in aggregations:
    print(message)
    aggregated = groupObjects.agg(aggregation)
    # Name the columns explicitly. Relying on the merge `suffixes` left the
    # first aggregate (the sums) unsuffixed, because a suffix is only applied
    # to columns whose names collide.
    aggregated.columns = [f'{column}{suffix}' for column in aggregated.columns]
    features = pd.merge(features, aggregated, how='right', on='object_id')

In [None]:
# Replace missing values by the mean of their column.
features = features.fillna(features.mean())

In [None]:
# Display the aggregated feature table.
features

In [None]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import MinMaxScaler
def data_preprocessing(origin_data):
    """Log-scale a 2-D numeric array and rescale each column to [-pi/2, pi/2].

    The log scale is sign-preserving, ``sign(x) * log10(1 + |x|)``. A plain
    ``log10`` returns NaN for negative values and -inf for zeros; after
    ``nan_to_num`` those became 0 and about -1.8e308, which collapsed the
    min-max scaling of any column containing a zero.

    Args:
        origin_data: array-like of shape (n_samples, n_features).

    Returns:
        ``numpy.ndarray`` of the same shape. Each column is scaled independently
        with a scaler fitted on ``origin_data`` itself.
    """
    transformer = FunctionTransformer(
        lambda values: np.sign(values) * np.log10(1.0 + np.abs(values)))
    Logscale_data = transformer.transform(origin_data)
    # NaN inputs stay NaN through the transform; replace them with 0.
    Logscale_data = np.nan_to_num(Logscale_data)

    # MinMaxScaler computes, per column:
    # X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
    # X_scaled = X_std * (max - min) + min
    scaler = MinMaxScaler(feature_range=(-(np.pi/2),(np.pi/2)))

    # transform data
    Normal_data = scaler.fit_transform(Logscale_data)
    return Normal_data

In [None]:
# Scale the aggregated feature table.
# NOTE(review): `X_train` is reassigned from `obj_df_1_train` in the next cell,
# so this result is only displayed here and not used for training.
X_train = data_preprocessing(np.array(features))
X_train

In [None]:
# `obj_df_1_train` gained two engineered columns above, so it no longer has
# 100 x 6 values; restrict every slice to the raw measurement columns before
# flattening, otherwise the reshape to 600 raises.
raw_columns = train_series.columns

# Scale each slice independently and flatten it
# (100 rows x 6 columns = 600 values, 20 x 6 = 120).
# NOTE(review): `y_train` / `y_test` hold the scaled measurements of the
# class-90 object, not class labels.
X_train = data_preprocessing(np.array(obj_df_1_train[raw_columns]))
X_train = np.reshape(X_train, 600)
y_train = data_preprocessing(np.array(obj_df_2_train[raw_columns]))
y_train = np.reshape(y_train, 600)
X_test = data_preprocessing(np.array(x_test_1[raw_columns]))
X_test = np.reshape(X_test, 120)
y_test = data_preprocessing(np.array(x_test_2[raw_columns]))
y_test = np.reshape(y_test, 120)
# Quick look at the first flattened values.
print(X_train[:20])
print(y_train[:5])

In [None]:
def _qke_as_angles(x, n_qubits):
    """Return ``x`` as a float64 paddle tensor holding at least ``n_qubits`` angles."""
    if isinstance(x, paddle.Tensor):
        return x
    angles = np.asarray(x, dtype=np.float64).reshape(-1)
    if angles.size < n_qubits:
        raise ValueError(
            f'expected at least {n_qubits} values per sample, got {angles.size}')
    return paddle.to_tensor(angles)


def _qke_add_sqrt_iswap(circuit, a, b, theta1, theta2):
    """Append the notebook's iswap^0.5 block (sdg/h/rz/cnot based) on qubits (a, b)."""
    def shs(qubit):
        circuit.sdg(qubit)
        circuit.h(qubit)
        circuit.sdg(qubit)

    shs(a)
    circuit.rz(theta1, a)
    circuit.cnot([a, b])
    shs(a)
    shs(b)
    circuit.rz(theta2, a)
    circuit.rz(theta2, b)
    shs(a)
    circuit.rz(theta1, a)
    circuit.cnot([a, b])
    shs(a)
    shs(b)


def q_kernel_estimator(x1, x2):
    """Estimate one quantum-kernel entry for the samples ``x1`` and ``x2``.

    A 6-qubit circuit is built from 20 repetitions of: a Hadamard layer, six
    layers driven by ``x1`` and six layers driven by ``x2`` (single-qubit
    rotations interleaved with iswap^0.5 blocks on neighbouring qubits). The
    circuit is run in state-vector mode and the squared modulus of the first
    amplitude is returned.

    NOTE(review): the ``x2`` half is not the exact adjoint of the ``x1`` half
    (same-sign angles, different gate order), so k(x, x) is not guaranteed to
    be 1 — confirm the intended feature map.

    Args:
        x1, x2: 1-D sequences (or paddle tensors) with at least 6 rotation
            angles each; only the first 6 entries are used.

    Returns:
        The probability of the 0...0 measurement outcome.

    Raises:
        ValueError: if a non-tensor input holds fewer than 6 values.

    Side effects:
        Advances the global progress bar ``pbar`` by ``100 / N``.
    """
    # Number of qubits equals the number of classical features that are encoded
    n = 6
    r = 20
    # Gate parameters must be paddle tensors; samples arrive as numpy rows when
    # this function is called through scikit-learn.
    x1 = _qke_as_angles(x1, n)
    x2 = _qke_as_angles(x2, n)
    theta1 = paddle.to_tensor(np.array([np.pi/2], np.float64))
    theta2 = paddle.to_tensor(np.array([(7*np.pi)/4], np.float64))
    # Initialise the circuit
    circuit = UAnsatz(n)
    for q in range(r):
        # Layer of Hadamard gates
        circuit.superposition_layer()
        # x1 half
        for i in range(n):
            for j in range(n):
                circuit.rz(x1[j], j)
                circuit.ry(x1[j], j)
                circuit.rz(x1[j], j)
            for j in range(n - 1):
                _qke_add_sqrt_iswap(circuit, j, j + 1, theta1[0], theta2[0])
            for j in range(n):
                circuit.ry(x1[j], j)
                circuit.rz(x1[j], j)
                circuit.h(j)
        # x2 half ("invert")
        for i in range(n):
            for j in range(n):
                circuit.h(j)
                circuit.rz(x2[j], j)
                circuit.ry(x2[j], j)
            for j in range(n - 1):
                _qke_add_sqrt_iswap(circuit, j, j + 1, theta1[0], theta2[0])
            for j in range(n):
                circuit.rz(x2[j], j)
                circuit.ry(x2[j], j)
                circuit.rz(x2[j], j)
                circuit.h(j)
    # Run the circuit in state-vector mode
    fin_state = circuit.run_state_vector()
    # Update the progress bar
    global pbar
    global N
    pbar.update(100/N)
    # Probability of the measurement outcome 0...0
    return (fin_state[0].conj() * fin_state[0]).real().numpy()[0]

In [None]:
from sklearn.svm import SVR
def q_kernel_matrix(X1, X2):
    """Return the kernel matrix whose (i, j) entry is ``q_kernel_estimator(X1[i], X2[j])``."""
    return np.array([[q_kernel_estimator(x1, x2) for x2 in X2] for x1 in X1])


def _stack_labelled_samples(class_42_values, class_90_values, n_features=6):
    """Stack the two classes' samples into a 2-D feature matrix plus a label vector."""
    rows_42 = np.reshape(class_42_values, (-1, n_features))
    rows_90 = np.reshape(class_90_values, (-1, n_features))
    features = np.vstack([rows_42, rows_90])
    labels = np.concatenate([np.full(len(rows_42), 42), np.full(len(rows_90), 90)])
    return features, labels


# `X_train` / `X_test` hold the flattened, scaled measurements of the class-42
# object and `y_train` / `y_test` those of the class-90 object. SVC needs a 2-D
# sample matrix and discrete labels, so every 6-value measurement row becomes
# one sample labelled with its class.
# NOTE(review): this reading of the data layout follows the "binary
# classification of class 42 vs 90" description — confirm it is the intended setup.
train_features, train_labels = _stack_labelled_samples(X_train, y_train)
test_features, test_labels = _stack_labelled_samples(X_test, y_test)

# Create the progress bar and set the number N of kernel evaluations needed:
# fit (train x train), predict on train (train x train), predict on test
# (test x train).
pbar = tqdm(total=100,
            desc='训练 QKE-SVM 并分类中',
            bar_format=bar_format_string)
N = len(train_features) ** 2 + len(train_features) ** 2 + len(train_features) * len(test_features)

# Support vector machine with the quantum kernel
svm_qke = svm.SVC(kernel=q_kernel_matrix)

# Fit the decision boundary on the training samples
svm_qke.fit(train_features, train_labels)

# Predictions on the training and on the test samples
predict_svm_qke_train = svm_qke.predict(train_features)
predict_svm_qke_test = svm_qke.predict(test_features)

# Accuracy: fraction of correctly classified samples
accuracy_train = np.array(predict_svm_qke_train == train_labels, dtype=int).sum()/len(train_labels)
accuracy_test = np.array(predict_svm_qke_test == test_labels, dtype=int).sum()/len(test_labels)