In [None]:
import numpy as np
from scipy.linalg import eigh
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.preprocessing import StandardScaler

In [None]:
class TCA:
    """Transfer Component Analysis (TCA) for unsupervised domain adaptation.

    Learns a low-dimensional projection of kernel features in which the
    source and target samples have a small maximum mean discrepancy (MMD)
    while the variance of the data is preserved.

    Parameters
    ----------
    kernel : str or callable, default='linear'
        Forwarded as ``metric`` to ``sklearn.metrics.pairwise.pairwise_kernels``.
    n_components : int, default=2
        Number of transfer components (output dimensions).
    mu : float, default=1.0
        Regularisation weight of the TCA objective; must be strictly positive.
    gamma : float, default=1.0
        Kernel coefficient. It is passed to ``pairwise_kernels`` for string
        kernels and dropped there (``filter_params=True``) when the chosen
        kernel has no ``gamma`` parameter, e.g. the linear kernel.

    Attributes
    ----------
    scaler_ : StandardScaler
        Scaler fitted on the source samples during ``fit``.
    X_fit_ : ndarray of shape (n_source + n_target, n_features)
        Standardised training samples (source rows first) used as kernel
        reference points by ``transform``.
    components_ : ndarray of shape (n_components, n_source + n_target)
        Projection vectors, ordered by decreasing generalized eigenvalue.
    """

    def __init__(self, kernel='linear', n_components=2, mu=1.0, gamma=1.0):
        self.kernel = kernel
        self.n_components = n_components
        self.mu = mu
        self.gamma = gamma

    def _kernel(self, X, Y):
        """Return the kernel matrix between the rows of X and the rows of Y."""
        # Only string kernels get `gamma`; a user-supplied callable may not accept it.
        extra = {'gamma': self.gamma} if isinstance(self.kernel, str) else {}
        return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, **extra)

    def fit(self, X_source, X_target):
        """Learn the transfer components from unlabeled source and target data.

        Parameters
        ----------
        X_source : array-like of shape (n_source, n_features)
        X_target : array-like of shape (n_target, n_features)
            The two domains may contain different numbers of samples.

        Returns
        -------
        self : TCA

        Raises
        ------
        ValueError
            If ``mu`` is not positive or ``n_components`` is outside
            ``[1, n_source + n_target]``.
        """
        if self.mu <= 0:
            raise ValueError("mu must be positive, got {!r}".format(self.mu))

        # Standardise with the source statistics and reuse them for the target
        # (and later for every transform() call) so all data share one scale.
        self.scaler_ = StandardScaler()
        X_source = self.scaler_.fit_transform(X_source)
        X_target = self.scaler_.transform(X_target)

        n_source, n_target = X_source.shape[0], X_target.shape[0]
        n_total = n_source + n_target
        if not 1 <= self.n_components <= n_total:
            raise ValueError(
                "n_components must be in [1, {}], got {!r}".format(n_total, self.n_components)
            )

        # Joint kernel over all training samples, source rows first.
        self.X_fit_ = np.vstack([X_source, X_target])
        K = self._kernel(self.X_fit_, self.X_fit_)

        # MMD term: L = e e^T with e = [1/n_s, ..., -1/n_t, ...], hence
        # K L K = (K e)(K e)^T.
        e = np.concatenate([
            np.full(n_source, 1.0 / n_source),
            np.full(n_target, -1.0 / n_target),
        ])
        Ke = K @ e
        KLK = np.outer(Ke, Ke)

        # Variance term: H = I - 11^T/n is idempotent, hence
        # K H K = (H K)^T (H K), and H K is K with its column means removed.
        K_centered = K - K.mean(axis=0, keepdims=True)
        KHK = K_centered.T @ K_centered

        # TCA solution: leading eigenvectors of (K L K + mu I)^{-1} K H K, i.e. the
        # largest generalized eigenpairs of K H K w = lambda (K L K + mu I) w.
        # mu > 0 keeps the right-hand matrix positive definite, as eigh requires.
        regulariser = KLK + self.mu * np.eye(n_total)
        _, V = eigh(
            KHK,
            regulariser,
            subset_by_index=[n_total - self.n_components, n_total - 1],
        )

        # eigh returns ascending eigenvalues; flip so the leading component is first.
        self.components_ = V[:, ::-1].T
        return self

    def transform(self, X):
        """Project samples onto the learned transfer components.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Any number of samples; they need not have been seen by ``fit``.

        Returns
        -------
        ndarray of shape (n_samples, n_components)

        Raises
        ------
        AttributeError
            If ``fit`` has not been called.
        """
        if not hasattr(self, 'components_'):
            raise AttributeError("TCA instance is not fitted yet; call fit() before transform().")

        # Reuse the scaler fitted in fit(); refitting here would put each input
        # on a different scale from the training data.
        X = self.scaler_.transform(X)

        # Cross-kernel against the training samples: shape (n_samples, n_source + n_target),
        # which always matches components_ regardless of n_samples.
        K = self._kernel(X, self.X_fit_)

        return np.dot(K, self.components_.T)

In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D

In [None]:
# Generate the source-domain (seed 1) and target-domain (seed 2) data sets
(X_source, y_source), (X_target, y_target) = (
    make_classification(n_samples=100, n_features=20, n_informative=10, n_classes=2, random_state=seed)
    for seed in (1, 2)
)

In [None]:
# Split each domain into training and test sets (20% held out)
X_source_train, X_source_test, y_source_train, y_source_test = train_test_split(
    X_source, y_source, test_size=0.2, random_state=1
)
X_target_train, X_target_test, y_target_train, y_target_test = train_test_split(
    X_target, y_target, test_size=0.2, random_state=2
)

# Feature preprocessing: the one scaler object is refit on each domain's
# training split, so every domain is standardised with its own statistics.
scaler = StandardScaler()
X_source_train, X_source_test = scaler.fit_transform(X_source_train), scaler.transform(X_source_test)
X_target_train, X_target_test = scaler.fit_transform(X_target_train), scaler.transform(X_target_test)

In [None]:
# Inspect the standardised source training features
X_source_train

In [None]:
# Use PCA for visualisation (only meaningful for a 2-D or 3-D feature space).
# The projection is fitted on the source training split; the target training
# split is then mapped onto those same two axes.
pca = PCA(n_components=2)
X_source_pca = pca.fit_transform(X_source_train)
X_target_pca = pca.transform(X_target_train)


In [None]:
# Visualise the source-domain and target-domain data in the PCA plane
fig, ax = plt.subplots()
ax.scatter(X_source_pca[:, 0], X_source_pca[:, 1], c=y_source_train, marker='o', label='Source Domain')
ax.scatter(X_target_pca[:, 0], X_target_pca[:, 1], c=y_target_train, marker='x', label='Target Domain')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.legend()
plt.show()

In [None]:
# Feature transformation with TCA: fit on the source and target training splits
tca = TCA(kernel='linear', n_components=2, mu=1.0, gamma=1.0)
tca.fit(X_source_train, X_target_train)


In [None]:
# Inspect the standardised target training features
X_target_train

In [None]:
# Project both training splits with the fitted TCA model
X_source_tca = tca.transform(X_source_train)
X_target_tca = tca.transform(X_target_train)

In [None]:

# Visualise the source-domain and target-domain data after the TCA transform
fig, ax = plt.subplots()
ax.scatter(X_source_tca[:, 0], X_source_tca[:, 1], c=y_source_train, marker='o', label='Source Domain (TCA)')
ax.scatter(X_target_tca[:, 0], X_target_tca[:, 1], c=y_target_train, marker='x', label='Target Domain (TCA)')
ax.set_xlabel('Component 1')
ax.set_ylabel('Component 2')
ax.legend()
plt.show()

# Train a classifier on the transformed source features
svm = SVC()
svm.fit(X_source_tca, y_source_train)


In [None]:
# Shape of the TCA-transformed source training features
X_source_tca.shape

In [None]:
# Shape of the source training features before the TCA transform
X_source_train.shape

In [None]:
# Shape of the held-out source test features
X_source_test.shape

In [None]:

# Project the held-out splits into the TCA feature space
X_source_test_tca = tca.transform(X_source_test)
X_target_test_tca = tca.transform(X_target_test)

# Predict from the TCA features: the SVM was fitted on X_source_tca
# (n_components=2 columns), so the raw 20-feature test matrices do not
# match its input dimension.
y_source_pred = svm.predict(X_source_test_tca)
y_target_pred = svm.predict(X_target_test_tca)

# Compute the accuracy on each domain's test split
accuracy_source = accuracy_score(y_source_test, y_source_pred)
accuracy_target = accuracy_score(y_target_test, y_target_pred)
print("Accuracy on source domain: {:.2f}".format(accuracy_source))
print("Accuracy on target domain: {:.2f}".format(accuracy_target))