In [1]:
import numpy as np

In [2]:
electricity_data = np.loadtxt('electricity.txt', delimiter=',')
electricity_data.shape

(26304, 321)

In [3]:
commodity_data = np.loadtxt('commodity.txt', delimiter=',')
commodity_data.shape

(572, 500)

In [4]:
traffic_data = np.loadtxt('traffic.txt', delimiter=',')
traffic_data.shape

(12672, 228)

In [5]:
W = np.loadtxt('W.csv', delimiter=',')
W.shape

(228, 228)

In [6]:
# 选择数据
origin_data = electricity_data.copy()
# origin_data = commodity_data.copy()
# origin_data = traffic_data.copy()
data = {"electricity_data":electricity_data.copy(),
        "commodity_data":commodity_data.copy(),
        "traffic_data":traffic_data.copy()}


In [7]:
from sklearn.model_selection import train_test_split
for key, origin_data in data.items():
    print(key)
    # 对数据进行归一化
    for i in range(origin_data.shape[1]):
        min_val = origin_data[:,i].min()
        max_val = origin_data[:,i].max()
        dis_val = max_val - min_val
        for j in range(origin_data.shape[0]):
            origin_data[j,i]=(origin_data[j,i] - min_val) / dis_val

    # 数据预处理
    # 定义数据段的长度
    width = 5
    # 1. 求一个时间段数据的均值
    avg_data = np.mean(origin_data, axis=1)
    feature = []
    label = []
    for t in range(0, avg_data.shape[0] - width):
        cur_data = avg_data[t:t+width-1]
        feature.append(np.append(cur_data, [cur_data.min(), cur_data.max()]))
        label.append(avg_data[t+width])
    feature = np.array(feature)
    label = np.array(label)

    x_train, x_test, y_train, y_test = train_test_split(feature, label, train_size=0.8)

    import time
    time.time()
    # 使用线性回归训练模型
    from LinearRegression import LinearRegression
    model = LinearRegression()
    start_time = time.time()
    model.fit_normal(x_train, y_train)
    print("线性回归: ")
    print("训练执行时间" + str(time.time() - start_time) + "s")
    start_time = time.time()
    print(model.score(x_test,y_test))
    print("预测执行时间" + str(time.time() - start_time) + "s")
    print("\n")

    # 使用多项式回归训练模型
    from PolyRegression import PolyRegression
    model = PolyRegression()
    start_time = time.time()
    model.fit_cv(x_train, y_train, degrees=[1,2,3,4,5,6])
    print("多项式回归: ")
    print("训练执行时间" + str(time.time() - start_time) + "s")
    start_time = time.time()
    print(model.score(x_test,y_test))
    print("预测执行时间" + str(time.time() - start_time) + "s")
    print("degree取值:" + str(model.degree))
    print("\n")

    # 岭回归
    from RidgeRegression import  RidgeRegression
    model = RidgeRegression()
    start_time = time.time()
    model.fit_cv(x_train, y_train, alphas=np.logspace(-5,2,200))
    print("岭回归: ")
    print("训练执行时间" + str(time.time() - start_time) + "s")
    start_time = time.time()
    print(model.score(x_test,y_test))
    print("预测执行时间" + str(time.time() - start_time) + "s")
    print("------------------------------------------------------\n")

electricity_data
线性回归: 
训练执行时间0.006998777389526367s
0.037085485784281176
预测执行时间0.001001596450805664s


多项式回归: 
训练执行时间4.128999948501587s
0.028380588585375302
预测执行时间0.0030040740966796875s
degree取值:2


岭回归: 
训练执行时间1.0440003871917725s
0.03707853006885294
预测执行时间0.0009989738464355469s
------------------------------------------------------

commodity_data
线性回归: 
训练执行时间0.0010030269622802734s
0.025248410284627463
预测执行时间0.0009980201721191406s


多项式回归: 
训练执行时间0.10499954223632812s
0.02430524546952156
预测执行时间0.0009999275207519531s
degree取值:2


岭回归: 
训练执行时间0.1550002098083496s
0.025857216542075786
预测执行时间0.0009984970092773438s
------------------------------------------------------

traffic_data
线性回归: 
训练执行时间0.0009648799896240234s
0.007440934172417665
预测执行时间0.0009984970092773438s


多项式回归: 
训练执行时间0.9740002155303955s
0.007536635770553508
预测执行时间0.004001140594482422s
degree取值:3


岭回归: 
训练执行时间0.4120001792907715s
0.0074427904723953645
预测执行时间0.0s
------------------------------------------------------



In [8]:
# from sklearn.linear_model import RidgeCV
# model = RidgeCV(alphas=np.logspace(-5,2,200),normalize=True,scoring='neg_root_mean_squared_error',cv=10)
# model.fit(feature, label)
# model.score(feature, label)
# from sklearn.metrics import mean_squared_error
# np.sqrt(mean_squared_error(label, model.predict(feature)))