
- 依赖安装

! pip install pystan

! pip install prophet

! pip install scikit-learn


In [5]:

import pandas as pd
import numpy as np
from prophet import Prophet
from datetime import datetime, timedelta


data = pd.read_csv("timeseries.csv")
data['CPI'] = data['CYCLES'] / data['INSTRUCTIONS']
start_date = datetime(2000, 1, 1)
data['timestamp'] = data['timestamp'].apply(lambda x : start_date + timedelta(seconds=x))

num_rows = int(len(data) * 0.8)

train_data = data.iloc[:num_rows]
test_data = data.iloc[num_rows:]

# 使用 prophet 训练和预测
prophet_df = pd.DataFrame()
prophet_df['ds'] = train_data['timestamp']
prophet_df['y'] = train_data['CPI']

# 初始化并拟合模型
m = Prophet()
m.fit(prophet_df)

# 创建预测的数据框
future_periods = len(test_data)
future = m.make_future_dataframe(periods=future_periods, freq='s')
print(future_periods)

forecast = m.predict(future)

print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(len(test_data)))


22:16:09 - cmdstanpy - INFO - Chain [1] start processing


22:16:09 - cmdstanpy - INFO - Chain [1] done processing


2
                          ds      yhat  yhat_lower  yhat_upper
8 2000-01-01 00:00:09.034087  1.992836    1.911095    2.080517
9 2000-01-01 00:00:10.034087  2.011243    1.918612    2.100590


In [2]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
from datetime import datetime, timedelta

data = pd.read_csv("timeseries.csv")
data['CPI'] = data['CYCLES'] / data['INSTRUCTIONS']

# 使用 prophet 训练和预测
prophet_df = pd.DataFrame()
prophet_df['ds'] = data['timestamp']
prophet_df['y'] = data['CPI']

# prophet 支持的时间格式为： YYYY-MM-DD HH:MM:SS
# 进行时间格式的转换
# 假定一个年月日日期
start_date = datetime(2000, 1, 1)

# 将浮点数时间戳转换为 datetime 类型
prophet_df['ds'] = prophet_df['ds'].apply(lambda x : start_date + timedelta(seconds=x))

# 计算80%的索引位置
cut_off_index = int(len(prophet_df) * 0.8)

# 分成训练集和测试集
train_df = prophet_df.iloc[:cut_off_index]
test_df = prophet_df.iloc[cut_off_index:]

print(f"训练集大小: {len(train_df)}")
print(f"测试集大小: {len(test_df)}")

# 初始化并拟合模型
model = Prophet()
model.fit(train_df)

# 创建未来数据框，包括测试集的日期
# 我们使用“秒”作为性能数据的时间间隔，因此 freq 设置为 s（秒）。
# 在不设置情况，是以 天 为单位
future = model.make_future_dataframe(periods=len(test_df), freq='s')
forecast = model.predict(future)

# 提取实际值和预测值
actual = test_df['y'].values
predicted = forecast['yhat'].iloc[-len(test_df):].values

# 计算评估指标
mae = mean_absolute_error(actual, predicted)
rmse = np.sqrt(mean_squared_error(actual, predicted))

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(len(test_df)))


22:15:34 - cmdstanpy - INFO - Chain [1] start processing


22:15:34 - cmdstanpy - INFO - Chain [1] done processing


训练集大小: 8
测试集大小: 2
Mean Absolute Error (MAE): 0.11521442557138528
Root Mean Squared Error (RMSE): 0.13882324214481337
                          ds      yhat  yhat_lower  yhat_upper
8 2000-01-01 00:00:09.034087  1.992836    1.899198    2.083994
9 2000-01-01 00:00:10.034087  2.011243    1.913709    2.097614


In [6]:
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2000-01-01 00:00:01.001018,1.844968,1.757998,1.931998,1.844968,1.844968,0.0,0.0,0.0,0.0,0.0,0.0,1.844968
1,2000-01-01 00:00:02.006088,1.863469,1.772712,1.948423,1.863469,1.863469,0.0,0.0,0.0,0.0,0.0,0.0,1.863469
2,2000-01-01 00:00:03.011141,1.881969,1.790009,1.972042,1.881969,1.881969,0.0,0.0,0.0,0.0,0.0,0.0,1.881969
3,2000-01-01 00:00:04.016085,1.900468,1.810337,1.987332,1.900468,1.900468,0.0,0.0,0.0,0.0,0.0,0.0,1.900468
4,2000-01-01 00:00:05.021051,1.918966,1.832886,2.012144,1.918966,1.918966,0.0,0.0,0.0,0.0,0.0,0.0,1.918966
5,2000-01-01 00:00:06.025944,1.937464,1.850073,2.025121,1.937464,1.937464,0.0,0.0,0.0,0.0,0.0,0.0,1.937464
6,2000-01-01 00:00:07.030087,1.955948,1.86798,2.047047,1.955948,1.955948,0.0,0.0,0.0,0.0,0.0,0.0,1.955948
7,2000-01-01 00:00:08.034087,1.974429,1.888081,2.061446,1.974429,1.974429,0.0,0.0,0.0,0.0,0.0,0.0,1.974429
8,2000-01-01 00:00:09.034087,1.992836,1.911095,2.080517,1.992836,1.992836,0.0,0.0,0.0,0.0,0.0,0.0,1.992836
9,2000-01-01 00:00:10.034087,2.011243,1.918612,2.10059,2.011243,2.011243,0.0,0.0,0.0,0.0,0.0,0.0,2.011243
