
- 依赖安装

! pip install pystan

! pip install prophet

! pip install scikit-learn


In [1]:

import pandas as pd
import numpy as np
from prophet import Prophet
from datetime import datetime, timedelta


data = pd.read_csv("timeseries.csv")
data['CPI'] = data['CYCLES'] / data['INSTRUCTIONS']
start_date = datetime(2000, 1, 1)
data['timestamp'] = data['timestamp'].apply(lambda x : start_date + timedelta(seconds=x))

num_rows = int(len(data) * 0.8)

train_data = data.iloc[:num_rows]
test_data = data.iloc[num_rows:]

# 使用 prophet 训练和预测
prophet_df = pd.DataFrame()
prophet_df['ds'] = train_data['timestamp']
prophet_df['y'] = train_data['CPI']

# 初始化并拟合模型
m = Prophet()
m.fit(prophet_df)

# 创建预测的数据框
future_periods = len(test_data)
future = m.make_future_dataframe(periods=future_periods)
print(future_periods)

forecast = m.predict(future)

print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(len(test_data)))


  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.
09:22:41 - cmdstanpy - INFO - Chain [1] start processing
09:22:41 - cmdstanpy - INFO - Chain [1] done processing


2
                          ds         yhat   yhat_lower   yhat_upper
8 2000-01-02 00:00:08.034087  1592.375180  1592.288792  1592.463704
9 2000-01-03 00:00:08.034087  3182.775931  3182.688369  3182.859170


In [2]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
from datetime import datetime, timedelta

data = pd.read_csv("timeseries.csv")
data['CPI'] = data['CYCLES'] / data['INSTRUCTIONS']

# 使用 prophet 训练和预测
prophet_df = pd.DataFrame()
prophet_df['ds'] = data['timestamp']
prophet_df['y'] = data['CPI']

# prophet 支持的时间格式为： YYYY-MM-DD HH:MM:SS
# 进行时间格式的转换
# 假定一个年月日日期
start_date = datetime(2000, 1, 1)

# 将浮点数时间戳转换为 datetime 类型
prophet_df['ds'] = prophet_df['ds'].apply(lambda x : start_date + timedelta(seconds=x))

# 计算80%的索引位置
cut_off_index = int(len(prophet_df) * 0.8)

# 分成训练集和测试集
train_df = prophet_df.iloc[:cut_off_index]
test_df = prophet_df.iloc[cut_off_index:]

print(f"训练集大小: {len(train_df)}")
print(f"测试集大小: {len(test_df)}")

# 初始化并拟合模型
model = Prophet()
model.fit(train_df)

# 创建未来数据框，包括测试集的日期
future = model.make_future_dataframe(periods=len(test_df))
forecast = model.predict(future)

# 提取实际值和预测值
actual = test_df['y'].values
predicted = forecast['yhat'].iloc[-len(test_df):].values

# 计算评估指标
mae = mean_absolute_error(actual, predicted)
rmse = np.sqrt(mean_squared_error(actual, predicted))

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(len(test_df)))


09:22:45 - cmdstanpy - INFO - Chain [1] start processing
09:22:45 - cmdstanpy - INFO - Chain [1] done processing


训练集大小: 8
测试集大小: 2
Mean Absolute Error (MAE): 2385.688729865966
Root Mean Squared Error (RMSE): 2514.700092739318
                          ds         yhat   yhat_lower   yhat_upper
8 2000-01-02 00:00:08.034087  1592.375180  1592.278208  1592.466316
9 2000-01-03 00:00:08.034087  3182.775931  3182.693706  3182.868896


In [7]:
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2000-01-01 00:00:01.001018,1.844968,1.75393,1.925979,1.844968,1.844968,0.0,0.0,0.0,0.0,0.0,0.0,1.844968
1,2000-01-01 00:00:02.006088,1.863469,1.772495,1.955647,1.863469,1.863469,0.0,0.0,0.0,0.0,0.0,0.0,1.863469
2,2000-01-01 00:00:03.011141,1.881969,1.791063,1.971432,1.881969,1.881969,0.0,0.0,0.0,0.0,0.0,0.0,1.881969
3,2000-01-01 00:00:04.016085,1.900468,1.809608,1.98406,1.900468,1.900468,0.0,0.0,0.0,0.0,0.0,0.0,1.900468
4,2000-01-01 00:00:05.021051,1.918966,1.827803,2.001509,1.918966,1.918966,0.0,0.0,0.0,0.0,0.0,0.0,1.918966
5,2000-01-01 00:00:06.025944,1.937464,1.853336,2.029779,1.937464,1.937464,0.0,0.0,0.0,0.0,0.0,0.0,1.937464
6,2000-01-01 00:00:07.030087,1.955948,1.866204,2.045622,1.955948,1.955948,0.0,0.0,0.0,0.0,0.0,0.0,1.955948
7,2000-01-01 00:00:08.034087,1.974429,1.886585,2.062922,1.974429,1.974429,0.0,0.0,0.0,0.0,0.0,0.0,1.974429
8,2000-01-02 00:00:08.034087,1592.37518,1592.282724,1592.468569,1592.37495,1592.375377,0.0,0.0,0.0,0.0,0.0,0.0,1592.37518
9,2000-01-03 00:00:08.034087,3182.775931,3182.691209,3182.856412,3182.775226,3182.776582,0.0,0.0,0.0,0.0,0.0,0.0,3182.775931


In [5]:
train_df

Unnamed: 0,ds,y
0,2000-01-01 00:00:01.001018,1.92241
1,2000-01-01 00:00:02.006088,1.700055
2,2000-01-01 00:00:03.011141,1.935454
3,2000-01-01 00:00:04.016085,1.935266
4,2000-01-01 00:00:05.021051,1.933936
5,2000-01-01 00:00:06.025944,1.916836
6,2000-01-01 00:00:07.030087,1.982707
7,2000-01-01 00:00:08.034087,1.951399


In [6]:
test_df

Unnamed: 0,ds,y
8,2000-01-01 00:00:09.038087,1.800178
9,2000-01-01 00:00:10.001677,1.973473
