In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [19]:
# 1. Synthetic daily series: a sine wave with additive Gaussian noise
np.random.seed(42)  # fixed seed so the noise draw is reproducible
dates = pd.date_range(start="2022-01-01", periods=100, freq="D")
signal = np.sin(np.linspace(0, 20, 100))
noise = np.random.normal(scale=0.3, size=100)
values = signal + noise
# Build the frame with both columns, then promote "Date" to the index.
df = pd.DataFrame({"Date": dates, "Value": values}).set_index("Date")

In [21]:
# 2. Create lag features
def create_lag_features(df, lag=5, column="Value"):
    """Return a copy of ``df`` with ``lag_1`` .. ``lag_<lag>`` columns added.

    Each ``lag_i`` column holds ``df[column]`` shifted down by ``i`` rows, so
    the first ``i`` entries of ``lag_i`` are NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the series to lag. It is NOT modified; a new frame
        is returned, so re-running a cell never silently alters its input.
    lag : int, default 5
        Number of lag columns to create. Values below 1 add no columns.
    column : str, default "Value"
        Name of the column to build the lags from.

    Returns
    -------
    pandas.DataFrame
        New frame with the original columns followed by the lag columns
        (an existing ``lag_i`` column is overwritten in the result).
    """
    lagged = {f"lag_{i}": df[column].shift(i) for i in range(1, lag + 1)}
    # ``assign`` builds a new frame, leaving the caller's ``df`` untouched.
    return df.assign(**lagged)

# Add the lag columns, then discard the leading rows whose lags are NaN.
df = create_lag_features(df).dropna()

In [None]:
# Print a concise summary of df (index, columns, dtypes, non-null counts)
# to check the frame after the lag columns were added and NaN rows dropped.
df.info()

In [None]:
# 3. Chronological hold-out: the final 10 rows become the test set
train, test = df.iloc[:-10], df.iloc[-10:]

# Features are every column except the target "Value".
X_train, y_train = train.drop(columns="Value"), train["Value"]
X_test, y_test = test.drop(columns="Value"), test["Value"]

In [None]:
# 4. Fit a linear regression on the training features
# ``fit`` returns the estimator itself, so construction and fitting chain.
model = LinearRegression().fit(X_train, y_train)
model  # trailing expression keeps the fitted estimator as the cell output

In [None]:
# 5. Predict the target for the held-out test rows using the fitted model
y_pred = model.predict(X_test)

In [None]:
# 6. Score the hold-out predictions with mean squared error
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)

In [None]:
# 7. Plot the full series with the test-window predictions overlaid
fig, ax = plt.subplots(figsize=(10, 5))

ax.plot(df.index, df["Value"], label="Actual Data")
ax.plot(test.index, y_pred, label="Predicted", linestyle='--', marker='o')
# Vertical marker at the first test timestamp, i.e. where training data ends.
ax.axvline(test.index[0], color='gray', linestyle='--', label='Train/Test Split')

ax.set_title("Time Series Forecasting with Linear Regression")
ax.set_xlabel("Date")
ax.set_ylabel("Value")
ax.grid(True)
ax.legend()
plt.show()