In [None]:
import pandas as pd
# Load the per-brand car sales workbook.
# The path is written as a raw string: in a normal string literal the
# backslashes followed by CJK characters are invalid escape sequences, which
# emit a SyntaxWarning on recent Python versions. The resulting path value is
# unchanged.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable data directory.
df = pd.read_excel(r'E:\坚果云同步文件\江梓烨\供应链调查与分析\数据\各品牌汽车销售数据.xlsx')
df.head()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# 1. Theme and style: white grid, a CJK-capable font for the Chinese labels,
#    ASCII minus signs, and slightly heavier axes lines.
theme_rc = {
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
    'axes.linewidth': 1.5,
}
sns.set_theme(style="whitegrid", rc=theme_rc)

# 2. Data: the ten rows with the largest value in the total column.
top_brands = df.sort_values('合计', ascending=False).head(10)
peak_total = max(top_brands['合计'])

# 3. Horizontal bar chart of total sales per brand.
plt.figure(figsize=(10, 6))
ax = sns.barplot(
    data=top_brands,
    x='合计',
    y='品牌',
    orient='h',             # horizontal bars
    palette="coolwarm_r",   # reversed palette
    saturation=0.8,
    width=0.7,
)

# 4. Value labels, placed just past the end of each bar (offset scales with
#    the largest total so it stays proportional to the axis range).
label_gap = peak_total * 0.02
for bar in ax.patches:
    bar_length = bar.get_width()
    bar_middle = bar.get_y() + bar.get_height() / 2
    ax.text(bar_length + label_gap, bar_middle, f"{int(bar_length)}",
            va='center', fontsize=10)

# 5. Titles, axis labels and layout; the x range is widened to leave room
#    for the value labels.
ax.set_title('汽车品牌销量TOP10', fontsize=16, pad=20)
ax.set_xlabel('销量（辆）', fontsize=12)
ax.set_ylabel('品牌', fontsize=12)
ax.set_xlim(0, peak_total * 1.15)
sns.despine(left=True, bottom=True)
plt.tight_layout()
plt.show()

In [None]:
# Yearly sales of the top-ten brands, reshaped so that each brand is a column
# and each year is a row.
years = [str(year) for year in range(2020, 2025)]
top_brands.columns = top_brands.columns.astype(str)  # year columns may be read as ints
top_brands_time_series = top_brands.set_index('品牌')[years].T

# One line per brand across the years.
trend_fig, trend_ax = plt.subplots(figsize=(12, 6))
year_axis = top_brands_time_series.index
for brand in top_brands_time_series.columns:
    trend_ax.plot(year_axis, top_brands_time_series[brand], marker='o', label=brand)

# Titles, legend, horizontal grid lines and layout.
trend_ax.set_title('销量前十车企的销量变化趋势', fontsize=16, pad=20)
trend_ax.set_xlabel('年份', fontsize=12)
trend_ax.set_ylabel('销量（辆）', fontsize=12)
trend_ax.legend(title='品牌', fontsize=10)
trend_ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from datetime import datetime
import matplotlib.ticker as ticker
# Global style.
# The font settings are passed through the seaborn theme's `rc` argument:
# applying the seaborn theme resets `font.sans-serif`, so setting
# plt.rcParams *before* calling sns.set()/set_theme() would be overwritten and
# the Chinese title would render as missing glyphs.
sns.set_theme(
    style="whitegrid",
    rc={
        'font.sans-serif': ['SimHei'],   # CJK-capable font for Chinese text
        'axes.unicode_minus': False,     # SimHei has no U+2212 minus glyph
    }
)
# Data: month-start dates and the price recorded for each month.
dates = [
    "2022-01-01", "2022-02-01", "2022-03-01", "2022-04-01", "2022-05-01", "2022-06-01", 
    "2022-07-01", "2022-08-01", "2022-09-01", "2022-10-01", "2022-11-01", "2022-12-01", 
    "2023-01-01", "2023-02-01", "2023-03-01", "2023-04-01", "2023-05-01", "2023-06-01", 
    "2023-07-01", "2023-08-01", "2023-09-01", "2023-10-01", "2023-11-01", "2023-12-01", 
    "2024-01-01", "2024-02-01", "2024-03-01", "2024-04-01", "2024-05-01", "2024-06-01", 
    "2024-07-01", "2024-08-01", "2024-09-01", "2024-10-01", "2024-11-01", "2024-12-01", 
    "2025-01-01", "2025-02-01", "2025-03-01", "2025-04-01" 
]
prices = [300753.8, 401005, 488944.7, 496500, 457500, 468500, 
          475500, 476500, 492500, 537500, 597500, 519500,
          480500, 449500, 332500, 212814, 186432.2, 307789,
          300753.8, 260301.5, 200502.5, 168842.4, 160050.3, 107286.4,
          96733.7, 94974.9, 112562.8, 105527.6, 107286.4, 100251.3,
          89698.5, 75628.1, 72110.6, 75628.1, 73869.4, 71600,
          77386.9, 75628.1, 73869.4, 70351.8]
# Parse the date strings into datetime objects so the x axis is a date axis.
dates = [datetime.strptime(d, "%Y-%m-%d") for d in dates]
# Create the canvas and keep a handle on its axes for the formatting below.
plt.figure(figsize=(20, 8))
ax = plt.gca()
# Draw the monthly price line on that axes.
sns.lineplot(x=dates, y=prices, 
             marker='o', 
             linewidth=2, 
             color='#268bd2', 
             markerfacecolor='#f97306',
             label='Monthly Price',
             ax=ax)
# Tick formatter for the x axis (full date).
def format_date(x, pos=None):
    """Format a Matplotlib date number as a 'YYYY-MM-DD' string.

    `pos` is accepted because tick-formatter callbacks are called with
    (value, position); it is not used.
    """
    return mdates.num2date(x).strftime('%Y-%m-%d')
# X axis: one major tick per month labelled with the full date, plus monthly
# minor ticks.
x_axis = ax.xaxis
x_axis.set_major_locator(mdates.MonthLocator(interval=1))
x_axis.set_minor_locator(mdates.MonthLocator())
x_axis.set_major_formatter(plt.FuncFormatter(format_date))
# Y axis: integer values with thousands separators.
ax.yaxis.set_major_formatter(
    ticker.FuncFormatter(lambda value, _pos: format(int(value), ','))
)
# Faint dashed vertical line at each January after the first year in the data.
first_year = dates[0].year
year_starts = [d for d in dates if d.month == 1 and d.year > first_year]
for year_start in year_starts:
    ax.axvline(year_start, color='gray', linestyle='--', linewidth=0.8, alpha=0.3)
# Labels, title and legend.
ax.set_ylabel('Price (Unit)', fontsize=12)
ax.set_title('碳酸锂价格曲线 2022–2025', fontsize=16, fontweight='bold', pad=20)
ax.legend(loc='upper right', fontsize=11)
# Tick label appearance: slanted, right-aligned date labels.
plt.xticks(rotation=45, ha='right', fontsize=9)
plt.yticks(fontsize=10)
# Margins (extra room at the bottom for the rotated date labels).
plt.subplots_adjust(bottom=0.18, left=0.08, right=0.95)
# Render the figure.
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
# Global style: use SimHei so Chinese labels render. SimHei has no glyph for
# the Unicode minus sign, so fall back to the ASCII hyphen for negative tick
# labels (same settings as the bar-chart theme earlier in the notebook).
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# 1. Data: month-start dates and the matching prices, one quarter per row.
dates = [
    "2022-01-01", "2022-02-01", "2022-03-01",
    "2022-04-01", "2022-05-01", "2022-06-01",
    "2022-07-01", "2022-08-01", "2022-09-01",
    "2022-10-01", "2022-11-01", "2022-12-01",
    "2023-01-01", "2023-02-01", "2023-03-01",
    "2023-04-01", "2023-05-01", "2023-06-01",
    "2023-07-01", "2023-08-01", "2023-09-01",
    "2023-10-01", "2023-11-01", "2023-12-01",
    "2024-01-01", "2024-02-01", "2024-03-01",
    "2024-04-01", "2024-05-01", "2024-06-01",
    "2024-07-01", "2024-08-01", "2024-09-01",
    "2024-10-01", "2024-11-01", "2024-12-01",
    "2025-01-01", "2025-02-01", "2025-03-01",
]
prices = [
    300753.8, 401005, 488944.7,
    496500, 457500, 468500,
    475500, 476500, 492500,
    537500, 597500, 519500,
    480500, 449500, 332500,
    212814, 186432.2, 307789,
    300753.8, 260301.5, 200502.5,
    168842.4, 160050.3, 107286.4,
    96733.7, 94974.9, 112562.8,
    105527.6, 107286.4, 100251.3,
    89698.5, 75628.1, 72110.6,
    75628.1, 73869.4, 71600,
    77386.9, 75628.1, 73869.4,
]

# Two-column frame: 'ds' holds the parsed dates, 'y' the prices.
df = pd.DataFrame({'ds': pd.to_datetime(dates), 'y': prices})

# 2. Min-max scale the price column into [0, 1]; the fitted scaler is kept so
#    predictions can be mapped back to price units later.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(df[['y']])
scaled_prices = scaler.transform(df[['y']])

# 3. Build supervised samples (previous `look_back` values -> next value)
def create_dataset(data, look_back=12):
    """Slice a series into sliding windows and their next-step targets.

    Parameters
    ----------
    data : array-like
        Sequence of observations ordered in time; the first axis is time.
    look_back : int, default 12
        Number of consecutive observations in each input window.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(len(data) - look_back, look_back, *data.shape[1:])``;
        ``X[i]`` is ``data[i:i + look_back]``.
    y : numpy.ndarray
        Shape ``(len(data) - look_back, *data.shape[1:])``;
        ``y[i]`` is ``data[i + look_back]``.

    When the series is not longer than ``look_back`` there are no samples;
    arrays with zero rows but the correct trailing dimensions are returned so
    that callers inspecting ``X.shape[1]`` do not fail.

    Raises
    ------
    ValueError
        If ``look_back`` is smaller than 1.
    """
    if look_back < 1:
        raise ValueError("look_back must be a positive integer")

    data = np.asarray(data)
    n_samples = len(data) - look_back
    if n_samples <= 0:
        # Not enough history for a single window: keep the trailing shape.
        empty_X = np.empty((0, look_back) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,) + data.shape[1:], dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + look_back] for start in range(n_samples)])
    # The target of window i is the observation right after it, i.e. the
    # series shifted by `look_back`; np.array() makes an independent copy.
    y = np.array(data[look_back:])
    return X, y

# Seed Python, NumPy and the Keras backend so that weight initialisation and
# batch shuffling (and therefore the forecast) are repeatable between runs.
from tensorflow.keras.utils import set_random_seed
set_random_seed(42)

look_back = 12        # months of history fed to the model per sample
forecast_horizon = 6  # months to forecast beyond the last observation

X, y = create_dataset(scaled_prices, look_back)
X = X.reshape(X.shape[0], X.shape[1], 1)  # LSTM input: (samples, timesteps, features)

# 4. Build and train the LSTM model (single LSTM layer + linear output).
model = Sequential([
    LSTM(64, return_sequences=False, input_shape=(look_back, 1)),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X, y, epochs=100, batch_size=4, verbose=0)

# 5. Recursive forecast: start from the most recent `look_back` observations
#    and feed each prediction back in as the newest value of the window.
last_sequence = scaled_prices[-look_back:]
future_preds = []

for _ in range(forecast_horizon):
    input_seq = last_sequence.reshape(1, look_back, 1)
    pred = model.predict(input_seq, verbose=0)
    future_preds.append(pred[0, 0])
    # Drop the oldest value and append the prediction (pred has shape (1, 1)).
    last_sequence = np.append(last_sequence[1:], pred, axis=0)

# 6. Map the scaled predictions back to price units.
predicted_values = scaler.inverse_transform(np.array(future_preds).reshape(-1, 1)).flatten()

# 7. Dates for the forecast: the months following the last historical date.
future_dates = [df['ds'].iloc[-1] + pd.DateOffset(months=i+1) for i in range(forecast_horizon)]

# 8. Plot history and forecast.
plt.figure(figsize=(14, 6))
plt.plot(df['ds'], df['y'], label='历史价格', marker='o')
plt.plot(future_dates, predicted_values, label='预测价格（LSTM）', marker='s', color='orange')
plt.title("价格预测（LSTM）", fontsize=14)
plt.xlabel("时间")
plt.ylabel("价格")
plt.grid(True)
plt.legend()
# Rotate the tick labels before tight_layout() so the layout accounts for the
# space the rotated labels need.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from datetime import datetime, timedelta

# Global style: use SimHei so Chinese labels render. SimHei has no glyph for
# the Unicode minus sign, so fall back to the ASCII hyphen for negative tick
# labels (same settings as the bar-chart theme earlier in the notebook).
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# 1. Data loading
def load_data():
    """Return the monthly price series as a DataFrame.

    The frame has two columns: 'ds' (month-start timestamps from 2022-01-01
    through 2025-03-01) and 'y' (the price for that month), 39 rows in total.
    """
    # Month-start labels, generated instead of spelled out: offset 0 is
    # 2022-01 and offset 38 is 2025-03.
    month_labels = [
        f"{2022 + offset // 12}-{offset % 12 + 1:02d}-01"
        for offset in range(39)
    ]

    # Prices grouped by calendar year, January first.
    prices_by_year = {
        2022: [300753.8, 401005, 488944.7, 496500, 457500, 468500,
               475500, 476500, 492500, 537500, 597500, 519500],
        2023: [480500, 449500, 332500, 212814, 186432.2, 307789,
               300753.8, 260301.5, 200502.5, 168842.4, 160050.3, 107286.4],
        2024: [96733.7, 94974.9, 112562.8, 105527.6, 107286.4, 100251.3,
               89698.5, 75628.1, 72110.6, 75628.1, 73869.4, 71600],
        2025: [77386.9, 75628.1, 73869.4],
    }
    monthly_prices = [
        price
        for year_prices in prices_by_year.values()
        for price in year_prices
    ]

    return pd.DataFrame({'ds': pd.to_datetime(month_labels), 'y': monthly_prices})

# 2. Preprocessing
def preprocess_data(df, look_back=12):
    """Scale the 'y' column and cut it into sliding-window samples.

    Parameters:
        df: frame with a 'y' column holding the series values.
        look_back: number of consecutive values in each input window.

    Returns:
        (X, y, scaler) where X has shape (n_samples, look_back, 1), y holds
        the scaled value that follows each window, and scaler is the
        MinMaxScaler fitted on the whole 'y' column.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df[['y']])

    # Window s covers rows [s, s + look_back); its target is row s + look_back.
    window_starts = range(len(scaled_data) - look_back)
    windows = [scaled_data[s:s + look_back] for s in window_starts]
    targets = [scaled_data[s + look_back] for s in window_starts]

    X = np.array(windows).reshape(len(windows), look_back, 1)
    y = np.array(targets)
    return X, y, scaler

# 3. Model definition
def build_model(look_back):
    """Create and compile the stacked LSTM regressor.

    The network is two LSTM layers (128 then 64 units), each followed by 30%
    dropout, and a single-unit dense output. It takes inputs of shape
    (look_back, 1) and is compiled with the Adam optimizer and MSE loss.
    """
    model = Sequential()
    # First recurrent layer returns the full sequence so that the second
    # LSTM layer receives one vector per timestep.
    model.add(LSTM(128, return_sequences=True, input_shape=(look_back, 1)))
    model.add(Dropout(0.3))
    model.add(LSTM(64))
    model.add(Dropout(0.3))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mse')
    return model

# 4. Main routine
def main(seed=42):
    """Train the LSTM on the price series, validate it, and plot a forecast.

    Steps: load the data, build sliding-window samples, split them
    chronologically (first 80% for training, remainder for validation), train
    with early stopping, report the validation RMSE in price units, forecast
    the next six months recursively, and draw two figures (history + forecast,
    and the training/validation loss curves).

    Parameters:
        seed: value used to seed Python, NumPy and the Keras backend so runs
            are repeatable. Pass None to leave the random state untouched.

    Returns:
        None. Results are printed and shown as figures.
    """
    if seed is not None:
        from tensorflow.keras.utils import set_random_seed
        set_random_seed(seed)

    df = load_data()
    look_back = 12
    forecast_horizon = 6

    # Preprocessing: scaled windows, targets and the fitted scaler.
    X, y, scaler = preprocess_data(df, look_back)

    # Chronological split: the first 80% of the samples train the model and
    # the most recent 20% are held out for validation.
    train_size = int(len(X) * 0.8)
    X_train, X_val = X[:train_size], X[train_size:]
    y_train, y_val = y[:train_size], y[train_size:]

    # Build and train. restore_best_weights makes the model used below the
    # one with the lowest validation loss rather than the one from the last
    # (patience-delayed) epoch.
    model = build_model(look_back)
    early_stop = EarlyStopping(monitor='val_loss', patience=20,
                               restore_best_weights=True, verbose=1)
    history = model.fit(
        X_train, y_train,
        epochs=200,
        batch_size=8,
        validation_data=(X_val, y_val),
        callbacks=[early_stop],
        verbose=1
    )

    # Validation error, converted back to price units.
    val_predict = model.predict(X_val)
    val_predict = scaler.inverse_transform(val_predict)
    y_val_true = scaler.inverse_transform(y_val)
    val_rmse = np.sqrt(mean_squared_error(y_val_true, val_predict))
    print(f"验证集RMSE: {val_rmse:.2f}")

    # Forecast the next months recursively.
    # The seed window must be the *latest* `look_back` observations. X[-1] is
    # the last training window, which ends one step before the final
    # observation (that observation is its target, y[-1]); starting from
    # X[-1] would re-predict the last known month and shift every forecast by
    # one month. So drop its oldest value and append y[-1].
    last_sequence = np.concatenate([X[-1][1:], y[-1:].reshape(1, 1)], axis=0)
    future_preds = []
    for _ in range(forecast_horizon):
        pred = model.predict(last_sequence.reshape(1, look_back, 1), verbose=0)
        future_preds.append(pred[0, 0])
        # Slide the window: drop the oldest value, append the prediction.
        last_sequence = np.append(last_sequence[1:], pred, axis=0)

    predicted_values = scaler.inverse_transform(np.array(future_preds).reshape(-1, 1)).flatten()

    # Dates of the forecast months (the months after the last historical date).
    future_dates = [df['ds'].iloc[-1] + pd.DateOffset(months=i+1) for i in range(forecast_horizon)]

    # Figure 1: history and forecast.
    plt.figure(figsize=(14, 7))
    plt.plot(df['ds'], df['y'], label='历史价格', marker='o', linewidth=2)
    plt.plot(future_dates, predicted_values, label='预测价格', marker='s', 
             linestyle='--', linewidth=2, color='#FF6347')

    # Highlight the most recent historical observation.
    last_historical = df.iloc[-1]
    plt.scatter(last_historical['ds'], last_historical['y'], color='#2E8B57', s=100, zorder=5, 
                label='当前最新数据点')

    # Annotate each forecast point with its value.
    for date, value in zip(future_dates, predicted_values):
        plt.text(date, value, f'{value:,.1f}', ha='center', va='bottom', fontsize=9)

    plt.title("价格趋势预测分析（LSTM模型）", fontsize=16, pad=20)
    plt.xlabel("时间", fontsize=12)
    plt.ylabel("价格", fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend(loc='upper left')
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Figure 2: training and validation loss per epoch.
    plt.figure(figsize=(12, 5))
    plt.plot(history.history['loss'], label='训练损失')
    plt.plot(history.history['val_loss'], label='验证损失')
    plt.title('模型训练过程', fontsize=14)
    plt.xlabel('Epochs')
    plt.ylabel('MSE Loss')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()

    plt.show()