# In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# arabic_reshaper and python-bidi are optional third-party packages used later
# to prepare product names for plot titles; stop early with install
# instructions when they are missing.
try:
    import arabic_reshaper
    from bidi.algorithm import get_display
except ImportError:
    print("Error: Required libraries for Persian text not found.")
    print("Please install them using: pip install arabic-reshaper python-bidi")
    # `exit()` is a convenience injected by the `site` module for interactive
    # use and reports success (status 0); raise SystemExit with a non-zero
    # status instead so callers can detect the failure.
    raise SystemExit(1)

print("Step 1: Loading data...")
# Keep the try body limited to the one call that can raise FileNotFoundError.
try:
    data = pd.read_csv("D:/BI_NI_CI/Data/MonthlySales_TopGoods_500_ByCode_And_Name.csv")
except FileNotFoundError:
    print("Error: Data file not found. Please check the file path.")
    # `exit()` is a `site`-module helper meant for interactive sessions and
    # exits with status 0; signal the failure with a non-zero status instead.
    raise SystemExit(1)

# Header cells may carry stray whitespace; normalise before columns are
# referenced by name.
data.columns = data.columns.str.strip()

# Order rows chronologically within each product code; the row-based lag
# features computed later rely on this ordering.
data.sort_values(['Code', 'Year', 'Month'], inplace=True)


print("Step 2: Engineering features for seasonality and trend...")
# Work on a copy so the raw `data` frame is left untouched.
df = data.copy()

# All history features are computed per product code. Shifting by k rows looks
# back k observations within the code, which equals k months only when every
# code has one row per month with no gaps -- NOTE(review): confirm for the CSV.
sales_by_code = df.groupby('Code')['MainQty']

# Short-term lags: sales of the previous 1, 2 and 3 observations.
for lag in [1, 2, 3]:
    df[f'Sales_Lag_{lag}'] = sales_by_code.shift(lag)

# Seasonal lag: sales 12 observations earlier.
df['Sales_Lag_12'] = sales_by_code.shift(12)

# Mean / std of the (up to) three observations preceding the current row.
# The shift AND the rolling window both run inside each code group; rolling
# over the ungrouped shifted series would let the first rows of one code
# average in the last sales of the previous code.
df['Rolling_Mean_3'] = sales_by_code.transform(
    lambda sales: sales.shift(1).rolling(window=3, min_periods=1).mean()
)
df['Rolling_Std_3'] = sales_by_code.transform(
    lambda sales: sales.shift(1).rolling(window=3, min_periods=1).std()
)

# Drop rows that lack any model input or the target. Restricting the check to
# these columns avoids discarding usable rows just because an unrelated column
# (for example a missing product name) is NaN.
model_columns = [
    'Code', 'Year', 'Month', 'MainQty',
    'Sales_Lag_1', 'Sales_Lag_2', 'Sales_Lag_3', 'Sales_Lag_12',
    'Rolling_Mean_3', 'Rolling_Std_3',
]
df.dropna(subset=model_columns, inplace=True)

# Encode the product code as an integer feature for the model.
le = LabelEncoder()
df['Code_Encoded'] = le.fit_transform(df['Code'])


print("Feature engineering complete.")
print("-" * 30)


print("Step 3: Training a powerful RandomForest model with new features...")

# Column the model learns to predict.
target = 'MainQty'

# Model inputs, in the column order the fitted model will expect at
# prediction time.
features = [
    'Code_Encoded',
    'Year',
    'Month',
    'Sales_Lag_1',
    'Sales_Lag_2',
    'Sales_Lag_3',
    'Sales_Lag_12',
    'Rolling_Mean_3',
    'Rolling_Std_3',
]

X_train = df.loc[:, features]
y_train = df.loc[:, target]

# Fixed seed for reproducible forests; n_jobs=-1 uses every available core.
_forest_settings = {
    'n_estimators': 100,
    'random_state': 42,
    'n_jobs': -1,
    'min_samples_leaf': 5,
}
rf_model = RandomForestRegressor(**_forest_settings)
rf_model.fit(X_train, y_train)

print("Model training is complete.")
print("-" * 30)


print("Step 4: Performing intelligent autoregressive forecasting...")

# Forecast horizon: months 5 through 12 of year 1404.
future_dates_df = pd.DataFrame({
    'Year': [1404] * 8,
    'Month': [5, 6, 7, 8, 9, 10, 11, 12]
})

# Running sales history. It starts as the observed data and gains one
# predicted row per code after each forecast month, so later months use the
# earlier predictions as their lag inputs.
live_data = data.copy()
final_predictions = []

# Only codes seen by the label encoder (i.e. present in the training frame)
# can be predicted.
codes_to_predict = le.classes_

# LabelEncoder encodes each class as its position in `classes_`, so build the
# mapping once instead of calling `le.transform` for every code and month.
encoded_by_code = {code: position for position, code in enumerate(codes_to_predict)}

# First recorded name per code, resolved once up front (None when the data has
# no 'Name' column).
name_by_code = None
if 'Name' in data.columns:
    name_by_code = data.drop_duplicates(subset='Code').set_index('Code')['Name']

for future_year, future_month in zip(future_dates_df['Year'], future_dates_df['Month']):
    # Last (up to) 12 sales values per code, in row order. Grouping once per
    # month replaces a full-frame boolean filter for every single code.
    recent_sales = {
        code: sales.tail(12)
        for code, sales in live_data.groupby('Code', sort=False)['MainQty']
    }

    current_features = []
    for code in codes_to_predict:
        history = recent_sales.get(code)
        if history is None:
            continue

        lag_1 = history.iloc[-1]
        lag_2 = history.iloc[-2] if len(history) >= 2 else 0
        lag_3 = history.iloc[-3] if len(history) >= 3 else 0
        # Value from 12 observations back; only defined with a full window.
        lag_12 = history.iloc[0] if len(history) >= 12 else 0

        rolling_mean = history.tail(3).mean()
        # Sample std is NaN for fewer than two observations; use 0 instead.
        rolling_std = history.tail(3).std()

        current_features.append({
            'Code_Encoded': encoded_by_code[code],
            'Code': code,
            'Year': future_year,
            'Month': future_month,
            'Sales_Lag_1': lag_1,
            'Sales_Lag_2': lag_2,
            'Sales_Lag_3': lag_3,
            'Sales_Lag_12': lag_12,
            'Rolling_Mean_3': rolling_mean,
            'Rolling_Std_3': rolling_std if not np.isnan(rolling_std) else 0
        })

    if not current_features:
        continue

    features_df = pd.DataFrame(current_features)
    predictions = rf_model.predict(features_df[features])

    # Sales quantities are whole, non-negative numbers.
    predicted_qtys = [max(0, round(value)) for value in predictions]

    # Assemble this month's rows column-wise. Iterating `features_df` with
    # iterrows() would coerce each mixed int/float row to float, turning
    # Code/Year/Month into values like 12704.0 and 5.0 in the saved results.
    month_rows = features_df[['Code', 'Year', 'Month']].copy()
    month_rows['MainQty'] = predicted_qtys
    if name_by_code is not None:
        month_rows['Name'] = month_rows['Code'].map(name_by_code)

    # One concat per month: a code's features depend only on its own history,
    # so appending all of the month's predictions together is equivalent to
    # appending them one row at a time, without the quadratic copying.
    live_data = pd.concat([live_data, month_rows], ignore_index=True)

    final_predictions.extend(
        {
            'Code': code, 'Year': year, 'Month': month,
            'Predicted_Sales': qty, 'Model': 'Seasonal_RF'
        }
        for code, year, month, qty in zip(
            month_rows['Code'], month_rows['Year'], month_rows['Month'], predicted_qtys
        )
    )

print("Forecasting complete.")
print("-" * 30)


# Reshape the long list of predictions into one row per (Code, Model) with a
# column per forecast month.
results_df = pd.DataFrame(final_predictions)
df_predictions_pivoted = results_df.pivot_table(index=['Code', 'Model'], columns='Month', values='Predicted_Sales')

# Raw string: in a normal literal `\B`, `\p` and `\s` are invalid escape
# sequences (a SyntaxWarning on recent Python versions). The resulting path is
# unchanged.
output_dir = r'D:\BI_NI_CI\plot\plot2\seasonal_forecast_plots'
os.makedirs(output_dir, exist_ok=True)
df_predictions_pivoted.to_csv(os.path.join(output_dir, "seasonal_forecast_results.csv"))

print(f"Results saved to '{output_dir}/seasonal_forecast_results.csv'")
print("\n--- Forecast Results (First 15) ---")
print(df_predictions_pivoted.head(15))


print(f"\nStep 6: Generating comparison plots in '{output_dir}' directory...")

# Product codes requested for plotting.
user_code_list = [
    12704, 16629, 17292, 17732, 18616, 20305, 21270, 23804, 25962, 33579, 10806, 10808, 10823, 10829, 10830, 10869,10885,
    10890, 10934, 11121, 11150, 11257, 11277, 11303, 11310, 11312, 11313, 11325, 11339, 11668, 12093, 12108,12109, 12243,
    12254, 12265, 12326, 12492, 12493, 12494, 12500, 12555, 12646, 12654, 12672, 12694, 12700, 12704, 12714, 12721]

predicted_codes = df_predictions_pivoted.index.get_level_values('Code').unique()
# Keep only requested codes that have a forecast. dict.fromkeys drops repeated
# entries while preserving order (12704 is listed twice above), so no plot is
# rendered twice and the reported count is accurate.
codes_to_plot = [code for code in dict.fromkeys(user_code_list) if code in predicted_codes]
print(f"Plotting for {len(codes_to_plot)} specified codes that have predictions.")


# =========================================================================================================================
# Render one PNG per selected code: actual monthly sales for each plotted year
# plus the forecast line.
for code_to_plot in codes_to_plot:
    plt.figure(figsize=(12, 6))

    actual_data = data[data['Code'] == code_to_plot]

    # The name is optional: the 'Name' column may be absent (the forecasting
    # step guards for that as well) and individual cells may be NaN, which the
    # text reshaper cannot process.
    product_name = ""
    if 'Name' in actual_data.columns and not actual_data.empty:
        first_name = actual_data['Name'].iloc[0]
        if pd.notna(first_name):
            product_name = str(first_name)

    # One line per historical year, each with its own colour and marker.
    for year, color, marker, style in [(1402, 'black', 'o', '-'), (1403, 'red', 's', '-'), (1404, 'green', 'D', '-')]:
        year_data = actual_data[actual_data['Year'] == year]
        if not year_data.empty:
            plt.plot(year_data['Month'], year_data['MainQty'], marker=marker, linestyle=style, color=color, label=f'Actual {year}')

    if code_to_plot in predicted_codes:
        # Selecting by code leaves one row per model; take the first one.
        prediction_data = df_predictions_pivoted.loc[code_to_plot].iloc[0]
        months_numeric = pd.to_numeric(prediction_data.index)
        plt.plot(months_numeric, prediction_data.values, marker='^', linestyle='--', color='blue', label='Prediction 1404')

    # Reshape and reorder the name for display in the title (presumably
    # Persian, right-to-left text).
    reshaped_text = arabic_reshaper.reshape(product_name)
    bidi_text = get_display(reshaped_text)

    plt.title(f'Seasonal Forecast vs. Actuals for Code: {code_to_plot} - {bidi_text}', fontsize=14)
    plt.xlabel('Month')
    plt.ylabel('Sales Quantity')
    plt.xticks(range(1, 13))
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f'plot_{code_to_plot}.png'), dpi=150)
    # Close the figure so open figures do not accumulate across iterations.
    plt.close()

print("Plot generation complete.")