<a href="https://colab.research.google.com/github/moushumipriya/AgroVet-Sales-Forecasting-ML/blob/main/AgroVet_Sales_Forecasting_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
# Silence every warning category for the rest of the notebook so cell output stays uncluttered.
# NOTE(review): this is a blanket filter, so deprecation and pandas warnings are hidden too —
# consider narrowing it to specific categories once the noisy ones are identified.
warnings.filterwarnings('ignore')


In [None]:
# Read the sales records from the CSV file expected next to this notebook.
DATA_PATH = "agro_vet_sales_data.csv"
df = pd.read_csv(DATA_PATH)

# Report the (rows, columns) shape, then preview the first records as the cell output.
print(df.shape)
df.head()


In [None]:
# Inspect the frame before cleaning: structure, summary statistics, missing values.
# A notebook cell only renders the value of its LAST expression, so the
# summary statistics must be passed to display() explicitly; as a bare
# mid-cell expression their output would be silently discarded.
df.info()               # prints column dtypes and non-null counts itself
display(df.describe())  # summary statistics (display() is provided by the IPython kernel)
df.isnull().sum()       # missing-value count per column, shown as the cell output


In [None]:
# Keep only complete rows (no missing value in any column) ...
df = df.dropna()
# ... whose sales figure is strictly positive.
# .copy() makes the filtered result an independent frame: later cells add
# columns to `df` by assignment, and doing that on a boolean-mask selection is
# the pattern pandas reports with SettingWithCopyWarning.
df = df[df['Total_Sales'] > 0].copy()


In [None]:
# Sales trend over time
# Group on parsed datetimes instead of the raw 'Date' column. At this point in
# the notebook 'Date' has not been converted yet, and grouping on text values
# orders the x-axis lexicographically rather than chronologically.
# pd.to_datetime is a no-op if the column is already datetime-typed.
sales_by_date = df.groupby(pd.to_datetime(df['Date']))['Total_Sales'].sum()

plt.figure(figsize=(12,6))
sales_by_date.plot()
plt.title("Total Sales Over Time")
plt.xlabel("Date")
plt.ylabel("Total Sales (BDT)")
plt.show()




In [None]:
# Product-wise average sales
# Mean of Total_Sales per product, kept as a two-column frame for seaborn.
avg_sales_by_product = df.groupby('Product_Name', as_index=False)['Total_Sales'].mean()

plt.figure(figsize=(12,6))
sns.barplot(
    data=avg_sales_by_product,
    x='Product_Name',
    y='Total_Sales',
)
plt.title("Average Sales per Product")
plt.xticks(rotation=45)  # tilt the product labels on the x-axis
plt.show()


In [None]:
# Derive calendar and promotion features, then drop the source columns.
# NOTE(review): .map() yields NaN for any 'Promotion' value other than exactly
# 'Yes' or 'No' — confirm the column contains only those two labels.
df = (
    df.assign(Date=lambda frame: pd.to_datetime(frame['Date']))
      .assign(
          Month=lambda frame: frame['Date'].dt.month,
          Year=lambda frame: frame['Date'].dt.year,
          Is_Promotion=lambda frame: frame['Promotion'].map({'Yes':1, 'No':0}),
      )
      .drop(columns=['Promotion', 'Date'])
)


In [None]:
# Integer-encode the categorical columns.
# Each column gets its OWN fitted encoder, stored in `label_encoders`, so the
# integer codes can be mapped back to the original labels later via
# label_encoders[col].inverse_transform(...). Re-fitting one shared encoder
# would overwrite the mapping of every column except the last.
label_encoders = {}
for col in ['Product_Name', 'Category', 'Region']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le
# After the loop `le` is the encoder fitted on the last column ('Region').


In [None]:
# Separate the prediction target from the remaining (feature) columns.
target_col = 'Total_Sales'
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so the test rows do not influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)


In [None]:
# Random forest with 200 trees; the fixed seed makes training repeatable.
forest_params = {'n_estimators': 200, 'random_state': 42}
model = RandomForestRegressor(**forest_params)

# Train on the scaled training features, then predict the held-out rows.
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)


In [None]:
# Score the held-out predictions: absolute error, root of the squared error, and R².
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# One "label value" line per metric.
for label, value in (("MAE:", mae), ("RMSE:", rmse), ("R² Score:", r2)):
    print(label, value)


In [None]:
# Pair each fitted importance score with its feature name (same column order as X).
importances = pd.Series(model.feature_importances_, index=X.columns)

# Bar chart with the most important feature first.
ranked_importances = importances.sort_values(ascending=False)
ranked_importances.plot.bar(figsize=(10,6))
plt.title("Feature Importance in Sales Prediction")
plt.show()


In [None]:
# Future prediction (example)
# Take a few held-out rows and shift them one calendar month forward.
future_data = X_test.iloc[:5].copy()

# Rows currently in December wrap around to January, so their year must
# advance as well; record them before 'Month' is overwritten.
wraps_to_january = future_data['Month'] == 12
future_data['Month'] = (future_data['Month'] % 12) + 1  # next month
future_data.loc[wraps_to_january, 'Year'] += 1

# The model was fitted on scaler-transformed features, so new rows must go
# through the same fitted scaler before predict(); feeding raw values would
# compare unscaled inputs against split thresholds learned in scaled space.
future_data_scaled = scaler.transform(future_data)
next_month_pred = model.predict(future_data_scaled)
print("Predicted next month sales (sample):")
print(next_month_pred)


In [None]:
# Overlay actual and predicted sales for the first held-out samples.
n_shown = 30
fig, ax = plt.subplots(figsize=(10,6))
ax.plot(y_test.values[:n_shown], label='Actual', marker='o')
ax.plot(y_pred[:n_shown], label='Predicted', marker='x')
ax.set_title("Actual vs Predicted Sales (Sample)")
ax.set_xlabel("Sample Index")
ax.set_ylabel("Sales (BDT)")
ax.legend()
plt.show()


## Business Insights

- The feature-importance plot shows the top features influencing sales.
- The seasonal trend shows which months have high demand.
- The impact of promotions on sales is measurable.
- These results can help agro/vet companies optimize inventory and marketing.