In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA

# Read the four source tables from the working directory.
accounts = pd.read_csv('accounts.csv')
products = pd.read_csv('products.csv')
sales_teams = pd.read_csv('sales_teams.csv')
sales_pipeline = pd.read_csv('sales_pipeline.csv')

# Enrich the pipeline rows with account, product and sales-team attributes.
# Left joins keep every pipeline row even when a lookup key has no match.
df = sales_pipeline
for join_key, lookup in (
    ("account", accounts),
    ("product", products),
    ("sales_agent", sales_teams),
):
    df = df.merge(lookup, on=join_key, how="left")

# Feature Engineering
# Parse the date columns once; missing dates become NaT and are kept that way
# (a numeric placeholder would later be read as 1970-01-01).
df['engage_date'] = pd.to_datetime(df['engage_date'])
df['close_date'] = pd.to_datetime(df['close_date'])
df['deal_duration'] = (df['close_date'] - df['engage_date']).dt.days
df['conversion'] = (df['deal_stage'] == "Won").astype(int)
# NOTE(review): rows without a close date get deal_duration 0 (filled below)
# and conversion 0, so unfinished deals are presumably treated as not
# converted -- confirm that this is the intended modelling target.

# Handle missing values
# Fill per dtype instead of a blanket fillna(0): writing integer 0 into text
# columns produces mixed str/int values, which LabelEncoder rejects.
numeric_cols = df.select_dtypes(include='number').columns
df[numeric_cols] = df[numeric_cols].fillna(0)
text_cols = df.select_dtypes(exclude=['number', 'datetime']).columns
df[text_cols] = df[text_cols].fillna('Unknown')

# Encode Categorical Variables
# The fitted encoders are kept so the integer codes can be mapped back to
# their labels with inverse_transform.
categorical_cols = ['sector', 'office_location', 'regional_office', 'product',
                    'sales_agent', 'manager', 'series']
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    # astype(str) guarantees uniformly-typed input for the encoder.
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

# Build the design matrix / target and hold out 20% of the rows for testing.
features = ['sector', 'employees', 'revenue', 'sales_price', 'deal_duration']
X, y = df[features], df['conversion']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Three tree-based classifiers, all seeded for reproducibility.
clf_dt = DecisionTreeClassifier(random_state=42, max_depth=5)
clf_rf = RandomForestClassifier(random_state=42, n_estimators=100, max_depth=7)
clf_xgb = xgb.XGBClassifier(
    random_state=42, n_estimators=100, max_depth=5, learning_rate=0.1
)

# Fit each model on the training split, then predict the held-out rows
# (decision tree first, then random forest, then XGBoost).
y_pred_dt, y_pred_rf, y_pred_xgb = (
    model.fit(X_train, y_train).predict(X_test)
    for model in (clf_dt, clf_rf, clf_xgb)
)

# Model Evaluation
def evaluate_model(name, y_test, y_pred):
    """Print accuracy, classification report and confusion matrix for a model.

    Args:
        name: Label shown in the report header.
        y_test: True target values.
        y_pred: Predicted target values, aligned with ``y_test``.
    """
    print(f"\n{name} Model Performance:")
    # Each metric is computed immediately before it is printed.
    report_sections = (
        ("Accuracy:", accuracy_score),
        ("Classification Report:\n", classification_report),
        ("Confusion Matrix:\n", confusion_matrix),
    )
    for heading, metric in report_sections:
        print(heading, metric(y_test, y_pred))

# Report every trained model against the same held-out labels.
for model_label, predictions in (
    ("Decision Tree", y_pred_dt),
    ("Random Forest", y_pred_rf),
    ("XGBoost", y_pred_xgb),
):
    evaluate_model(model_label, y_test, predictions)

# Rank the random forest's feature importances, largest first, and chart them.
rf_importances = clf_rf.feature_importances_
feature_importance = pd.Series(rf_importances, index=features).sort_values(ascending=False)
feature_importance.plot.bar(title='Feature Importance in Lead Conversion')
plt.show()

# Time Series Forecasting (Exponential Smoothing & ARIMA)
forecast_horizon = 6    # number of months to forecast
seasonal_periods = 12   # months in one seasonal cycle

# Rows without a close date may carry a 0 placeholder from the missing-value
# fill; mask those so they parse to NaT instead of 1970-01-01.
raw_close = df['close_date']
df['close_date'] = pd.to_datetime(raw_close.where(raw_close != 0))

# Aggregate to a regular month-start series. Grouping by raw close dates gives
# an irregular index with no frequency, so the models would forecast the next
# 6 observations on an integer index rather than the next 6 months.
df_sales = (
    df.dropna(subset=['close_date'])
      .set_index('close_date')[['close_value']]
      .sort_index()
      .resample('MS')
      .sum()
)
monthly_sales = df_sales['close_value']

# Exponential Smoothing
# The seasonal component can only be initialised from at least two full
# cycles of data; with a shorter history fall back to a trend-only model.
has_full_cycles = len(monthly_sales) >= 2 * seasonal_periods
model_es = ExponentialSmoothing(
    monthly_sales,
    trend='add',
    seasonal='add' if has_full_cycles else None,
    seasonal_periods=seasonal_periods if has_full_cycles else None,
)
fitted_model_es = model_es.fit()
forecast_es = fitted_model_es.forecast(steps=forecast_horizon)

# ARIMA Model
# NOTE(review): order (5,1,0) estimates five AR terms; confirm the monthly
# history is long enough for that to be meaningful.
model_arima = ARIMA(monthly_sales, order=(5, 1, 0))
fitted_model_arima = model_arima.fit()
forecast_arima = fitted_model_arima.forecast(steps=forecast_horizon)

# Plot Forecast Results
# Both forecasts are indexed by month, so they continue the historical line
# on the same date axis.
plt.figure(figsize=(10, 5))
plt.plot(df_sales.index, monthly_sales, label="Historical Sales")
plt.plot(forecast_es.index, forecast_es, label="Exponential Smoothing Forecast", linestyle='dashed')
plt.plot(forecast_arima.index, forecast_arima, label="ARIMA Forecast", linestyle='dotted')
plt.legend()
plt.title(f"Sales Forecast (Next {forecast_horizon} Months)")
plt.show()
