In [2]:
import pandas as pd

# Load the cleaned sales export and swap the ORDERDATE column for its
# datetime-parsed version in one expression.
df = pd.read_csv("Auto_Sales_Cleaned.csv").assign(
    ORDERDATE=lambda frame: pd.to_datetime(frame['ORDERDATE'])
)

# 1. Top Product Lines by Total Sales
# Sum SALES within each PRODUCTLINE, then order the totals from largest to
# smallest.
top_products = (
    df.groupby('PRODUCTLINE')['SALES']
    .sum()
    .sort_values(ascending=False)
)
print("\n Top Product Lines by Total Sales:", top_products, sep="\n")

# 2. Correlation between Discount % and Sales
# DataFrame.corr() returns a 2x2 matrix for the two selected columns; the
# off-diagonal cell [0, 1] is the DiscountPct-vs-SALES coefficient.
correlation = df[['DiscountPct', 'SALES']].corr().iloc[0, 1]
print(f"\n Correlation between Discount % and Sales: {correlation:.2f}")

# A NaN coefficient (e.g. one column has no variance) and an exact zero both
# fail the `> 0` test, so they must be handled explicitly instead of being
# reported as "negative".
if pd.isna(correlation):
    discount_comment = "undefined"
    interpretation = (
        "The correlation is undefined, so no relationship between "
        "discounting and sales can be inferred."
    )
elif correlation == 0:
    discount_comment = "zero"
    interpretation = (
        "The correlation is zero, suggesting discounting has no linear "
        "effect on sales."
    )
else:
    discount_comment = "positive" if correlation > 0 else "negative"
    # NOTE: a correlation alone does not establish cause and effect; the
    # wording is kept unchanged so existing output stays comparable.
    interpretation = (
        f"The correlation is {discount_comment}, suggesting discounting "
        f"has a {discount_comment} effect on sales."
    )
print(f" Interpretation: {interpretation}")

# 3. Top Countries by Total Sales
# Total SALES per COUNTRY, ordered from highest to lowest, keeping only the
# first five entries.
top_countries = (
    df.groupby('COUNTRY')['SALES']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)
print("\n Top 5 Countries by Total Sales:", top_countries, sep="\n")

# 4. Monthly Average Sales
# Mean SALES for each value of the Month column, listed from the highest
# average to the lowest (not in calendar order).
monthly_avg_sales = (
    df.groupby('Month')['SALES']
    .mean()
    .sort_values(ascending=False)
)
print("\n Average Monthly Sales:", monthly_avg_sales, sep="\n")

# 5. Deal Size Distribution
# Count how many rows fall into each DEALSIZE category.
deal_sizes = df['DEALSIZE']
deal_size_dist = deal_sizes.value_counts()
print("\n Deal Size Distribution:", deal_size_dist, sep="\n")

# 6. Predictive Model Results (Optional: manually paste from Phase 3)
# Both metrics are hard-coded placeholders, not computed in this script;
# replace them with the actual values from Phase 3.
r2, rmse = 0.92, 1750.43
print(
    "\n Predictive Model Performance:",
    f"R² Score: {r2:.2f}",
    f"RMSE: {rmse:.2f}",
    sep="\n",
)

# Export the findings computed above to a plain-text report.
# Each entry is one section of the report; sections are separated by a blank
# line, reproducing the layout of the original sequence of write() calls.
summary_sections = [
    " Top Product Lines:\n" + top_products.to_string(),
    " Top Countries:\n" + top_countries.to_string(),
    " Monthly Sales Trends:\n" + monthly_avg_sales.to_string(),
    " Deal Size Distribution:\n" + deal_size_dist.to_string(),
    f" Discount % vs Sales Correlation: {correlation:.2f}",
    f" Model Performance:\nR² Score: {r2:.2f}\nRMSE: {rmse:.2f}",
]

# The report contains the non-ASCII character "²", so the encoding is pinned
# to UTF-8 rather than left to the platform's locale default, which could
# produce different bytes per machine or fail to encode the character.
with open("sales_insights_summary.txt", "w", encoding="utf-8") as f:
    f.write("\n\n".join(summary_sections))

print("\n Summary exported to: sales_insights_summary.txt")



 Top Product Lines by Total Sales:
PRODUCTLINE
Classic Cars        3842868.54
Vintage Cars        1806675.68
Trucks and Buses    1111559.19
Motorcycles         1103512.19
Planes               969323.42
Ships                700039.22
Trains               226243.47
Name: SALES, dtype: float64

 Correlation between Discount % and Sales: -0.26
 Interpretation: The correlation is negative, suggesting discounting has a negative effect on sales.

 Top 5 Countries by Total Sales:
COUNTRY
USA          3355575.69
Spain        1215686.92
France       1110916.52
Australia     630623.10
UK            478880.46
Name: SALES, dtype: float64

 Average Monthly Sales:
Month
4     3760.623371
5     3666.557778
7     3651.602624
2     3584.067678
3     3571.872864
11    3545.903141
10    3538.435336
12    3521.660347
6     3471.425802
8     3451.887801
1     3447.896471
9     3419.440175
Name: SALES, dtype: float64

 Deal Size Distribution:
DEALSIZE
Medium    1349
Small     1246
Large      152
Name: count