# IBM Applied Data Science Capstone
## Part 3: Exploratory Data Analysis (EDA)

**Objective:** Explore patterns and relationships in the data, and surface insights through visualizations.

---


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppress every warning for the remainder of the session.
warnings.filterwarnings('ignore')

# Plot styling. The matplotlib style sheet is applied after the seaborn style,
# so for any rcParams that both define, the style-sheet values take effect.
# NOTE(review): confirm this ordering is intended.
sns.set_style("whitegrid")
plt.style.use('seaborn-v0_8')

# Read the sales table and derive a 'Date' column from the 'Year' and 'Month'
# columns, with the day component fixed to 1.
df = pd.read_csv('../data/automotive_sales.csv').assign(
    Date=lambda frame: pd.to_datetime(frame[['Year', 'Month']].assign(Day=1))
)

print("Dataset loaded successfully!")
print(f"Shape: {df.shape}")


## 1. Sales Over Time


In [None]:
# Line chart of mean 'Sales' per 'Date', saved to ../images/ and displayed.
df_time = (
    df.groupby('Date')['Sales']
    .mean()
)

# Explicit Figure/Axes handles instead of the implicit pyplot "current" figure.
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df_time.index, df_time.to_numpy(), color='steelblue', linewidth=2)
ax.set_title('Average Sales Over Time', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Average Sales', fontsize=12)
ax.grid(True, alpha=0.3)

fig.tight_layout()
fig.savefig('../images/sales_over_time.png', dpi=300, bbox_inches='tight')
plt.show()


## 2. Sales by Vehicle Type


In [None]:
# Bar chart of mean 'Sales' per 'Vehicle_Type', tallest bar first.
vehicle_sales = (
    df.groupby('Vehicle_Type')['Sales']
    .mean()
    .sort_values(ascending=False)
)

# One viridis colour per bar, sampled evenly across the colormap's full range.
colors = plt.cm.viridis(np.linspace(0, 1, num=len(vehicle_sales)))

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(vehicle_sales.index, vehicle_sales.to_numpy(), color=colors)
ax.set_title('Average Sales by Vehicle Type', fontsize=16, fontweight='bold')
ax.set_xlabel('Vehicle Type', fontsize=12)
ax.set_ylabel('Average Sales', fontsize=12)
# Rotate the category labels and right-align them.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

fig.tight_layout()
fig.savefig('../images/sales_by_vehicle_type.png', dpi=300, bbox_inches='tight')
plt.show()


## 3. Correlation Analysis


In [None]:
# Annotated heatmap of pairwise correlations between the selected columns.
numeric_cols = [
    'Sales',
    'Price',
    'Advertising_Expenditure',
    'Unemployment_Rate',
    'GDP',
    'Revenue',
]
corr_matrix = df.loc[:, numeric_cols].corr()

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    corr_matrix,
    ax=ax,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,  # centre the diverging colormap on zero correlation
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
)
ax.set_title('Correlation Heatmap of Numerical Variables', fontsize=16, fontweight='bold')

fig.tight_layout()
fig.savefig('../images/correlation_heatmap.png', dpi=300, bbox_inches='tight')
plt.show()
