### Import and load the dataset

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Read the car-sales CSV from the mounted Drive folder and preview the first rows
csv_path = '/content/drive/MyDrive/PW-Skills/Nov-Assigments/EDA-2/Car Sale.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Dimensions of the loaded DataFrame as (rows, columns)
df.shape

### Q1. What is the average selling price of cars for each dealer and how does it compare across different dealers?

In [None]:
# Mean sale price per dealer, ranked from the highest average to the lowest
dealer_groups = df.groupby('Dealer_Name')
mean_price_by_dealer = dealer_groups['Price ($)'].mean()
mean_price_by_dealer.sort_values(ascending=False)

### Q2. Which car brand (Company) has the highest variation in prices, and what does this tell us about the pricing trends?

In [None]:
# Summarise each brand's prices. Both the range (max - min) and the standard
# deviation describe variation; the table is ranked by range, widest first,
# so the brand with the highest variation is the first row.
(
    df.groupby('Company')['Price ($)']
    .agg(['min', 'max', 'mean', 'std'])
    .assign(price_range=lambda x: x['max'] - x['min'])
    .sort_values('price_range', ascending=False)
)

### Q3. What is the distribution of car prices for each transmission type, and how do the interquartile ranges compare?

In [None]:
# One box per transmission type: the box height is the interquartile range,
# so the spreads can be compared side by side
sns.boxplot(x='Transmission', y='Price ($)', data=df)

### Q4. What is the distribution of car prices across different regions?

In [None]:
# Box plot of sale prices, one box per dealer region (pandas plotting, grid lines off)
df.boxplot(by='Dealer_Region', column='Price ($)', grid=False)

### Q5. What is the distribution of cars based on body styles?

In [None]:
# Number of cars sold for each body style, most common first
body_styles = df['Body Style']
body_styles.value_counts()

### Q6. How does the average selling price of cars vary by customer gender and annual income?

In [None]:
# Per-gender averages of the sale price and of the customer's annual income
mean_columns = {'Price ($)': 'mean', 'Annual Income': 'mean'}
gender_groups = df.groupby('Gender')
gender_groups.agg(mean_columns)

### Q7. What is the distribution of car prices by region, and how does the number of cars sold vary by region?

In [None]:
# Two panels side by side: price spread per dealer region (left)
# and number of cars sold per region (right)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))
price_ax, count_ax = ax
sns.boxplot(x='Dealer_Region', y='Price ($)', data=df, ax=price_ax)
region_counts = df['Dealer_Region'].value_counts()
region_counts.plot(kind='bar', ax=count_ax)

### Q8. How does the average car price differ between cars with different engine sizes?

In [None]:
# Mean sale price for each distinct value of the Engine column, highest first
engine_groups = df.groupby('Engine')
mean_price_by_engine = engine_groups['Price ($)'].mean()
mean_price_by_engine.sort_values(ascending=False)

### Q9. How do car prices vary based on the customer’s annual income bracket?

In [None]:
# Bucket customers into income brackets and compare the mean sale price per bracket.
# pd.cut uses right-closed intervals by default, so '0-30k' covers (0, 30000], etc.
income_bins = [0, 30000, 60000, 100000, 150000, float('inf')]
income_labels = ['0-30k', '30k-60k', '60k-100k', '100k-150k', '150k+']
df['Income_Bracket'] = pd.cut(df['Annual Income'], bins=income_bins, labels=income_labels)

# Income_Bracket is categorical: pass observed=False explicitly so every bracket
# (including empty ones) stays in the result and recent pandas versions do not
# emit a FutureWarning about the changing default.
df.groupby('Income_Bracket', observed=False)['Price ($)'].mean()

### Q10. What are the top 5 car models with the highest number of sales, and how does their price distribution look?

In [None]:
# Find the five most frequently sold models, then draw one price box per model
top_5_models = df['Model'].value_counts().head(5).index
is_top_seller = df['Model'].isin(top_5_models)
sns.boxplot(x='Model', y='Price ($)', data=df[is_top_seller])

### Q11. How does car price vary with engine size across different car colors, and which colors have the highest price variation?

In [None]:
# Scatter of price against the Engine column, coloured by car colour; then rank
# colours by the standard deviation of their prices (largest variation first)
sns.scatterplot(x='Engine', y='Price ($)', hue='Color', data=df)
price_std_by_color = df.groupby('Color')['Price ($)'].std()
price_std_by_color.sort_values(ascending=False)

### Q12. Is there any seasonal trend in car sales based on the date of sale?

In [None]:
# Parse the sale date, keep only its month number, and plot how many sales fall in each month
sale_dates = pd.to_datetime(df['Date'])
df['Sale_Month'] = sale_dates.dt.month
monthly_sales = df.groupby('Sale_Month').size()
monthly_sales.plot(kind='line', title="Car Sales by Month")

### Q13. How does the car price distribution change when considering different combinations of body style and transmission type?

In [None]:
# Price boxes grouped by body style, split within each group by transmission type
sns.boxplot(x='Body Style', y='Price ($)', hue='Transmission', data=df)

### Q14. What is the correlation between car price, engine size, and annual income of customers, and how do these features interact?

In [None]:
# Select the three features of interest once and reuse the selection below
price_engine_income = df[['Price ($)', 'Engine', 'Annual Income']]

# Check the data types to see which of the selected columns are numeric
print(price_engine_income.dtypes)

# Calculate correlation only for numeric columns: numeric_only=True skips any
# non-numeric column instead of raising on recent pandas versions
correlation_matrix = price_engine_income.corr(numeric_only=True)
print(correlation_matrix)

# Visualizing the pairwise relationships with a pairplot
sns.pairplot(price_engine_income)

### Q15. How does the average car price vary across different car models and engine types?

In [None]:
# Grouping by car model and engine type, then calculating the average car price.
# unstack() pivots the engine types into columns, giving one row per model;
# model/engine combinations that never occur show up as NaN.
avg_price_by_model_engine = df.groupby(['Model', 'Engine'])['Price ($)'].mean().unstack()

# unstack() already returns a DataFrame, so it can be used directly
avg_price_df = avg_price_by_model_engine

# Displaying the DataFrame
avg_price_df