# Car Prices Data Analysis

**Assignment-Ready Jupyter Notebook**

This notebook covers:
- Data ingestion & quality profiling  
- DataFrame queries  
- Data visualization with insights  


## 1. Data Ingestion & Quality Profiling

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the raw CSV into the working frame used by every later cell,
# then preview the first rows.
csv_path = "car_prices.csv"
df = pd.read_csv(csv_path)
df.head()


In [None]:
# Print pandas' built-in summary of the frame (columns, dtypes, non-null counts).
df.info()

### 1.2 Understanding the Data Structure

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# dtype pandas inferred for each column.
df.dtypes

### 1.3 Missing & Anomaly Detection

In [None]:
# Count the missing entries in each column.
null_mask = df.isnull()
null_mask.sum()

In [None]:
# Bar chart of the per-column missing-value counts.
missing_per_column = df.isnull().sum()
missing_per_column.plot(kind='bar')
plt.title("Missing Values per Column")
plt.show()


In [None]:
# Impute missing values column by column: numeric columns receive their
# median, every other column receives its most frequent value. Exact
# duplicate rows are dropped afterwards.
for col in df.columns:
    if not df[col].isnull().any():
        continue  # nothing to fill in this column

    if pd.api.types.is_numeric_dtype(df[col]):
        # Recognises every numeric dtype rather than only the literal
        # 'int64' / 'float64' names, so e.g. int32 or float32 columns are
        # filled with a median instead of falling through to the mode.
        fill_value = df[col].median()
    else:
        modes = df[col].mode()
        if modes.empty:
            # A column that is entirely missing has no mode; indexing
            # modes[0] would raise, so leave the column untouched.
            continue
        fill_value = modes.iloc[0]

    df[col] = df[col].fillna(fill_value)

df = df.drop_duplicates()


## 2. DataFrame Queries

In [None]:
# Mean, minimum and maximum of the selling price.
summary_funcs = ['mean', 'min', 'max']
df['sellingprice'].agg(summary_funcs)

In [None]:
# Distinct values present in the color column.
color_column = df['color']
color_column.unique()

In [None]:
# Number of distinct makes and distinct models, as a (makes, models) tuple.
tuple(df[column].nunique() for column in ('make', 'model'))

In [None]:
# Rows whose selling price exceeds 165000.
above_price_cutoff = df['sellingprice'] > 165000
df[above_price_cutoff]

In [None]:
# Five most frequent models.
model_counts = df['model'].value_counts()
model_counts.head(5)

In [None]:
# Average selling price per make, highest first.
avg_price_by_make = df.groupby('make')['sellingprice'].mean()
avg_price_by_make.sort_values(ascending=False)

In [None]:
# Lowest selling price for each interior value.
price_by_interior = df.groupby('interior')['sellingprice']
price_by_interior.min()

In [None]:
# Highest odometer reading per year, largest first.
max_odometer_by_year = df.groupby('year')['odometer'].max()
max_odometer_by_year.sort_values(ascending=False)

In [None]:
# Derive a car_age column as the gap between the reference year and `year`.
reference_year = 2025
df['car_age'] = reference_year - df['year']

In [None]:
# Number of rows with condition of at least 48 and odometer above 90000.
condition_ok = df['condition'] >= 48
odometer_high = df['odometer'] > 90000
len(df[condition_ok & odometer_high])


In [None]:
# Average selling price per state for rows whose `year` is after 2013,
# showing the five highest averages.
# The chain is wrapped in parentheses: a continuation line that begins with
# `.` outside brackets is a SyntaxError.
(
    df[df['year'] > 2013]
    .groupby('state')['sellingprice']
    .mean()
    .sort_values(ascending=False)
    .head()
)


In [None]:
# Among rows at or above the 80th percentile of `condition`, compute the
# average selling price per make and show the five lowest averages.
threshold = df['condition'].quantile(0.80)
# Parenthesised so the multi-line method chain parses; a line starting with
# `.` outside brackets is a SyntaxError.
(
    df[df['condition'] >= threshold]
    .groupby('make')['sellingprice']
    .mean()
    .sort_values()
    .head()
)


## 3. Data Visualization & Insights

In [None]:
# Pairwise correlation matrix over the int64/float64 columns.
numeric_frame = df.select_dtypes(include=['int64', 'float64'])
numeric_frame.corr()


In [None]:
# Line plot of the mean selling price for each year.
avg_price_by_year = df.groupby('year')['sellingprice'].mean()
avg_price_by_year.plot()
plt.title("Average Selling Price by Year")
plt.show()


In [None]:
# Average selling price across the full odometer range.
# Grouping on the raw odometer value creates one group per distinct reading,
# and keeping only the first 100 groups plotted just the lowest readings.
# Bucketing into fixed-width bands (the same floor-division binning this
# notebook uses for condition ranges) covers the whole range and smooths
# the per-reading noise.
odometer_band_width = 10000
odometer_band = (df['odometer'] // odometer_band_width) * odometer_band_width
df.groupby(odometer_band)['sellingprice'].mean().plot()
plt.title("Average Selling Price by Odometer")
plt.show()


In [None]:
# Bar chart of the number of rows per state, most frequent first.
rows_per_state = df['state'].value_counts()
rows_per_state.plot(kind='bar')
plt.title("Cars Sold by State")
plt.show()


In [None]:
# Floor each condition value to the start of its width-5 band, then plot the
# mean selling price per band.
band_width = 5
df['condition_range_5'] = (df['condition'] // band_width) * band_width
avg_price_per_band = df.groupby('condition_range_5')['sellingprice'].mean()
avg_price_per_band.plot(kind='bar')
plt.title("Avg Price by Condition Range (Size 5)")
plt.show()


In [None]:
# Floor each condition value to the start of its width-10 band, then plot how
# many rows fall in each band, ordered by band.
band_width = 10
df['condition_range_10'] = (df['condition'] // band_width) * band_width
rows_per_band = df['condition_range_10'].value_counts().sort_index()
rows_per_band.plot(kind='bar')
plt.title("Cars Sold by Condition Range (Size 10)")
plt.show()


In [None]:
# Box plot of the selling price distribution for each color.
df.boxplot(column='sellingprice', by='color', rot=90)
# boxplot(by=...) adds its own figure-level "Boxplot grouped by color"
# heading; clear it so only the custom title below is shown.
plt.suptitle("")
plt.title("Selling Price Distribution by Color")
plt.show()
