In [None]:
import pandas as pd
# Read the car price dataset from the working directory and display it.
df = pd.read_csv(filepath_or_buffer="carprices.csv")
df

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Exploratory 2x2 figure: how the sell price relates to mileage, age and car model.
plt.style.use('default')
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Plot 1: Mileage vs Sell Price
ax_mileage = axes[0, 0]
ax_mileage.scatter(df['Mileage'], df['Sell Price($)'], alpha=0.7, c='blue')
ax_mileage.set_xlabel('Mileage')
ax_mileage.set_ylabel('Sell Price ($)')
ax_mileage.set_title('Mileage vs Sell Price')
ax_mileage.grid(True, alpha=0.3)

# Plot 2: Age vs Sell Price
ax_age = axes[0, 1]
ax_age.scatter(df['Age(yrs)'], df['Sell Price($)'], alpha=0.7, c='red')
ax_age.set_xlabel('Age (years)')
ax_age.set_ylabel('Sell Price ($)')
ax_age.set_title('Age vs Sell Price')
ax_age.grid(True, alpha=0.3)

# Plot 3: Car Model vs Sell Price (box plot for categorical data)
ax_box = axes[1, 0]
df.boxplot(column='Sell Price($)', by='Car Model', ax=ax_box)
# A grouped pandas boxplot adds its own figure-level
# "Boxplot grouped by Car Model" suptitle. On a shared 2x2 figure that title
# sits over all four panels, so clear it and rely on the per-axes title.
fig.suptitle('')
ax_box.set_xlabel('Car Model')
ax_box.set_ylabel('Sell Price ($)')
ax_box.set_title('Car Model vs Sell Price')
ax_box.tick_params(axis='x', rotation=45)

# Plot 4: Combined scatter plot with color coding by Car Model.
# sort=False keeps the groups in order of first appearance, so colors and
# legend entries follow the data order. The loop variable is deliberately not
# called `model`: that name is used for the regression object later in the
# notebook, and re-running this cell must not overwrite it with a string.
ax_by_model = axes[1, 1]
for car_model, car_rows in df.groupby('Car Model', sort=False):
    ax_by_model.scatter(car_rows['Mileage'], car_rows['Sell Price($)'],
                        label=car_model, alpha=0.7)
ax_by_model.set_xlabel('Mileage')
ax_by_model.set_ylabel('Sell Price ($)')
ax_by_model.set_title('Mileage vs Sell Price by Car Model')
ax_by_model.legend()
ax_by_model.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Display the pairwise correlation matrix of the numeric columns.
print("Correlation Analysis:")
print("="*50)
# 'number' selects every numeric dtype. Listing only float64/int64 would
# silently leave out columns stored as e.g. int32, float32 or nullable ints.
numeric_cols = df.select_dtypes(include='number').columns
correlation_matrix = df[numeric_cols].corr()
print(correlation_matrix)

In [None]:
# One-hot encode the categorical 'Car Model' column: one indicator column
# per distinct model name.
dummies = pd.get_dummies(data=df['Car Model'])
dummies

In [None]:
# Append the indicator columns to the right of the original frame
# (column-wise concatenation, aligned on the index).
merged = pd.concat(objs=[df, dummies], axis=1)
merged

In [None]:
# Remove the original text column now that it is encoded, together with one
# of the indicator columns. The dropped model becomes the implicit baseline
# (all remaining indicators equal to 0) — presumably done to avoid the
# dummy-variable trap.
final = merged.drop(columns=["Car Model", "Mercedez Benz C class"])
final

In [None]:
# Feature matrix: every column of `final` except the target price.
X = final.drop(columns='Sell Price($)')
X

In [None]:
# Target vector: the sell price column.
y = final.loc[:, 'Sell Price($)']
y

In [None]:
from sklearn.linear_model import LinearRegression
# Ordinary least-squares regressor; fit_intercept=True is the library default,
# spelled out here for the reader.
model = LinearRegression(fit_intercept=True)

In [None]:
# Fit the regression on the full feature matrix and target.
model.fit(X=X, y=y)

In [None]:
# Score the model on the same X and y it was fitted on, so this is a
# training-set score, not an estimate of out-of-sample performance.
model.score(X=X, y=y)

**Price of a Mercedes-Benz C-Class that is 4 years old with a mileage of 45,000**

In [12]:
# Values are given positionally in the order of X.columns: mileage, age, then
# the remaining car-model indicator columns (both 0 selects the dropped
# baseline model) — verify the exact order with X.columns.
# Wrapping the row in a DataFrame that carries X's column names keeps the
# input consistent with the data the model was fitted on; a bare nested list
# makes scikit-learn warn that X does not have valid feature names.
model.predict(pd.DataFrame([[45000, 4, 0, 0]], columns=X.columns))



array([36991.31721061])

**Price of a BMW X5 that is 7 years old with a mileage of 86,000**

In [13]:
# Values are given positionally in the order of X.columns: mileage, age, then
# the remaining car-model indicator columns (the last one set to 1 is
# presumably the BMW X5 indicator — verify with X.columns).
# Wrapping the row in a DataFrame that carries X's column names keeps the
# input consistent with the data the model was fitted on; a bare nested list
# makes scikit-learn warn that X does not have valid feature names.
model.predict(pd.DataFrame([[86000, 7, 0, 1]], columns=X.columns))



array([11080.74313219])