In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the data
# NOTE: the path is absolute and machine-specific; the script only runs where
# this exact file exists.
df = pd.read_csv('/Users/leonleidner/app-coding-agent/backend/uploads/mpg.csv')

# Data Exploration
print("\n=== Data Overview ===")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print("\n=== First 5 Rows ===")
print(df.head())
print("\n=== Descriptive Statistics ===")
print(df.describe())

# Data Cleaning
# Coerce horsepower to numeric *before* reporting missing values: any
# non-numeric placeholder (e.g. '?') becomes NaN here, so the report below
# counts those entries instead of showing the column as complete.
df['horsepower'] = pd.to_numeric(df['horsepower'], errors='coerce')

print("\n=== Missing Values ===")
print(df.isnull().sum())

# Visualizations
# Each plot lives in its own figure. No figure is opened ahead of the
# pairplot: sns.pairplot is a figure-level function that builds its own
# figure, so a preceding plt.figure() would only leave an empty one behind.

# 1. Pairplot of numerical variables
print("\nCreating Pairplot...")
sns.pairplot(df[['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration']])
# The pairplot figure is the current figure at this point, so the title is
# attached to it; y > 1 places the title above the top row of axes.
plt.suptitle('Pairplot of Numerical Variables', y=1.02)

# 2. Correlation Heatmap
print("\nCreating Correlation Heatmap...")
plt.figure(figsize=(10,8))
# Correlate numeric columns only. pandas >= 2.0 no longer drops non-numeric
# columns silently in DataFrame.corr() and raises on text columns instead;
# selecting numeric dtypes first works the same way on old and new versions.
corr = df.select_dtypes(include='number').corr()
sns.heatmap(corr, annot=True, cmap='coolwarm', center=0)
plt.title('Correlation Heatmap')

# 3. MPG Distribution by Cylinders
print("\nCreating MPG Distribution by Cylinders...")
plt.figure(figsize=(10,6))
sns.boxplot(x='cylinders', y='mpg', data=df)
plt.title('MPG Distribution by Number of Cylinders')

# 4. MPG vs Weight
print("\nCreating MPG vs Weight Scatterplot...")
plt.figure(figsize=(10,6))
sns.scatterplot(x='weight', y='mpg', hue='origin', data=df)
plt.title('MPG vs Weight by Origin')

# 5. MPG Trends Over Years
print("\nCreating MPG Trends Over Years...")
plt.figure(figsize=(12,6))
# lineplot aggregates the mpg values that share a model_year into one point
# per year; ci=None turns off the confidence band around that line.
# NOTE(review): `ci` is deprecated in seaborn >= 0.12 in favour of
# `errorbar=None`; kept as-is so older seaborn versions keep working.
sns.lineplot(x='model_year', y='mpg', data=df, ci=None)
plt.title('Average MPG Trends Over Model Years')

# Analysis and Recommendations
# The statements below are fixed text; nothing here is computed from `df`.
# Each section is printed as a heading followed by its numbered lines, one
# per output line.
findings = (
    "1. Strong negative correlation between mpg and weight/displacement (-0.83 and -0.80 respectively)",
    "2. MPG has generally increased over model years",
    "3. Vehicles with more cylinders tend to have lower MPG",
    "4. Origin 3 (likely Japan/Europe) vehicles tend to be more fuel efficient",
)
recommendations = (
    "1. Focus on weight reduction to improve fuel efficiency",
    "2. Consider smaller displacement engines for better MPG",
    "3. Study design elements from origin 3 vehicles that contribute to higher efficiency",
    "4. Continue the trend of improving MPG over model years",
)

print("\n=== Key Findings ===", *findings, sep="\n")

print("\n=== Recommendations ===", *recommendations, sep="\n")