In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the data
# NOTE: this is an absolute, machine-specific path; the CSV must exist at
# exactly this location for the script to run.
csv_path = '/Users/leonleidner/app-coding-agent/backend/uploads/mpg.csv'
data = pd.read_csv(csv_path)

# 1. Summary of the task and data used
# A DataFrame's shape is always (rows, columns), so it unpacks into two names.
row_count, column_count = data.shape
print('MPG and Weight Correlation Analysis')
print('Dataset contains {} rows and {} columns'.format(row_count, column_count))
# sep='\n' puts the heading and the table on separate lines, exactly as two
# consecutive print calls would.
print('\nFirst 5 rows:', data.head(), sep='\n')

# 2. Detailed dataset description with visualizations
# describe() reports summary statistics for the numeric columns.
print('\nBasic Statistics:', data.describe(), sep='\n')

# Scatter plot
# Use explicit Figure/Axes objects instead of pyplot's implicit
# "current figure" state, so each call names the axes it draws on.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=data, x='weight', y='mpg', ax=scatter_ax)
scatter_ax.set_title('MPG vs Weight')
scatter_ax.set_xlabel('Weight')
scatter_ax.set_ylabel('MPG')
plt.show()

# Histograms
# One row, two panels: MPG on the left, weight on the right, each with a
# kernel density estimate overlaid on the bars.
hist_fig, hist_axes = plt.subplots(1, 2, figsize=(12, 5))
panels = (
    ('mpg', 'MPG Distribution'),
    ('weight', 'Weight Distribution'),
)
for axis, (column, title) in zip(hist_axes, panels):
    sns.histplot(data[column], kde=True, ax=axis)
    axis.set_title(title)
hist_fig.tight_layout()
plt.show()

# 3. Correlation analysis
# Series.corr computes the Pearson coefficient by default and ignores rows
# where either value is missing. The result is NaN when it cannot be computed
# (for example, too few valid rows or a constant column).
correlation = data['mpg'].corr(data['weight'])
print(f'\nPearson correlation coefficient between MPG and Weight: {correlation:.2f}')

# 4. Actionable recommendations
# The wording is derived from the measured coefficient instead of assuming a
# strong negative relationship, so the printed conclusions cannot contradict
# the number reported above. The cut-offs are common rule-of-thumb values for
# describing |r|, not statistical tests.
STRONG_THRESHOLD = 0.7
WEAK_THRESHOLD = 0.3

recommendations = []
if pd.isna(correlation):
    recommendations.append(
        'The correlation could not be computed; check the mpg and weight '
        'columns for missing or constant values'
    )
elif abs(correlation) < WEAK_THRESHOLD:
    recommendations.append(
        'The weak correlation suggests that weight alone does not explain '
        'MPG in this dataset'
    )
else:
    strength = 'strong' if abs(correlation) >= STRONG_THRESHOLD else 'moderate'
    if correlation < 0:
        # For a strongly negative coefficient these two lines reproduce the
        # original report text exactly.
        recommendations.append(
            f'The {strength} negative correlation suggests that lighter '
            'vehicles tend to have better MPG'
        )
        recommendations.append(
            'Consider weight reduction strategies for vehicles where fuel '
            'efficiency is a priority'
        )
    else:
        recommendations.append(
            f'The {strength} positive correlation suggests that heavier '
            'vehicles tend to have better MPG in this dataset; verify the '
            'data before acting on it'
        )
# Looking beyond weight is sensible regardless of the coefficient.
recommendations.append(
    'Further analysis could explore other factors affecting MPG beyond weight'
)

print('\nRecommendations:')
for number, text in enumerate(recommendations, start=1):
    print(f'{number}. {text}')