In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Load the raw dataset.
# NOTE(review): 'data.csv' is resolved relative to the current working
# directory; confirm that is where the file is expected to live.
df = pd.read_csv('data.csv')

# Preview the dataset
print("Dataset Overview:")
print(df.head())

# Check for missing values (per-column count of nulls)
print("\nMissing Values:")
print(df.isnull().sum())

# Fill missing values in numeric columns with that column's median.
# numeric_only=True is spelled out because on pandas 2.x the default
# (numeric_only=False) makes median() raise as soon as the frame contains a
# text column. Non-numeric columns are not in the resulting Series, so their
# missing values are left untouched by fillna.
column_medians = df.median(numeric_only=True)
df = df.fillna(column_medians)

# Data summary
print("\nDataset Description:")
print(df.describe())

# Correlation heatmap
print("\nCorrelation Heatmap:")
plt.figure(figsize=(10, 6))
# Correlate only numeric/boolean columns. On pandas 2.x, DataFrame.corr()
# no longer drops text columns silently and raises when it meets one;
# selecting the dtypes up front works the same on every pandas version.
numeric_columns = df.select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_columns.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Visualization: Distribution of a specific column (e.g., 'Price')
print("\nDistribution of 'Price':")
# Open a dedicated figure, as the other plots in this script do. Without it,
# a backend whose plt.show() does not close the previous figure would have
# the histogram drawn on top of the correlation heatmap's axes.
plt.figure(figsize=(8, 5))
sns.histplot(df['Price'], kde=True, bins=20, color='blue')
plt.title('Price Distribution')
plt.xlabel('Price')
plt.ylabel('Frequency')
plt.show()

# Scatter plot: how 'Price' varies with 'Area'
print("\nScatter Plot of Area vs Price:")
plt.figure(figsize=(8, 5))
# seaborn returns the Axes it drew on; title and axis labels are set on it
# directly instead of going through the pyplot current-axes helpers.
scatter_ax = sns.scatterplot(data=df, x='Area', y='Price', color='green')
scatter_ax.set_title('Area vs Price')
scatter_ax.set_xlabel('Area')
scatter_ax.set_ylabel('Price')
plt.show()

# Persist the cleaned frame (without the index column) and report where it went
cleaned_csv_path = 'cleaned_data.csv'
df.to_csv(cleaned_csv_path, index=False)
print(f"\nCleaned dataset saved as '{cleaned_csv_path}'")
