# Import necessary libraries for numerical operations, data handling, ML, and visualization

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Display the first few rows of the dataset to get an initial overview
# NOTE(review): `data` is not defined in the visible part of this file; it is
# presumably a pandas DataFrame loaded before this point -- confirm.

print("Head of the dataset:")
print(data.head())

# Display dataset metadata such as column types and missing values
# (info() emits its own report, so it is called without print())

print("Information:")
data.info()

# Display summary statistics for numerical features

print("Statistics:")
print(data.describe())

# Extract numerical columns and compute correlation matrix

numeric_data = data.select_dtypes(include=['number'])
correlation_matrix = numeric_data.corr()
print("Matrix:")
print(correlation_matrix)

# Set seaborn styling for better readability

sns.set(style="whitegrid")

# Define continuous variables for visualization

continuous_vars = [
    'mpg',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
]

# Plot histograms for continuous variables

for var in continuous_vars:
    # Only plot names that are present among the numeric columns
    if var in numeric_data.columns:
        plt.figure(figsize=(8, 4))
        # KDE curve adds smooth distribution estimation
        sns.histplot(data[var], kde=True, bins=30, color='blue')
        plt.title(f'Distribution of {var}')
        plt.xlabel(var)
        plt.ylabel('Frequency')
        plt.show()

# Generate a pairplot to observe relationships between numerical variables

# Keep only the continuous variables that are present as numeric columns
filtered_continuous_vars = [
    var for var in continuous_vars if var in numeric_data.columns
]

# Draw the pairplot only when at least one variable survived the filter
if filtered_continuous_vars:
    sns.pairplot(
        data[filtered_continuous_vars],
        diag_kind='kde',
        plot_kws={'alpha': 0.7},
    )
    # y=1.02 places the title slightly above the top of the figure
    plt.suptitle('Pairplot of Continuous Variables', y=1.02)
    plt.show()

# Boxplots help compare continuous variables across different categories

# NOTE(review): `discrete_vars` is not defined in the visible part of this
# file; it is presumably a list of categorical column names -- confirm it is
# assigned before this loop runs.
for discrete_var in discrete_vars:
    # Skip categorical names that are not columns of the dataset
    if discrete_var in data.columns:
        # One figure per (categorical, continuous) pair
        for continuous_var in filtered_continuous_vars:
            plt.figure(figsize=(8, 4))
            # Helps detect outliers and distribution trends
            sns.boxplot(x=data[discrete_var], y=data[continuous_var])
            plt.title(f'{continuous_var} by {discrete_var}')
            plt.xlabel(discrete_var)
            plt.ylabel(continuous_var)
            plt.show()

# Heatmap to visualize correlation strength between numerical variables

plt.figure(figsize=(10, 6))
# annot=True with fmt='.2f' writes each coefficient into its cell, 2 decimals
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt='.2f',
    linewidths=0.5,
)
plt.title('Correlation Heatmap')
plt.show()

# Save correlation matrix to CSV for further analysis
# (index=True keeps the row labels as the first column of the file)

correlation_matrix.to_csv('correlation_matrix.csv', index=True)

# If 'car name' exists in the dataset, count unique names

if 'car name' in data.columns:
    print("car names:")
    print(data['car name'].nunique())