Load the dataset from a CSV file.

In [None]:
import pandas as pd

# Read car_data.csv (resolved relative to the working directory) into a DataFrame.
csv_path = 'car_data.csv'
data = pd.read_csv(csv_path)

Provide a summary of the dataset.

In [None]:
# Compute the descriptive statistics first, then show them as the cell's output.
dataset_summary = data.describe()
dataset_summary

Display a random sample of 5 rows from the dataset.

In [None]:
# Show up to five randomly chosen rows. The fixed seed makes the displayed rows
# identical on every run, and capping n at the row count avoids the ValueError
# that sample() raises when asked for more rows than the frame contains.
data.sample(n=min(5, len(data)), random_state=42)

Remove specified unused columns from the dataset.

In [None]:
# Drop the unused column by rebinding `data` to the frame that drop() returns
# instead of mutating in place: this keeps the call chainable and avoids the
# discouraged inplace flag (which makes drop() return None).
data = data.drop(columns=['unused_column'])

Convert specified columns to categorical data types.

In [None]:
# Convert the column to pandas' categorical dtype, then write it back under the same name.
column_as_category = data['column'].astype(dtype='category')
data['column'] = column_as_category

Visualize data using a bar chart.

In [None]:
import matplotlib.pyplot as plt

# Draw the bars on an explicit Axes and label it so the figure can be read on its own.
# NOTE(review): one bar is drawn per row; if 'category' contains repeated values the
# bars at the same x position overlap — aggregate first (e.g. groupby) if that is
# not what is intended.
fig, ax = plt.subplots()
ax.bar(data['category'], data['value'])
ax.set(xlabel='category', ylabel='value', title='value by category')
plt.show()

Remove the specified column from the dataset.

In [None]:
# Remove the column by rebinding `data` to the frame that drop() returns rather
# than mutating in place: this keeps the call chainable and avoids the
# discouraged inplace flag (which makes drop() return None).
data = data.drop(columns=['column_to_drop'])

Identify categorical columns in the dataset.

In [None]:
# Collect the names of every column whose dtype is pandas' categorical type.
categorical_frame = data.select_dtypes(include='category')
categorical_cols = categorical_frame.columns.to_list()

Identify numerical columns in the dataset.

In [None]:
# Collect the names of every numeric column. 'number' matches all integer and
# float widths, whereas listing only 'float64' and 'int64' silently skips
# columns stored as, for example, int32 or float32.
numerical_cols = data.select_dtypes(include='number').columns.tolist()

Split the dataset into features (X) and target (y).

In [None]:
# Separate the target column (y) from the remaining feature columns (X).
target_name = 'target_column'
y = data[target_name]
X = data.drop(columns=[target_name])

One-hot encode categorical variables in the feature set.

In [None]:
# One-hot encode the listed categorical columns. drop_first=True keeps k-1
# indicator columns for a k-level feature by removing the first level.
X = pd.get_dummies(
    data=X,
    columns=categorical_cols,
    drop_first=True,
)

Concatenate the processed features and target into a single DataFrame.

In [None]:
# Place the encoded features and the target side by side (column-wise) in one frame.
frames_to_join = [X, y]
processed_data = pd.concat(objs=frames_to_join, axis='columns')

Split the dataset into training and test sets.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

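Standardize the feature data, fitting the scaler on the training set only and reusing it for the test set, so that no test-set statistics influence training.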

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only, then apply the
# same fitted transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Initialize a linear regression model.

In [None]:
from sklearn.linear_model import LinearRegression

# Create an unfitted linear regression estimator with the library's default settings.
model: LinearRegression = LinearRegression()

Train the model using the scaled training data.

In [None]:
# Fit the regression on the scaled training features and the training targets.
model.fit(X=X_train_scaled, y=y_train)

Evaluate the model on the test set using Mean Squared Error (MSE) and Mean Absolute Error (MAE).

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Predict on the scaled test features and score the predictions against y_test.
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Report the metrics; without this the cell ends in an assignment and shows nothing.
print(f"Mean Squared Error:  {mse:.4f}")
print(f"Mean Absolute Error: {mae:.4f}")