In [3]:
import numpy as np
import pandas as pd

import sklearn

# Libraries to help with data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Display options: show every column, but cap printed rows at 100
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

# Silence all warnings for the rest of the session
# NOTE(review): this blanket filter also hides deprecation/future warnings raised by pandas.
import warnings
warnings.filterwarnings("ignore")

#### Question 1. Pandas version

In [4]:
# Report which pandas release this notebook ran against
installed_pandas = pd.__version__
print(installed_pandas)

2.3.3


#### Question 2. Records count

In [5]:
!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv

--2025-10-02 21:06:10--  https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 874188 (854K) [text/plain]
Saving to: ‘car_fuel_efficiency.csv’


2025-10-02 21:06:10 (4.78 MB/s) - ‘car_fuel_efficiency.csv’ saved [874188/874188]



In [6]:
# Read the downloaded CSV into the working DataFrame
csv_path = "car_fuel_efficiency.csv"
df = pd.read_csv(csv_path)

In [9]:
# Record count = first entry of the (rows, columns) shape tuple
n_records = df.shape[0]
n_records

9704

In [10]:
# Alternative: the length of the row index gives the same record count
len(df.index)

9704

#### Question 3. Fuel Type

In [21]:
# Show the column labels of the loaded DataFrame
df.columns

Index(['engine_displacement', 'num_cylinders', 'horsepower', 'vehicle_weight',
       'acceleration', 'model_year', 'origin', 'fuel_type', 'drivetrain',
       'num_doors', 'fuel_efficiency_mpg'],
      dtype='object')

In [22]:
# Count the distinct values present in the fuel_type column
fuel_types = df["fuel_type"]
fuel_types.nunique()

2

#### Question 4. Missing values

In [23]:
# Missing-value count per column; columns with a non-zero count are the ones with gaps
null_mask = df.isnull()
null_mask.sum()

engine_displacement      0
num_cylinders          482
horsepower             708
vehicle_weight           0
acceleration           930
model_year               0
origin                   0
fuel_type                0
drivetrain               0
num_doors              502
fuel_efficiency_mpg      0
dtype: int64

#### Question 5. Max fuel efficiency

In [24]:
# Maximum fuel efficiency among rows whose origin is "Asia"
is_asian = df["origin"] == "Asia"
df.loc[is_asian, "fuel_efficiency_mpg"].max()

np.float64(23.759122836520497)

#### Question 6. Median value of horsepower

In [25]:
# Median of the horsepower column as loaded (before the missing values are filled)
median_hp = df.loc[:, "horsepower"].median()
median_hp

np.float64(149.0)

In [26]:
# Most frequent horsepower value: mode() returns a Series (ties are possible); take its first entry
hp_modes = df["horsepower"].mode()
most_frequent_hp = hp_modes[0]

In [29]:
# Fill the missing values in the horsepower column with the most frequent value from the previous step.
# The filled column is assigned back instead of calling fillna(..., inplace=True) on df["horsepower"]:
# that chained in-place form operates on an intermediate Series, which pandas 2.x reports with a
# FutureWarning (hidden in this notebook by the global warnings filter) and which no longer
# updates df under Copy-on-Write, the default behaviour from pandas 3.0.
df["horsepower"] = df["horsepower"].fillna(most_frequent_hp)
# Confirm that no missing horsepower values remain
df["horsepower"].isnull().sum()

np.int64(0)

In [31]:
# Recompute the horsepower median now that the missing values have been filled
new_median_hp = df.loc[:, "horsepower"].median()
new_median_hp

np.float64(152.0)

#### Question 7. Sum of weights

In [32]:
# Keep only the rows whose origin is "Asia"
from_asia = df["origin"] == "Asia"
df_asia = df[from_asia]

In [34]:
# Restrict the Asian subset to the two columns used below: vehicle_weight and model_year
selected_columns = ["vehicle_weight", "model_year"]
df_asia = df_asia[selected_columns]

In [35]:
# Keep the first 7 rows of the selection
df_asia = df_asia.head(n=7)

In [36]:
# Convert the selected rows/columns to a NumPy array X
# (rows follow df_asia's rows; columns are vehicle_weight, model_year in that order).
X = df_asia.to_numpy()

In [37]:
# Gram matrix XTX: take the transpose of X, then matrix-multiply the transpose by X
X_transposed = X.T
XTX = X_transposed.dot(X)

In [38]:
# Invert XTX; np.linalg.inv raises LinAlgError if the matrix is singular.
XTX_inv = np.linalg.inv(XTX)

In [39]:
# Target vector y with the seven values given in the exercise
target_values = [1100, 1300, 800, 900, 1000, 1100, 1200]
y = np.array(target_values)

In [40]:
# w = XTX_inv . X^T . y, evaluated left to right in two explicit steps
inv_times_Xt = XTX_inv.dot(X.T)
w = inv_times_Xt.dot(y)

In [41]:
# Answer to the question: add up all elements of w
w.sum()

np.float64(0.5187709081074006)