In [1]:
import pandas as pd
# Print the version string of the pandas build imported above.
print(pd.__version__)

2.3.2


In [3]:
# Q2: Records count
# Load the CSV into the DataFrame that the later cells query.
df = pd.read_csv("car_fuel_efficiency.csv")

# The number of records is the row dimension of the frame.
num_records = df.shape[0]
print(f"Number of records in the dataset: {num_records}")



Number of records in the dataset: 9704


In [7]:
# Q3: Fuel types
# Pull the column out once; both answers below are derived from it.
fuel_type_column = df["fuel_type"]

# How many distinct fuel types are present
fuel_types = fuel_type_column.nunique()
print(f"Number of fuel types in the dataset: {fuel_types}")

# Which fuel types those are
fuel_type_values = fuel_type_column.unique()
print(f"Unique fuel types: {fuel_type_values}")



Number of fuel types in the dataset: 2
Unique fuel types: ['Gasoline' 'Diesel']


In [8]:
# Q4: Missing values
# Per-column tally of null cells (isna is the alias of isnull).
missing_values = df.isna().sum()
print("Missing values per column:", missing_values, sep="\n")

# A column counts as "having missing values" when its tally is above zero.
has_missing = missing_values > 0
columns_with_missing = has_missing.sum()
print(f"\nNumber of columns with missing values: {columns_with_missing}")



Missing values per column:
engine_displacement      0
num_cylinders          482
horsepower             708
vehicle_weight           0
acceleration           930
model_year               0
origin                   0
fuel_type                0
drivetrain               0
num_doors              502
fuel_efficiency_mpg      0
dtype: int64

Number of columns with missing values: 4


In [9]:
# Q5: Max fuel efficiency of cars from Asia
# Keep only the rows whose origin is Asia.
is_asia = df["origin"] == "Asia"
asia_cars = df.loc[is_asia]

# Work from the mpg column of that subset.
asia_mpg = asia_cars["fuel_efficiency_mpg"]
max_fuel_efficiency = asia_mpg.max()
print(f"Maximum fuel efficiency of cars from Asia: {max_fuel_efficiency}")

# Extra context: subset size and the min-max span of mpg.
asia_count = asia_cars.shape[0]
min_fuel_efficiency = asia_mpg.min()
print(f"Number of Asian cars in dataset: {asia_count}")
print(f"Fuel efficiency range for Asian cars: {min_fuel_efficiency} - {max_fuel_efficiency}")




Maximum fuel efficiency of cars from Asia: 23.759122836520497
Number of Asian cars in dataset: 3247
Fuel efficiency range for Asian cars: 6.886244726750004 - 23.759122836520497


In [11]:
# Q6: Median value of horsepower
import numpy as np

horsepower = df["horsepower"]

# Step 1: median of horsepower on the data as loaded
median_horsepower_before = horsepower.median()
print(f"Step 1 - Median horsepower before filling missing values: {median_horsepower_before}")

# Step 2: most frequent value (first entry of the mode result)
most_frequent_horsepower = horsepower.mode()[0]
print(f"Step 2 - Most frequent horsepower value: {most_frequent_horsepower}")

# Step 3: build a separate frame with the gaps filled, leaving df untouched
df_filled = df.assign(horsepower=horsepower.fillna(most_frequent_horsepower))

# Step 4: median of horsepower on the filled frame
median_horsepower_after = df_filled["horsepower"].median()
print(f"Step 4 - Median horsepower after filling missing values: {median_horsepower_after}")

# Describe the direction of the change, if any
if median_horsepower_after == median_horsepower_before:
    change = "No"
elif median_horsepower_after > median_horsepower_before:
    change = "Yes, it increased"
else:
    change = "Yes, it decreased"

print(f"\nHas the median changed? {change}")



Step 1 - Median horsepower before filling missing values: 149.0
Step 2 - Most frequent horsepower value: 152.0
Step 4 - Median horsepower after filling missing values: 152.0

Has the median changed? Yes, it increased


In [12]:
# Q7: Sum of weights (Linear Regression Implementation)
import numpy as np

# Steps 1-3: Asian cars -> two feature columns -> first 7 rows
asia_cars = df[df["origin"] == "Asia"]
selected_data = asia_cars.loc[:, ["vehicle_weight", "model_year"]]
X_data = selected_data.iloc[:7]

# Step 4: the feature matrix X as a NumPy array
X = X_data.to_numpy()
print("Step 4 - Matrix X (first 7 Asian cars, vehicle_weight and model_year):")
print(X)

# Step 5: Gram matrix, X transposed times X
X_transposed = X.T
XTX = X_transposed @ X
print("\nStep 5 - XTX (X transpose times X):")
print(XTX)

# Step 6: explicit inverse of the Gram matrix
XTX_inv = np.linalg.inv(XTX)
print("\nStep 6 - Inverse of XTX:")
print(XTX_inv)

# Step 7: target vector given by the exercise
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])
print(f"\nStep 7 - Array y: {y}")

# Step 8: w = (XTX_inv @ X.T) @ y, evaluated left to right
inv_times_xt = XTX_inv @ X_transposed
w = inv_times_xt @ y
print(f"\nStep 8 - Weights w: {w}")

# Step 9: add up the components of w
sum_of_weights = w.sum()
print(f"\nStep 9 - Sum of all elements of w: {sum_of_weights}")




Step 4 - Matrix X (first 7 Asian cars, vehicle_weight and model_year):
[[2714.21930965 2016.        ]
 [2783.86897424 2010.        ]
 [3582.68736772 2007.        ]
 [2231.8081416  2011.        ]
 [2659.43145076 2016.        ]
 [2844.22753389 2014.        ]
 [3761.99403819 2019.        ]]

Step 5 - XTX (X transpose times X):
[[62248334.33150762 41431216.5073268 ]
 [41431216.5073268  28373339.        ]]

Step 6 - Inverse of XTX:
[[ 5.71497081e-07 -8.34509443e-07]
 [-8.34509443e-07  1.25380877e-06]]

Step 7 - Array y: [1100 1300  800  900 1000 1100 1200]

Step 8 - Weights w: [0.01386421 0.5049067 ]

Step 9 - Sum of all elements of w: 0.5187709081074016
