In [1]:
# === Create environment with Python 3.11 === // Set via terminal 
#!conda create -y -n ml-zoomcamp python=3.11

# === Install core packages inside the env ===
#!conda run -n ml-zoomcamp conda install -y numpy pandas scikit-learn seaborn matplotlib jupyter

# === Make the env available as a Jupyter kernel ===
#!conda run -n ml-zoomcamp python -m pip install ipykernel
#!conda run -n ml-zoomcamp python -m ipykernel install --user --name=ml-zoomcamp --display-name "Python (ml-zoomcamp)"


In [2]:
import sys
import site
import platform

print("Python executable:", sys.executable)
print("Python version:", platform.python_version())
print("Site-packages path:", site.getsitepackages())



Python executable: /opt/anaconda3/envs/ml-zoomcamp/bin/python
Python version: 3.11.13
Site-packages path: ['/opt/anaconda3/envs/ml-zoomcamp/lib/python3.11/site-packages']


In [4]:
!conda install numpy pandas scikit-learn seaborn jupyter

[1;33mJupyter detected[0m[1;33m...[0m
[1;32m2[0m[1;32m channel Terms of Service accepted[0m
Channels:
 - defaults
Platform: osx-arm64
Collecting package metadata (repodata.json): done
Solving environment: done


    current version: 25.7.0
    latest version: 25.9.0

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.



In [5]:
!conda activate ml-zoomcamp


CondaError: Run 'conda init' before 'conda activate'



In [7]:
import pandas as pd
pd.__version__

'2.3.2'

In [9]:
!curl -o car_fuel_efficiency.csv https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  853k  100  853k    0     0  2216k      0 --:--:-- --:--:-- --:--:-- 2211k


In [11]:
df = pd.read_csv("car_fuel_efficiency.csv")
print("Number of records:", df.shape[0])
df.head()


Number of records: 9704


Unnamed: 0,engine_displacement,num_cylinders,horsepower,vehicle_weight,acceleration,model_year,origin,fuel_type,drivetrain,num_doors,fuel_efficiency_mpg
0,170,3.0,159.0,3413.433759,17.7,2003,Europe,Gasoline,All-wheel drive,0.0,13.231729
1,130,5.0,97.0,3149.664934,17.8,2007,USA,Gasoline,Front-wheel drive,0.0,13.688217
2,170,,78.0,3079.038997,15.1,2018,Europe,Gasoline,Front-wheel drive,0.0,14.246341
3,220,4.0,,2542.392402,20.2,2009,USA,Diesel,All-wheel drive,2.0,16.912736
4,210,1.0,140.0,3460.87099,14.4,2009,Europe,Gasoline,All-wheel drive,2.0,12.488369


In [12]:
df['fuel_type'].unique()

array(['Gasoline', 'Diesel'], dtype=object)

In [14]:
df['fuel_type'].nunique()

2

In [15]:
(df.isnull().sum() > 0).sum()

np.int64(4)

In [16]:
df.columns

Index(['engine_displacement', 'num_cylinders', 'horsepower', 'vehicle_weight',
       'acceleration', 'model_year', 'origin', 'fuel_type', 'drivetrain',
       'num_doors', 'fuel_efficiency_mpg'],
      dtype='object')

In [17]:
max_asia_eff = df[df['origin'] == 'Asia']['fuel_efficiency_mpg'].max()
print("Maximum fuel efficiency (Asia):", max_asia_eff)

Maximum fuel efficiency (Asia): 23.759122836520497


In [18]:
df.groupby('origin')['fuel_efficiency_mpg'].max()

origin
Asia      23.759123
Europe    25.967222
USA       24.971452
Name: fuel_efficiency_mpg, dtype: float64

In [19]:
median_before = df['horsepower'].median()
print("Median before filling:", median_before)

Median before filling: 149.0


In [20]:
most_freq = df['horsepower'].mode()[0]
print("Most frequent horsepower value:", most_freq)

Most frequent horsepower value: 152.0


In [21]:
df['horsepower_filled'] = df['horsepower'].fillna(most_freq)


In [22]:
median_after = df['horsepower_filled'].median()
print("Median after filling:", median_after)


Median after filling: 152.0


In [24]:
import numpy as np

# 1) Select Asia cars, keep columns, take first 7, to numpy
X = df.loc[df['origin']=='Asia', ['vehicle_weight','model_year']].head(7).to_numpy()

# 2) XTX, inverse
XTX = X.T @ X
XTX_inv = np.linalg.inv(XTX)

# 3) y vector
y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])

# 4) w = (XTX)^(-1) X^T y
w = XTX_inv @ X.T @ y

# 5) sum of elements
ans = w.sum()
ans

np.float64(0.5187709081074025)