In [8]:
import numpy as np
import pandas as pd

# Report the versions of the libraries this notebook runs against.
## Q1: Pandas version
for label, module in (("Numpy", np), ("Pandas", pd)):
    print(f"{label} version: {module.__version__}")

Numpy version: 2.3.1
Pandas version: 2.3.1


In [70]:
# Fetch the laptops CSV from its raw GitHub URL and preview the first 5 rows
laptops_csv_url = 'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv'
df = pd.read_csv(laptops_csv_url)

df.head(5)

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
0,ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...,New,Asus,ExpertBook,Intel Core i5,8,512,SSD,,15.6,No,1009.0
1,Alurin Go Start Intel Celeron N4020/8GB/256GB ...,New,Alurin,Go,Intel Celeron,8,256,SSD,,15.6,No,299.0
2,ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...,New,Asus,ExpertBook,Intel Core i3,8,256,SSD,,15.6,No,789.0
3,MSI Katana GF66 12UC-082XES Intel Core i7-1270...,New,MSI,Katana,Intel Core i7,16,1000,SSD,RTX 3050,15.6,No,1199.0
4,HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...,New,HP,15S,Intel Core i5,16,512,SSD,,15.6,No,669.01


In [12]:
## Q2: Records count
# The number of rows in the frame is the number of laptop records.
len(df)

2160

In [35]:
## Q3: Laptop brands (unique)
# Count the distinct brand names; missing values are not counted.
brand_names = df['Brand'].dropna().unique()
len(brand_names)

27

In [73]:
## Q4: Missing values
# Tally nulls per column once; the same tally feeds both the table and the summary line.
missing_per_column = df.isna().sum()

print("Column missing value counts:")
print(missing_per_column)

# A column "has missing values" when its null tally is non-zero.
num_missing_cols = (missing_per_column > 0).sum()
print(f"\nThere are {num_missing_cols} columns with missing values.")

Column missing value counts:
Laptop                 0
Status                 0
Brand                  0
Model                  0
CPU                    0
RAM                    0
Storage                0
Storage type          42
GPU                 1371
Screen                 4
Touch                  0
Final Price            0
Screen (updated)       0
dtype: int64

There are 3 columns with missing values.


In [46]:
## Q5: Maximum final price
# Highest 'Final Price' among the rows whose 'Brand' is exactly 'Dell'.
# Rows and column are picked in a single .loc call rather than chained [] indexing.
is_dell = df['Brand'] == 'Dell'
max_price_dell = df.loc[is_dell, 'Final Price'].max()
print(max_price_dell)

3936.0


In [71]:
## Q6: Median value of screen
# Compares the median of 'Screen' before and after filling its gaps with the mode.
# Side effect: adds a 'Screen (updated)' column to df.

screen = df['Screen']

# 1: median of the column as loaded
screen_median = screen.median()

# 2: most frequent value (first entry returned by mode())
screen_mode = screen.mode()[0]

# 3: fill the gaps with the mode, stored next to the original column
df['Screen (updated)'] = screen.fillna(screen_mode)

# 4: median of the filled column
new_screen_median = df['Screen (updated)'].median()

print(f"Median: {screen_median}")
print(f"Mode: {screen_mode}")
print(f"Updated median: {new_screen_median}")

Median: 15.6
Mode: 15.6
Updated median: 15.6


In [72]:
# Summary statistics (count, mean, std, min, quartiles, max) for df
numeric_summary = df.describe()
numeric_summary

Unnamed: 0,RAM,Storage,Screen,Final Price,Screen (updated)
count,2160.0,2160.0,2156.0,2160.0,2160.0
mean,15.413889,596.294444,15.168112,1312.638509,15.168912
std,9.867815,361.220506,1.203329,911.475417,1.202357
min,4.0,0.0,10.1,201.05,10.1
25%,8.0,256.0,14.0,661.0825,14.0
50%,16.0,512.0,15.6,1031.945,15.6
75%,16.0,1000.0,15.6,1708.97,15.6
max,128.0,4000.0,18.0,7150.47,18.0


In [93]:
## Q7: Sum of weights
# Solves the normal equation w = (X^T X)^-1 X^T y for the "Innjoo" laptops
# and prints the sum of the resulting weights.

# select "Innjoo" laptops, keeping only the RAM, Storage, and Screen columns
feature_columns = ['RAM', 'Storage', 'Screen']
innjoo_df = df.loc[df['Brand'] == 'Innjoo', feature_columns]

# get underlying numpy array
X = innjoo_df.to_numpy()

# create the y array (hard-coded: one target value per selected row)
y = np.array([1100, 1300, 800, 900, 1000, 1100])

# Fail early with a readable message if the data no longer matches the
# hard-coded targets, instead of a shape or singular-matrix error further down.
if X.shape[0] != y.shape[0]:
    raise ValueError(
        f"Expected {y.shape[0]} Innjoo rows to match y, found {X.shape[0]}."
    )
# 'Screen' has missing values in the full dataset; make sure none are in this subset.
if innjoo_df.isna().any().any():
    raise ValueError("Innjoo feature rows contain missing values.")

# compute XTX and its inverse
XTX = X.T @ X
XTX_inv = np.linalg.inv(XTX)

# compute w
w = XTX_inv @ X.T @ y

# sum all elements of w
sum_w = round(w.sum(), 5)
print("Sum of w:", f"{sum_w:.2f}")

Sum of w: 91.30
