In [12]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv')

In [4]:
df.head(5)

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
0,ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...,New,Asus,ExpertBook,Intel Core i5,8,512,SSD,,15.6,No,1009.0
1,Alurin Go Start Intel Celeron N4020/8GB/256GB ...,New,Alurin,Go,Intel Celeron,8,256,SSD,,15.6,No,299.0
2,ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...,New,Asus,ExpertBook,Intel Core i3,8,256,SSD,,15.6,No,789.0
3,MSI Katana GF66 12UC-082XES Intel Core i7-1270...,New,MSI,Katana,Intel Core i7,16,1000,SSD,RTX 3050,15.6,No,1199.0
4,HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...,New,HP,15S,Intel Core i5,16,512,SSD,,15.6,No,669.01


In [5]:
# Q1

pd.__version__

'2.2.2'

In [7]:
# Q2

record_count = len(df)
print(f'Record Count: {record_count}')


Record Count: 2160


In [8]:
# Q3

num_brands = df['Laptop'].nunique()
print(f'Number of unique laptop brands: {num_brands}')


Number of unique laptop brands: 2160


In [9]:
# Q4

missing_columns = df.isnull().sum()
num_missing_columns = (missing_columns > 0).sum()

print(f'Number of columns with missing values: {num_missing_columns}')

Number of columns with missing values: 3


In [10]:
# Q5

dell_notebooks = df[df['Laptop'].str.contains('Dell', case=False, na=False)]
max_price_dell = dell_notebooks['Final Price'].max()

print(f'The maximum final price of Dell notebooks is: {max_price_dell}')


The maximum final price of Dell notebooks is: 3936.0


In [11]:
# Q6

# 1. Calculate the median value of the Screen column (before filling missing values)
initial_median_screen = df['Screen'].median()
print(f"Initial median of 'Screen': {initial_median_screen}")

# 2. Calculate the most frequent value (mode) of the Screen column
most_frequent_screen = df['Screen'].mode()[0]  # Mode returns a series, so we take the first value
print(f"Most frequent value in 'Screen': {most_frequent_screen}")

# 3. Fill missing values in the Screen column with the most frequent value
df['Screen'].fillna(most_frequent_screen, inplace=True)

# 4. Calculate the median value of Screen again after filling missing values
updated_median_screen = df['Screen'].median()
print(f"Updated median of 'Screen' after filling missing values: {updated_median_screen}")

# 5. Compare the initial and updated medians
if initial_median_screen == updated_median_screen:
    print("The median has not changed.")
else:
    print("The median has changed.")


Initial median of 'Screen': 15.6
Most frequent value in 'Screen': 15.6
Updated median of 'Screen' after filling missing values: 15.6
The median has not changed.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Screen'].fillna(most_frequent_screen, inplace=True)


In [16]:
# Q7

# 1. Select all the "Innjoo" laptops from the dataset
innjoo_laptops = df[df['Laptop'].str.contains('Innjoo', case=False, na=False)]

# 2. Select only the columns RAM, Storage, and Screen
X = innjoo_laptops[['RAM', 'Storage', 'Screen']].values

# 3. Get the underlying NumPy array (already done with .values)

# 4. Compute the matrix-matrix multiplication between the transpose of X and X
XTX = np.dot(X.T, X)

# 5. Compute the inverse of XTX
XTX_inv = np.linalg.inv(XTX)

# 6. Create an array y with values [1100, 1300, 800, 900, 1000, 1100]
y = np.array([1100, 1300, 800, 900, 1000, 1100])

# 7. Multiply the inverse of XTX with the transpose of X, then multiply by y
w = np.dot(np.dot(XTX_inv, X.T), y)

# 8. Compute the sum of all elements in w
sum_of_w = np.sum(w)

print(f"Sum of all elements in w: {sum_of_w}")


Sum of all elements in w: 91.2998806299555
