In [2]:
import numpy as np
import pandas as pd

# Report the installed numpy and pandas versions, one per line
print(np.__version__, pd.__version__, sep="\n")


1.25.2
2.0.3


In [5]:
import os
import requests

# Create a new folder named 'data' (no error if it already exists)
os.makedirs('data', exist_ok=True)

url = 'https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv'

# Bound the wait with a timeout so a stalled connection cannot hang the
# notebook, and fail loudly on an HTTP error status instead of writing an
# error page into the .csv file.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Save the .csv file in the 'data' folder (raw bytes, exactly as downloaded)
with open('data/laptops.csv', 'wb') as file:
    file.write(response.content)


In [6]:
# Read the previously saved laptops dataset from the 'data' folder
df_laptops = pd.read_csv('data/laptops.csv')

# Report how many rows were loaded
print(f"Total count of rows: {df_laptops.shape[0]}")

# Preview the top of the table (last expression, so it renders as cell output)
df_laptops.head()

Total count of rows: 2160


Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
0,ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...,New,Asus,ExpertBook,Intel Core i5,8,512,SSD,,15.6,No,1009.0
1,Alurin Go Start Intel Celeron N4020/8GB/256GB ...,New,Alurin,Go,Intel Celeron,8,256,SSD,,15.6,No,299.0
2,ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...,New,Asus,ExpertBook,Intel Core i3,8,256,SSD,,15.6,No,789.0
3,MSI Katana GF66 12UC-082XES Intel Core i7-1270...,New,MSI,Katana,Intel Core i7,16,1000,SSD,RTX 3050,15.6,No,1199.0
4,HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...,New,HP,15S,Intel Core i5,16,512,SSD,,15.6,No,669.01


In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df_laptops.describe()


Unnamed: 0,RAM,Storage,Screen,Final Price
count,2160.0,2160.0,2156.0,2160.0
mean,15.413889,596.294444,15.168112,1312.638509
std,9.867815,361.220506,1.203329,911.475417
min,4.0,0.0,10.1,201.05
25%,8.0,256.0,14.0,661.0825
50%,16.0,512.0,15.6,1031.945
75%,16.0,1000.0,15.6,1708.97
max,128.0,4000.0,18.0,7150.47


In [11]:
# Number of distinct brands; missing values are excluded from the count
brand_count = len(df_laptops['Brand'].dropna().unique())
print(f"Laptop brands count: {brand_count}")

Laptop brands count: 27


In [12]:
# Number of columns (not cells) that contain at least one missing value
missing_values_count = df_laptops.isna().any().sum()
print(f"Columns with missing values: {missing_values_count}")


Columns with missing values: 3


In [15]:
# Highest 'Final Price' among the rows whose Brand is exactly 'Dell',
# selected with a single .loc lookup (row mask + column label)
max_dell_price = df_laptops.loc[df_laptops['Brand'] == 'Dell', 'Final Price'].max()
print(f"Maximum final price of Dell notebooks: {max_dell_price}")

Maximum final price of Dell notebooks: 3936.0


In [16]:

# Q6: Median value of Screen column
# Median of the column as it currently stands. Note that this cell fills the
# column below, so on a re-run "before" is already computed on filled data.
median_screen_before = df_laptops['Screen'].median()

# mode() can return several values; [0] takes the first one.
most_frequent_screen = df_laptops['Screen'].mode()[0]

# Assign the filled column back instead of calling fillna(..., inplace=True)
# on the selected column: with pandas Copy-on-Write the in-place call only
# updates a temporary object and df_laptops would silently stay unfilled.
df_laptops['Screen'] = df_laptops['Screen'].fillna(most_frequent_screen)
median_screen_after = df_laptops['Screen'].median()

print(f"Median value of Screen before filling missing values: {median_screen_before}")
print(f"Most frequent value of Screen: {most_frequent_screen}")
print(f"Median value of Screen after filling missing values: {median_screen_after}")

Median value of Screen before filling missing values: 15.6
Most frequent value of Screen: 15.6
Median value of Screen after filling missing values: 15.6


In [17]:
import numpy as np

# Normal-equation exercise on the Innjoo subset: w = (X^T X)^-1 X^T y

# Rows whose Brand is 'Innjoo', then only the RAM / Storage / Screen columns
innjoo_laptops = df_laptops.loc[df_laptops['Brand'].eq('Innjoo')]
X_df = innjoo_laptops.loc[:, ['RAM', 'Storage', 'Screen']]

# Plain NumPy matrix behind the selection
X = X_df.to_numpy()

# Product of X transposed with X, and its inverse
XTX = np.matmul(X.T, X)
XTX_inv = np.linalg.inv(XTX)

# Fixed six-element target vector; the product below needs X to have six rows
y = np.array([1100, 1300, 800, 900, 1000, 1100])

# Grouped left to right, exactly like the chained form (XTX_inv @ X.T) @ y
w = np.matmul(np.matmul(XTX_inv, X.T), y)

# Add up the components of w and report the total
sum_w = np.sum(w)
print(f"Sum of all the elements of the result: {sum_w}")

Sum of all the elements of the result: 91.29988062995815
