In [3]:
# Import required modules.
import numpy as np
import pandas as pd

### Q1. Pandas version
What's the version of Pandas that you installed?

You can get the version information using the __version__ field:

In [4]:
pd.__version__

'2.2.2'

### Getting the data 

For this homework, we'll use the Laptops Price dataset. Download it from 
[here](https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv).

You can do it with wget:

```bash
wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv

In [8]:
# Get the data.
!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv

--2024-09-18 08:48:53--  https://raw.githubusercontent.com/alexeygrigorev/datasets/master/laptops.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 298573 (292K) [text/plain]
Saving to: ‘laptops.csv’


2024-09-18 08:48:53 (82.7 MB/s) - ‘laptops.csv’ saved [298573/298573]



In [9]:
# Read the data.
df = pd.read_csv('./laptops.csv')

In [10]:
# Show the head of the data.
df.head()

Unnamed: 0,Laptop,Status,Brand,Model,CPU,RAM,Storage,Storage type,GPU,Screen,Touch,Final Price
0,ASUS ExpertBook B1 B1502CBA-EJ0436X Intel Core...,New,Asus,ExpertBook,Intel Core i5,8,512,SSD,,15.6,No,1009.0
1,Alurin Go Start Intel Celeron N4020/8GB/256GB ...,New,Alurin,Go,Intel Celeron,8,256,SSD,,15.6,No,299.0
2,ASUS ExpertBook B1 B1502CBA-EJ0424X Intel Core...,New,Asus,ExpertBook,Intel Core i3,8,256,SSD,,15.6,No,789.0
3,MSI Katana GF66 12UC-082XES Intel Core i7-1270...,New,MSI,Katana,Intel Core i7,16,1000,SSD,RTX 3050,15.6,No,1199.0
4,HP 15S-FQ5085NS Intel Core i5-1235U/16GB/512GB...,New,HP,15S,Intel Core i5,16,512,SSD,,15.6,No,669.01


### Q2. Records count

How many records are in the dataset?

- 12
- 1000
- 2160
- 12160

In [11]:
# Show the shape of the data.
df.shape

(2160, 12)

### Q3. Laptop brands

How many laptop brands are presented in the dataset?

- 12
- 27
- 28
- 2160

In [30]:
# Show number of brands.
len(set(df.Brand))

27

### Q4. Missing values

How many columns in the dataset have missing values?

- 0
- 1
- 2
- 3

In [43]:
# Check for missing values.
miss_list = list(df.isna().sum())
miss_com = [1 if x > 0 else 0 for x in miss_list]
sum(miss_com)

3

### Q5. Maximum final price

What's the maximum final price of Dell notebooks in the dataset?

- 869
- 3691
- 3849
- 3936

In [33]:
# Get maximum final price.
df[df.Brand=='Dell']['Final Price'].max()

np.float64(3936.0)

### Q6. Median value of Screen

1. Find the median value of `Screen` column in the dataset.
2. Next, calculate the most frequent value of the same `Screen` column.
3. Use `fillna` method to fill the missing values in `Screen` column with the most frequent value from the previous step.
4. Now, calculate the median value of `Screen` once again.

Has it changed?

> Hint: refer to existing `mode` and `median` functions to complete the task.

- Yes
- No

In [37]:
# Get the median of the Screen column.
screen_med1 = df.Screen.median()
screen_med1

np.float64(15.6)

In [38]:
# Get the mode of the Screen column.
screen_mod = df.Screen.mode()
screen_mod

0    15.6
Name: Screen, dtype: float64

In [47]:
# Fill the missing values in the Screen column.
df.Screen.fillna(value=screen_mod, inplace=True)

In [48]:
# Get the median of the Screen column again.
screen_med2 = df.Screen.median()
screen_med2

np.float64(15.6)

In [50]:
screen_med1 == screen_med2

np.True_

### Q7. Sum of weights

1. Select all the "Innjoo" laptops from the dataset.
2. Select only columns `RAM`, `Storage`, `Screen`.
3. Get the underlying NumPy array. Let's call it `X`.
4. Compute matrix-matrix multiplication between the transpose of `X` and `X`. To get the transpose, use `X.T`. Let's call the result `XTX`.
5. Compute the inverse of `XTX`.
6. Create an array `y` with values `[1100, 1300, 800, 900, 1000, 1100]`.
7. Multiply the inverse of `XTX` with the transpose of `X`, and then multiply the result by `y`. Call the result `w`.
8. What's the sum of all the elements of the result?

> **Note**: You just implemented linear regression. We'll talk about it in the next lesson.

- 0.43
- 45.29
- 45.58
- 91.30

In [140]:
# Select all the "Innjoo" laptops from the dataset.
df_select = df[df['Model']=='Ioxbook']

In [141]:
# Select only columns RAM, Storage, Screen.
df_select = df_select[['RAM', 'Storage', 'Screen']]
df_select = df_select.drop_duplicates()

In [142]:
# Use df_select to create a matrix.
X = df_select.values
X

array([[  8. , 256. ,  15.6],
       [  8. , 512. ,  15.6],
       [  4. , 128. ,  14. ],
       [  4. , 128. ,  15.6]])

In [143]:
# Compute the transpose of X.
XTX = X.T @ X
XTX

array([[1.60000e+02, 7.16800e+03, 3.68000e+02],
       [7.16800e+03, 3.60448e+05, 1.57696e+04],
       [3.68000e+02, 1.57696e+04, 9.26080e+02]])

In [144]:
# Compute the inverse of X.
XTX_inv = np.linalg.inv(XTX)
XTX_inv

array([[ 2.03112490e-01, -1.99221878e-03, -4.67874299e-02],
       [-1.99221878e-03,  3.04198437e-05,  2.73656425e-04],
       [-4.67874299e-02,  2.73656425e-04,  1.50120096e-02]])

In [147]:
# Create the array y.
y = np.array([1100, 1300, 800, 900, 1000, 1100])

In [148]:
# Compute w
w = (XTX_inv @ X.T) @ y
w

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 3 is different from 4)

In [103]:
w[2]

NameError: name 'w' is not defined

In [128]:
# Select all the "Innjoo" laptops from the dataset.
df_Innjoo = df[df['Model']=='Ioxbook']

In [129]:
df_Innjoo.shape

(8, 12)

In [130]:
X = island_df.values
XTX = X.T.dot(X)

XTX_inv = np.linalg.inv(XTX)
XTX_inv

NameError: name 'island_df' is not defined

In [None]:
y = np.array([950, 1300, 800, 1000, 1300])

In [None]:
w = (XTX_inv @ X.T) @ y

In [None]:
w

In [None]:
w[2]