In [63]:
import numpy as np
from sklearn.decomposition import TruncatedSVD

np.random.seed(42)
x = np.random.rand(6, 4)

x

array([[0.37454012, 0.95071431, 0.73199394, 0.59865848],
       [0.15601864, 0.15599452, 0.05808361, 0.86617615],
       [0.60111501, 0.70807258, 0.02058449, 0.96990985],
       [0.83244264, 0.21233911, 0.18182497, 0.18340451],
       [0.30424224, 0.52475643, 0.43194502, 0.29122914],
       [0.61185289, 0.13949386, 0.29214465, 0.36636184]])

In [64]:
def trunced_svd(X, k):
    
    U, S, Vt = np.linalg.svd(X, full_matrices=False)
    
    U_k = U[:, :k]
    S_k = np.diag(S[:k])
    Vt_k = Vt[:k, :]
    
    X_reduce = U_k @ S_k
    
    return X_reduce, U_k, S_k, Vt_k

X_reduced_manual, U_k, S_k, Vt_k = trunced_svd(x, k=2)

print(X_reduced_manual)

[[-1.29218124  0.38804539]
 [-0.70509141 -0.50191988]
 [-1.27311086 -0.37306325]
 [-0.68971514  0.14434687]
 [-0.74573679  0.2697078 ]
 [-0.6883421   0.03884121]]


---------

In [119]:
import numpy as np


np.random.seed(42)
X = np.random.rand(10, 6)


X_mean = np.mean(X, axis=0)
X_centered = X - X_mean

cov_matrix = (X_centered.T @ X_centered) / (X.shape[0] - 1)


std_devs = np.sqrt(np.diag(cov_matrix))


corr_matrix = cov_matrix / np.outer(std_devs, std_devs)


print("Corr matrix (myself):\n", corr_matrix)

print("\nCorr matrix (NumPy):\n", np.corrcoef(X, rowvar=False))


Corr matrix (myself):
 [[ 1.         -0.48815465 -0.29645208 -0.33650111  0.38110368 -0.13663343]
 [-0.48815465  1.         -0.19186534  0.01336642 -0.48756626 -0.37585227]
 [-0.29645208 -0.19186534  1.          0.15190601 -0.02654463  0.23493526]
 [-0.33650111  0.01336642  0.15190601  1.          0.56730351  0.52739805]
 [ 0.38110368 -0.48756626 -0.02654463  0.56730351  1.          0.29585641]
 [-0.13663343 -0.37585227  0.23493526  0.52739805  0.29585641  1.        ]]

Corr matrix (NumPy):
 [[ 1.         -0.48815465 -0.29645208 -0.33650111  0.38110368 -0.13663343]
 [-0.48815465  1.         -0.19186534  0.01336642 -0.48756626 -0.37585227]
 [-0.29645208 -0.19186534  1.          0.15190601 -0.02654463  0.23493526]
 [-0.33650111  0.01336642  0.15190601  1.          0.56730351  0.52739805]
 [ 0.38110368 -0.48756626 -0.02654463  0.56730351  1.          0.29585641]
 [-0.13663343 -0.37585227  0.23493526  0.52739805  0.29585641  1.        ]]


------------------

In [120]:
cov_matrix_2 = X.T @ X

eig_values, V = np.linalg.eig(cov_matrix_2)

sorted_indices = np.argsort(eig_values)[::-1]

eig_values = eig_values[sorted_indices]

V = V[:, sorted_indices]

Sigma = np.sqrt(eig_values)

U = (X @ V) / Sigma

In [121]:
print("Матрица U:\n", U)
print("Сингулярные значения (Sigma):\n", Sigma)
print("Матрица V^T:\n", V.T)


X_reconstructed = U @ np.diag(Sigma) @ V.T
print("\nПриближенная X:\n", X_reconstructed)

Матрица U:
 [[-0.3270258   0.39832254 -0.0705172  -0.23869729 -0.41319446 -0.46607433]
 [-0.37525344  0.30707188 -0.43939086  0.31555371  0.41574831  0.04634833]
 [-0.23307031 -0.18037747  0.28081412 -0.41403976  0.47918195 -0.08557804]
 [-0.22400352 -0.05069633 -0.12735301 -0.42012065 -0.03866904  0.42573726]
 [-0.27744328  0.16209094  0.4602874  -0.00446837 -0.45942996  0.09226714]
 [-0.38694513 -0.47592658  0.37518976  0.43728455  0.09331332 -0.31651233]
 [-0.2347125  -0.11902999 -0.38420881 -0.20757526  0.03489418 -0.51617144]
 [-0.30992158  0.30639294  0.01421384  0.43712722 -0.03958013  0.25816109]
 [-0.46151152 -0.45565611 -0.26787845 -0.09952142 -0.25389936  0.37221421]
 [-0.23854582  0.37880472  0.36698566 -0.24537569  0.37186114  0.10832329]]
Сингулярные значения (Sigma):
 [3.75602665 1.42424152 1.047824   0.88471329 0.71977987 0.28714759]
Матрица V^T:
 [[-0.33912681 -0.437819   -0.38187686 -0.46816417 -0.34683835 -0.45607353]
 [-0.18858919  0.82126553  0.00232631 -0.05677871

In [122]:
#### Усечённый SVD

k = 2  # Оставим только 2 компоненты

U_k = U[:, :k]           # Берём только первые k столбцов U
Sigma_k = np.diag(Sigma[:k])  # Оставляем только первые k значений
Vt_k = V[:k, :]         # Берём только первые k строк Vt

# Приближенная матрица X (по усеченному SVD)
X_approx = U_k @ Sigma_k @ Vt_k

print("Размеры U_k:", U_k.shape)      # (10, 2)
print("Размеры Sigma_k:", Sigma_k.shape)  # (2, 2)
print("Размеры Vt_k:", Vt_k.shape)    # (2, 6)

print("\nПриближенная X:\n", X_approx)


Размеры U_k: (10, 2)
Размеры Sigma_k: (2, 2)
Размеры Vt_k: (2, 6)

Приближенная X:
 [[ 0.16817743  0.69755752 -0.46767606  0.83479317 -0.34558205  0.52981534]
 [ 0.28650858  0.62498526 -0.59056842  0.94829403 -0.38949434  0.55264897]
 [ 0.40935398 -0.04588957 -0.50021358  0.56521254 -0.22446806  0.20643168]
 [ 0.31694102  0.09937349 -0.43665661  0.55108318 -0.22150297  0.24362566]
 [ 0.25232531  0.38612045 -0.45998306  0.69695989 -0.28491901  0.3846572 ]
 [ 0.78964762 -0.28259098 -0.89389694  0.92706547 -0.36436722  0.27766035]
 [ 0.37319192  0.02703062 -0.48122666  0.57320652 -0.22899365  0.23097267]
 [ 0.20371422  0.57791356 -0.46877474  0.78657687 -0.3241647   0.47589256]
 [ 0.87198815 -0.20606219 -1.02589726  1.11289001 -0.43984767  0.37245386]
 [ 0.06764488  0.61205347 -0.30941505  0.61458558 -0.25623068  0.41900599]]


In [123]:
from sklearn.decomposition import TruncatedSVD

model_SVD = TruncatedSVD(n_components=2)
model_SVD.fit(X)

In [124]:
x_transform = model_SVD.fit_transform(X)

In [125]:
x_transform

array([[ 1.2283176 , -0.5673075 ],
       [ 1.40946191, -0.43734452],
       [ 0.87541828,  0.25690108],
       [ 0.84136318,  0.07220382],
       [ 1.04208434, -0.23085665],
       [ 1.45337621,  0.6778344 ],
       [ 0.88158641,  0.16952745],
       [ 1.16407371, -0.43637755],
       [ 1.73344958,  0.64896435],
       [ 0.89598447, -0.53950941]])

-------------

In [3]:
import numpy as np

np.random.seed(1)
x = np.random.randint(0, 5, size=(2, 2))
y = np.random.randint(0, 5, size=(2, 2))
c = np.random.randint(0, 5, size=(2, 2))

display('x', x, 'y', y, 'c', c)

'x'

array([[3, 4],
       [0, 1]])

'y'

array([[3, 0],
       [0, 1]])

'c'

array([[4, 4],
       [1, 2]])

In [11]:
x + y + c

array([[10,  8],
       [ 1,  4]])

In [13]:
y + x + c

array([[10,  8],
       [ 1,  4]])

In [14]:
c + x + y

array([[10,  8],
       [ 1,  4]])

In [28]:
t = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
z = np.array([3, 1, 6])

t @ t.T

array([[  5,  14,  23],
       [ 14,  50,  86],
       [ 23,  86, 149]])

In [31]:
display(x, c)

array([[3, 4],
       [0, 1]])

array([[4, 4],
       [1, 2]])

In [32]:
x @ c

array([[16, 20],
       [ 1,  2]])

In [47]:
display((3 * 4 + 4 * 1), (3 * 4 + 4 * 2), (0 * 4 + 1 * 1), (0 * 4 + 1 * 2))

16

20

1

2

In [38]:
1 * 4

4

In [11]:
def transform(value):
    if value == 0:
        return 100
    else:
        return value
    

v = np.vectorize(transform)(x)
v

array([[  3,   4],
       [100,   1]])

In [42]:
x = np.array([[0, 2], [1, 0]])

y = np.array([[1, -2], [1, 0]])

c = np.array([[3, -1], [2, 0]])

In [57]:
print(f' x  \n {x} \n y \n {y} \n z \n {c}')

 x  
 [[0 2]
 [1 0]] 
 y 
 [[ 1 -2]
 [ 1  0]] 
 z 
 [[ 3 -1]
 [ 2  0]]


In [36]:
x @ y @ c

array([[ 2,  0],
       [ 1, -2]])

In [40]:
1 * 0 + 2 * 1

2

--------------

In [10]:
import numpy as np
from sklearn.datasets import make_classification

# Создаём матрицу (3x2)
X = np.array([[4, 0],
              [3, -5],
              [0, 2]])


In [11]:
U, Sigma, Vt = np.linalg.svd(X, full_matrices=False)

# Выводим полное разложение
print("U:\n", U)
print("Sigma:\n", Sigma)
print("V^T:\n", Vt)

U:
 [[-0.40593268  0.87362387]
 [-0.88401129 -0.30087213]
 [ 0.23182471  0.38243601]]
Sigma:
 [6.49097419 3.4448881 ]
V^T:
 [[-0.65872463  0.75238412]
 [ 0.75238412  0.65872463]]


In [12]:
# Берём только первую сингулярную пару
U_1 = U[:, 0].reshape(-1, 1)  # Первый столбец U
Sigma_1 = np.diag([Sigma[0]])  # Оставляем только первый элемент Σ
Vt_1 = Vt[0, :].reshape(1, -1)  # Первая строка V^T

# Приближённая матрица
X_approx = U_1 @ Sigma_1 @ Vt_1
# только к кратных признаков
X_transform = U_1 @ Sigma_1
# Выводим усечённое разложение
print("\nПриближённая матрица X (k=1):\n", X_approx)

print(" X_trans \n", X_transform)


Приближённая матрица X (k=1):
 [[ 1.73567256 -1.9824558 ]
 [ 3.77982417 -4.31725115]
 [-0.9912279   1.13216372]]
 X_trans 
 [[-2.63489853]
 [-5.73809448]
 [ 1.50476823]]


In [14]:
U_1

array([[-0.40593268],
       [-0.88401129],
       [ 0.23182471]])

In [15]:
Sigma_1

array([[6.49097419]])

In [60]:
from sklearn.decomposition import TruncatedSVD
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.linear_model import ARDRegression, BayesianRidge, ElasticNet, GammaRegressor, HuberRegressor, LogisticRegression

df = pd.read_csv('mobile_price_range_data.csv')

df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [56]:
df['price_range'].value_counts()

price_range
1    500
2    500
3    500
0    500
Name: count, dtype: int64

In [47]:
df.corr()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
battery_power,1.0,0.011252,0.011482,-0.041847,0.033334,0.015665,-0.004004,0.034085,0.001844,-0.029727,...,0.014901,-0.008402,-0.000653,-0.029959,-0.021421,0.05251,0.011522,-0.010516,-0.008343,0.200723
blue,0.011252,1.0,0.021419,0.035198,0.003593,0.013443,0.041177,0.004049,-0.008605,0.036161,...,-0.006872,-0.041533,0.026351,-0.002952,0.000613,0.013934,-0.030236,0.010061,-0.021863,0.020573
clock_speed,0.011482,0.021419,1.0,-0.001315,-0.000434,-0.043073,0.006545,-0.014364,0.01235,-0.005724,...,-0.014523,-0.009476,0.003443,-0.029078,-0.007378,-0.011432,-0.046433,0.019756,-0.024471,-0.006606
dual_sim,-0.041847,0.035198,-0.001315,1.0,-0.029123,0.003187,-0.015679,-0.022142,-0.008979,-0.024658,...,-0.020875,0.014291,0.041072,-0.011949,-0.016666,-0.039404,-0.014008,-0.017117,0.02274,0.017444
fc,0.033334,0.003593,-0.000434,-0.029123,1.0,-0.01656,-0.029133,-0.001791,0.023618,-0.013356,...,-0.00999,-0.005176,0.015099,-0.011014,-0.012373,-0.006829,0.001793,-0.014828,0.020085,0.021998
four_g,0.015665,0.013443,-0.043073,0.003187,-0.01656,1.0,0.00869,-0.001823,-0.016537,-0.029706,...,-0.019236,0.007448,0.007313,0.027166,0.037005,-0.046628,0.584246,0.016758,-0.01762,0.014772
int_memory,-0.004004,0.041177,0.006545,-0.015679,-0.029133,0.00869,1.0,0.006886,-0.034214,-0.02831,...,0.010441,-0.008335,0.032813,0.037771,0.011731,-0.00279,-0.009366,-0.026999,0.006993,0.044435
m_dep,0.034085,0.004049,-0.014364,-0.022142,-0.001791,-0.001823,0.006886,1.0,0.021756,-0.003504,...,0.025263,0.023566,-0.009434,-0.025348,-0.018388,0.017003,-0.012065,-0.002638,-0.028353,0.000853
mobile_wt,0.001844,-0.008605,0.01235,-0.008979,0.023618,-0.016537,-0.034214,0.021756,1.0,-0.018989,...,0.000939,9e-05,-0.002581,-0.033855,-0.020761,0.006209,0.001551,-0.014368,-0.000409,-0.030302
n_cores,-0.029727,0.036161,-0.005724,-0.024658,-0.013356,-0.029706,-0.02831,-0.003504,-0.018989,1.0,...,-0.006872,0.02448,0.004868,-0.000315,0.025826,0.013148,-0.014733,0.023774,-0.009964,0.004399


In [42]:
x = np.array(df.iloc[::, :-1])
y = np.array(df.iloc[::, -1])

In [44]:
x.shape

(2000, 20)

In [48]:
model_SVD = TruncatedSVD(n_components=2)
model_SVD.fit(x)

In [49]:
x_svd = model_SVD.fit_transform(df.iloc[::, :-1])

print(sum(model_SVD.explained_variance_ratio_))

x_svd

0.8083341597950824


array([[ 2668.10162073,  -773.71796695],
       [ 3501.96749335,   234.43011545],
       [ 3259.32610403,    35.06483137],
       ...,
       [ 4041.44201888,   172.08136986],
       [ 1651.89470442,   632.92105192],
       [ 3707.62700602, -1594.68298851]])

In [52]:
x_svd[:, 0]

array([2668.10162073, 3501.96749335, 3259.32610403, ..., 4041.44201888,
       1651.89470442, 3707.62700602])

In [62]:
x_train, x_test, y_train, y_test = train_test_split(x_svd, y, test_size=0.3, random_state=42)

In [63]:
model_log = LogisticRegression(multi_class='ovr')
model_log.fit(x_train, y_train)

In [64]:
y_pred = model_log.predict(x_test)

In [70]:
print(sum(y_test == y_pred))

491


In [104]:
(600 - 491) / 600

0.18166666666666667

----

In [105]:
X = np.array([[4, 0],
              [3, -5],
              [0, 2]])

U, Sigma, Vt = np.linalg.svd(X, full_matrices=False)


print("U:\n", U)
print("Sigma:\n", Sigma)
print("V^T:\n", Vt)


# Берём только первую сингулярную пару
U_1 = U[:, 0].reshape(-1, 1)  # Первый столбец U
Sigma_1 = np.diag([Sigma[0]])  # Оставляем только первый элемент Σ
Vt_1 = Vt[0, :].reshape(1, -1)  # Первая строка V^T

# Приближённая матрица
X_approx = U_1 @ Sigma_1 @ Vt_1

# Выводим усечённое разложение
print("\nПриближённая матрица X (k=1):\n", X_approx)


U:
 [[-0.40593268  0.87362387]
 [-0.88401129 -0.30087213]
 [ 0.23182471  0.38243601]]
Sigma:
 [6.49097419 3.4448881 ]
V^T:
 [[-0.65872463  0.75238412]
 [ 0.75238412  0.65872463]]

Приближённая матрица X (k=1):
 [[ 1.73567256 -1.9824558 ]
 [ 3.77982417 -4.31725115]
 [-0.9912279   1.13216372]]
