# Лабораторная работа #1

Подключим Google Drive и укажем путь к датасету

In [30]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at /content/drive.
drive.mount('/content/drive')

# Path of the laptop price CSV inside the mounted Drive; read by the loading cell.
data_file_path = '/content/drive/MyDrive/Colab Notebooks/Laptop_price.csv'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Загрузим датасет в pandas DataFrame и разобьём данные на обучающую и тестовую выборки

In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the dataset from the CSV file at data_file_path into a DataFrame.
df = pd.read_csv(data_file_path)

# Target: the 'Price' column holds the values the model has to predict.
y = df['Price']
# Features: every column except 'Price', so the model never sees the answer.
X = df.drop('Price', axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Работа с пайплайном

In [27]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from xgboost import XGBRegressor
import joblib

In [29]:
# Split the feature columns by dtype: int64/float64 columns are handled as
# numeric, object columns as categorical.
num_features = list(X.select_dtypes(include=['int64', 'float64']).columns)
cat_features = list(X.select_dtypes(include=['object']).columns)

# Numeric branch: fill gaps with the column median, then standardize.
num_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at transform time.
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own branch.
preprocessor = ColumnTransformer(transformers=[
    ('num', num_transformer, num_features),
    ('cat', cat_transformer, cat_features),
])

# Full pipeline: the combined preprocessor followed by the XGBRegressor model.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    (
        'model',
        XGBRegressor(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=5,
        ),
    ),
])

# Fit on the training split, then persist the fitted pipeline to Drive.
pipeline.fit(X_train, y_train)
joblib.dump(pipeline, '/content/drive/MyDrive/Colab Notebooks/laptop_price_model.pkl')

['/content/drive/MyDrive/Colab Notebooks/laptop_price_model.pkl']

Сгенерируем SSH-ключи

In [114]:
!ssh-keygen -t rsa -b 4096 -C "IT_Sem4_Lab1" #-f /content/drive/MyDrive/LabWorks/id_rsa
!cp ~/.ssh/id_rsa /content/drive/MyDrive/LabWorks
!cp ~/.sshid_rsa.pub /content/drive/MyDrive/LabWorks/

Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa): ^C
cp: cannot stat '/root/.sshid_rsa.pub': No such file or directory


Восстановим ранее сгенерированный ключ

In [None]:
!mkdir -p /root/.ssh
!chmod 700 /root/.ssh

!cp /content/drive/MyDrive/LabWorks/id_rsa ~/.ssh
!cp /content/drive/MyDrive/LabWorks/id_rsa.pub ~/.ssh

In [116]:
# Print the public key stored in ~/.ssh/id_rsa.pub
# (presumably to register it in the GitHub account - confirm).
!cat ~/.ssh/id_rsa.pub

ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC+Xwb2IPvBcq4o39UXgktSMKHm3RnwikpzYShsE6TkxLLetNtonyBeSPLyRkxO3JEZWUX7AW7oke355iK8It0qxF5W2nGw5mSuso3RKiD1A9MM6NqIEQYH6NFoeIhdiCppnBPwLGHtIr6qJmse7Ukp6HPcZda8Yn3cfUSaG/8e8/MVznVOC5XgYdth6FdAcGgKa4bTW+XIBSvUh1tJaso9u728/SjvUlWd+ueR0dfELPL9Nd/G8E4J0FMxvKM+bT+M+OmrB6OcYrrfJ/VA5x49bBbFn6rjqbS1gXFAnW98UTErN2TkRuvig2cANqt2LcaXro7o9isDVJlQlfFkBtjqA7Q7GlT/mvZY9ENVlHzm/EK4mzswPs+b/OHzMmVxpUgftFDm8lT8r59WzJIipBu8+et2N2ANS0CrD/ZGmlvkqHeRCOyKtsDYEqHAiljg0cwH3A3SfHwkx/vFunv5DShqT4EiVNDEOLwmBL1wZM4WGpakUdaaPC6Wd0LiaofJoyHZKVOmPh3bX03srgOTDPfNHt4jdcI+aS5zMTnMHRr3enAQ51Eb9IDHGvFPXIG8Zk+4NHgks0Bm3p0SXIJnCjrMHknbfXff+p2yY2IYypcdWbnti5Nz47ETh4gGz0fzJSl1pxCpabzF7H1of1HiU/S9gH8206fGIfPplja3Rt9u7w== IT_Sem4_Lab1


Создаем config и known_hosts

In [91]:
%cd ~/.ssh

!touch config
!touch known_hosts

/.ssh


In [133]:
!sed -i 'd' ~/.ssh/config # очищаем config перед заполнением

!echo "Host github.com" >> ~/.ssh/config
!echo "  Hostname github.com" >> ~/.ssh/config
!echo "  User git" >> ~/.ssh/config
!echo "  IdentityFile ~/.ssh/id_rsa" >> ~/.ssh/config
!echo "  IdentitiesOnly yes" >> ~/.ssh/config

!cat ~/.ssh/config

Host github.com
  Hostname github.com
  User git
  IdentityFile ~/.ssh/id_rsa
  IdentitiesOnly yes


In [119]:
!sed -i 'd' ~/.ssh/known_hosts # очищаем known_hosts перед заполнением

!echo "github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl" >> ~/.ssh/known_hosts
!echo "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=" >> ~/.ssh/known_hosts
!echo "github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=" >> ~/.ssh/known_hosts

!cat /.ssh/known_hosts

github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl
github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=
github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=


In [120]:
# Print the git version installed in the runtime.
!git --version

git version 2.34.1


Подключим git-репозиторий и сделаем первый коммит

In [57]:
%cd /content/drive/MyDrive/Colab\ Notebooks
!git init
!git add Sem4Lab1.ipynb
!git add laptop_price_model.pkl
!git add Laptop_price.csv

!git config --global user.email "145021797+morganuk@users.noreply.github.com"
!git config --global user.name "Morgan"

!git commit -m "Добавлен ML-пайплайн"

/content/drive/MyDrive/Colab Notebooks
Reinitialized existing Git repository in /content/drive/MyDrive/Colab Notebooks/.git/
[master (root-commit) ba8e6fe] Добавлен ML-пайплайн
 3 files changed, 1002 insertions(+)
 create mode 100644 Laptop_price.csv
 create mode 100644 Sem4Lab1.ipynb
 create mode 100644 laptop_price_model.pkl


In [126]:
!git remote add origin git@github.com:morganuk/it_labwork_4sem.git

error: remote origin already exists.


In [73]:
# NOTE(review): both commands are disabled. `ssh.knownHostsFile` and
# `ssh.strictHostKeyChecking` do not look like settings git itself reads, and
# the path points at /.ssh rather than ~/.ssh - confirm before re-enabling.
#!git config --global ssh.knownHostsFile /.ssh/known_hosts
#!git config --global ssh.strictHostKeyChecking yes

In [137]:
# Rename the current branch to `main` (-M forces the rename).
!git branch -M main
# Push `main` to `origin` and set it as the upstream branch.
# NOTE(review): the recorded run failed with "Permission denied (publickey)";
# presumably the public key is not registered on the GitHub account, or a
# different key is being offered - confirm, e.g. with `ssh -T git@github.com`.
!git push -u origin main

git@github.com: Permission denied (publickey).
fatal: Could not read from remote repository.

Please make sure you have the correct access rights
and the repository exists.
