# Лабораторная работа #1

## Часть 1

Подключим Google Drive и укажем путь к датасету

In [1]:
from google.colab import drive

# Attach Google Drive to the Colab file system under /content/drive.
drive.mount('/content/drive')

# Location of the laptop price dataset on the mounted drive.
data_file_path = '/content/drive/MyDrive/Colab Notebooks/Laptop_price.csv'

Mounted at /content/drive


Загрузим датасет в pandas DataFrame и разобьём данные на обучающую и тестовую выборки

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the dataset from the path configured in the first cell.
df = pd.read_csv(data_file_path)

# 'Price' is the target; every other column is a feature. The target is
# removed from the feature matrix so the model cannot simply read the answer.
y = df['Price']
X = df.drop(columns=['Price'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Часть 2. Работа с пайплайном

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from xgboost import XGBRegressor
import joblib

# Split the feature columns into numeric and categorical groups.
# 'number' matches every numeric dtype (int32, float32, ...), not only
# int64/float64; a numeric column of another width would otherwise end up in
# neither list and be silently dropped by the ColumnTransformer below.
num_features = X.select_dtypes(include='number').columns.tolist()
cat_features = X.select_dtypes(include=['object']).columns.tolist()

# Numeric columns: fill gaps with the median, then standardize.
num_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode. handle_unknown='ignore' keeps prediction from failing on categories
# that were not present in the training data.
cat_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])
# Combine both transformers; each one is applied to its own column list.
preprocessor = ColumnTransformer([
    ('num', num_transformer, num_features),
    ('cat', cat_transformer, cat_features)
])
# Final pipeline: the preprocessor followed by the XGBRegressor model.
# random_state is fixed so that re-running the notebook trains the same model.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42))
])
# Train on the train split and back the fitted pipeline up to Google Drive.
pipeline.fit(X_train, y_train)
joblib.dump(pipeline, '/content/drive/MyDrive/Colab Notebooks/laptop_price_model.pkl')

['/content/drive/MyDrive/Colab Notebooks/laptop_price_model.pkl']

## Часть 3. Версионирование с Git

**(А)** Сгенерируем пару SSH-ключей и сохраним её копию на Google Диске

In [None]:
#!rm ~/.ssh
!mkdir -p /root/.ssh
!chmod 777 /root/.ssh


!ssh-keygen -t rsa -b 4096 -C "IT_Sem4_Lab1" -f ~/.ssh/labw_ssh_key
!cp ~/.ssh/labw_ssh_key /content/drive/MyDrive/LabWorks
!cp ~/.ssh/labw_ssh_key.pub /content/drive/MyDrive/LabWorks

%cd ~/.ssh/
!ls

Generating public/private rsa key pair.
Enter passphrase (empty for no passphrase): 
Enter same passphrase again: 
Your identification has been saved in /root/.ssh/labw_ssh_key
Your public key has been saved in /root/.ssh/labw_ssh_key.pub
The key fingerprint is:
SHA256:WeafGN9GiEzb+UYMD/zT0uQulS/r040Ey5kmu25W4tg IT_Sem4_Lab1
The key's randomart image is:
+---[RSA 4096]----+
|                 |
|           .     |
|          + +   .|
|         B +.O =.|
|        S *.+=O.=|
|          o=B*.=.|
|         +.*+.Bo+|
|        . E  oo+o|
|         +o. .o. |
+----[SHA256]-----+
/root/.ssh
labw_ssh_key  labw_ssh_key.pub


**(Б)** Восстановим ранее сгенерированный ключ

In [8]:
#!rm ~/.ssh
!mkdir -p /root/.ssh
!chmod 700 /root/.ssh

!cp /content/drive/MyDrive/LabWorks/labw_ssh_key ~/.ssh
!cp /content/drive/MyDrive/LabWorks/labw_ssh_key.pub ~/.ssh

%cd ~/.ssh
!ls

/root/.ssh
labw_ssh_key  labw_ssh_key.pub


In [None]:
# Print the public key
!cat ~/.ssh/labw_ssh_key.pub

Создаем config и known_hosts для работы SSH

In [9]:
%cd ~/.ssh

!touch config
!touch known_hosts

!ls

/root/.ssh
config	known_hosts  labw_ssh_key  labw_ssh_key.pub


In [10]:
# Fill in the SSH client config
!sed -i 'd' ~/.ssh/config # empty the config first so that re-running the cell does not duplicate entries

# Host entry for github.com: log in as user "git" with the lab key, public-key authentication only
!echo "Host github.com" >> ~/.ssh/config
#!echo "  Hostname github.com" >> ~/.ssh/config
!echo "  User git" >> ~/.ssh/config
!echo "  IdentityFile ~/.ssh/labw_ssh_key" >> ~/.ssh/config
!echo "  PreferredAuthentications publickey" >> ~/.ssh/config

#!echo "  IdentitiesOnly yes" >> ~/.ssh/config

!cat ~/.ssh/config  # Check the resulting file

Host github.com
  User git
  IdentityFile ~/.ssh/labw_ssh_key
  PreferredAuthentications publickey


In [11]:
# Fill in known_hosts with GitHub's host keys
!sed -i 'd' ~/.ssh/known_hosts # empty known_hosts first so that re-running the cell does not duplicate entries

# Host keys (ed25519, ecdsa, rsa) as listed on the GitHub documentation page:
# https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/githubs-ssh-key-fingerprints
!echo "github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl" >> ~/.ssh/known_hosts
!echo "github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=" >> ~/.ssh/known_hosts
!echo "github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=" >> ~/.ssh/known_hosts

!cat ~/.ssh/known_hosts # Check the resulting file

github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl
github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=
github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=


Проверка Git и SSH-подключения к GitHub

In [12]:
# Confirm that Git is installed
!git --version

# Test the SSH connection to GitHub using the lab key
!ssh -i ~/.ssh/labw_ssh_key -T git@github.com

git version 2.34.1
Hi morganuk! You've successfully authenticated, but GitHub does not provide shell access.


Подключим git репозиторий и сделаем первый коммит

In [None]:
# Switch to the notebook folder on Google Drive and initialize a Git repository there
%cd /content/drive/MyDrive/Colab\ Notebooks
!git init
# Stage the notebook, the saved model and the dataset
!git add Sem4Lab1.ipynb
!git add laptop_price_model.pkl
!git add Laptop_price.csv

# Set the author identity used for commits (GitHub no-reply address)
!git config --global user.email "145021797+morganuk@users.noreply.github.com"
!git config --global user.name "Morgan"

# Create the first commit
!git commit -m "Добавлен ML-пайплайн"

In [None]:
# Добавляем удаленный репозиторий
!git remote add origin git@github.com:morganuk/it_labwork_4sem.git

error: remote origin already exists.


In [None]:
#!git config --global ssh.knownHostsFile /.ssh/known_hosts
#!git config --global ssh.strictHostKeyChecking yes

In [None]:
!git branch -M main       # Rename the current branch to main
!git push -u origin main  # Push to the remote repository and set it as upstream

Enumerating objects: 14, done.
Counting objects:   7% (1/14)Counting objects:  14% (2/14)Counting objects:  21% (3/14)Counting objects:  28% (4/14)Counting objects:  35% (5/14)Counting objects:  42% (6/14)Counting objects:  50% (7/14)Counting objects:  57% (8/14)Counting objects:  64% (9/14)Counting objects:  71% (10/14)Counting objects:  78% (11/14)Counting objects:  85% (12/14)Counting objects:  92% (13/14)Counting objects: 100% (14/14)Counting objects: 100% (14/14), done.
Delta compression using up to 2 threads
Compressing objects:   7% (1/14)Compressing objects:  14% (2/14)Compressing objects:  21% (3/14)Compressing objects:  28% (4/14)Compressing objects:  35% (5/14)Compressing objects:  42% (6/14)Compressing objects:  50% (7/14)Compressing objects:  57% (8/14)Compressing objects:  64% (9/14)Compressing objects:  71% (10/14)Compressing objects:  78% (11/14)Compressing objects:  85% (12/14)Compressing objects:  92% (13/14)Compressing objects: 100% (14/14)

## Часть 4. FastAPI

In [2]:
!pip install fastapi
!pip install python-multipart
!pip install pyngrok
!pip install uvicorn

Collecting fastapi
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.2-py3-none-any.whl.metadata (6.2 kB)
Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading starlette-0.46.2-py3-none-any.whl (72 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: starlette, fastapi
Successfully installed fastapi-0.115.12 starlette-0.46.2
Collecting python-multipart
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Downloading python_multipart-0.0.20-py3-none-any.whl (24 kB)
Installing collected packages: python-multipart
Successfully installed python-multipart-0.0.20
Collecting pyngrok
  Downloading pyngrok-7.2.7-py3-none-any.whl.metadata (9.4 

In [3]:
%%writefile app.py

from fastapi import FastAPI, File, UploadFile
import pandas as pd
import joblib
from io import BytesIO

app = FastAPI()

# Загрузка обученной модели
model_path = "/content/drive/MyDrive/Colab Notebooks/laptop_price_model.pkl"
model = joblib.load(model_path)

@app.post("/predict/")
async def predict(file: UploadFile = File(...)):
    content = await file.read()
    df = pd.read_csv(BytesIO(content))
    predictions = model.predict(df)
    return {"predictions": predictions.tolist()}

Writing app.py


In [4]:
# Register the ngrok auth token
from google.colab import userdata
from pyngrok import ngrok

#!ngrok config add-authtoken YOUR_AUTH_TOKEN          # Keeping the token in the code is not safe
ngrok.set_auth_token(userdata.get('NGROK_TOKEN'))     # Read it from Colab secrets instead




In [None]:
# Start uvicorn in the background on port 8000; nohup detaches it from the
# cell and both stdout and stderr are redirected to fastapi.log
!nohup uvicorn app:app --host 0.0.0.0 --port 8000 --reload > fastapi.log 2>&1 &

In [None]:
# Alternative launch of uvicorn without nohup and without log redirection.
# NOTE(review): the original note described this as a non-background launch,
# but the trailing "&" still puts the process in the background. It also
# binds the same port 8000 as the previous cell, so presumably only one of
# the two launch cells should be run - confirm.
!uvicorn app:app --host 0.0.0.0 --port 8000 &

In [None]:
#from pyngrok import ngrok
# Open an ngrok tunnel to local port 8000 and print what ngrok.connect returned
public_url = ngrok.connect(8000)
print("API доступно по адресу:", public_url)


In [7]:
# Alternative way to serve the API: run uvicorn inside the notebook process.
import nest_asyncio
import uvicorn
from pyngrok import ngrok

from app import app

# Patch asyncio so that uvicorn can start its event loop from within the notebook.
nest_asyncio.apply()

# Expose local port 8000 through ngrok and show the public address.
ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)

# Blocks the cell until the server is stopped.
uvicorn.run(app, host='0.0.0.0', port=8000)

Public URL: https://02d5-34-16-196-107.ngrok-free.app


INFO:     Started server process [132]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     2a02:2168:83e8:c300:bdab:2b13:770f:d0fe:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     2a02:2168:83e8:c300:bdab:2b13:770f:d0fe:0 - "GET /predict HTTP/1.1" 307 Temporary Redirect
INFO:     2a02:2168:83e8:c300:bdab:2b13:770f:d0fe:0 - "GET /predict/ HTTP/1.1" 405 Method Not Allowed


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [132]


In [None]:
# Return to the repository folder on Google Drive and list pending changes
%cd /content/drive/MyDrive/Colab\ Notebooks
!git status

In [17]:
!git commit -m "Добавлен FastAPI"
!git push origin main

On branch main
Your branch is up to date with 'origin/main'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   Sem4Lab1.ipynb[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31mLabWork1.ipynb[m
	[31mUntitled[m
	[31m__pycache__/[m
	[31mapp.py[m
	[31mconfig[m
	[31mencript.ipynb[m
	[31mfastapi.log[m
	[31mit_sem4_ssh_key[m
	[31mit_sem4_ssh_key.pub[m
	[31mknown_hosts[m
	[31mpipline.ipynb[m
	[31m"\320\232\320\276\320\277\320\270\321\217 \320\261\320\273\320\276\320\272\320\275\320\276\321\202\320\260 \"Sem4Lab1.ipynb\""[m

no changes added to commit (use "git add" and/or "git commit -a")
Everything up-to-date
