In [3]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from xgboost import XGBRegressor
from fastapi import FastAPI, UploadFile, File
from io import BytesIO

import pandas as pd
import joblib

In [6]:
# Read the laptop price table and separate the target column from the predictors.
df = pd.read_csv('Laptop_price.csv')
y = df['Price']
X = df.drop(columns=['Price'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Group the predictor columns by dtype so each group gets its own preprocessing.
num_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
cat_features = X.select_dtypes(include=['object']).columns.tolist()

# Numeric columns: fill gaps with the median, then standardise.
num_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
)

# Categorical columns: fill gaps with the most frequent value, then one-hot encode.
# handle_unknown='ignore' keeps transform from failing on categories not seen during fit.
cat_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(handle_unknown='ignore')),
    ]
)

# Route each column group to its transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_transformer, num_features),
        ('cat', cat_transformer, cat_features),
    ]
)

# Full model: preprocessing followed by a gradient-boosted regressor.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        (
            'model',
            XGBRegressor(
                n_estimators=100,
                learning_rate=0.1,
                max_depth=5,
            ),
        ),
    ]
)

# Fit on the training split only, then persist the whole fitted pipeline to disk.
pipeline.fit(X_train, y_train)
joblib.dump(pipeline, 'laptop_price_model.pkl')

['laptop_price_model.pkl']

In [76]:
# Initialise a Git repository in the current working directory.
# Running this against an existing repository just reinitialises it (see the recorded output).
!git init

Reinitialized existing Git repository in /content/drive/MyDrive/Mtuci/.git/


In [83]:
# Stage the notebook file and the dataset CSV for the next commit.
!git add pipline.ipynb
!git add Laptop_price.csv

In [71]:
# Commit the staged files; the commit message is Russian for "Added ML pipeline".
# NOTE(review): the recorded output shows "nothing to commit", i.e. this run found no staged changes.
!git commit -m "Добавлен ML-пайплайн"

On branch main
nothing to commit, working tree clean


In [110]:
# Register the GitHub repository as the remote named "origin".
# NOTE(review): this is not re-runnable — the recorded output shows git rejecting it
# because "origin" already exists; run it only once per repository.
!git remote add origin https://github.com/rowdysss/pipline_lab.git

error: remote origin already exists.


In [113]:
# Push the local "main" branch to "origin"; -u also sets it as the upstream tracking branch.
!git push -u origin main

Enumerating objects: 4, done.
Counting objects: 100% (4/4), done.
Delta compression using up to 2 threads
Compressing objects: 100% (4/4), done.
Writing objects: 100% (4/4), 40.55 KiB | 2.70 MiB/s, done.
Total 4 (delta 0), reused 0 (delta 0), pack-reused 0
To https://github.com/rowdysss/pipline_lab.git
 * [new branch]      main -> main
Branch 'main' set up to track remote branch 'main' from 'origin'.


In [84]:
%%writefile app.py

app = FastAPI()

# Загрузка обученной модели
model_path = "laptop_price_model.pkl"
model = joblib.load(model_path)

@app.post("/predict/")
async def predict(file: UploadFile = File(...)):
    content = await file.read()
    df = pd.read_csv(BytesIO(content))
    predictions = model.predict(df)
    return {"predictions": predictions.tolist()}

Writing app.py
