<a href="https://colab.research.google.com/github/miuceo/ML_intro/blob/main/california_housing_Pipline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [43]:
import pandas as pd
import numpy as np
import sklearn  # make the scikit-learn package available
from sklearn.model_selection import train_test_split

# Download the housing dataset (CSV) from its online location.
url = "https://github.com/ageron/handson-ml2/blob/master/datasets/housing/housing.csv?raw=true"
df = pd.read_csv(url)

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical across re-runs.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

# Feature frames without the target column. DataFrame.drop returns a NEW
# frame (it does not modify in place), so each result must be bound to a name;
# previously the test-set result was computed and silently discarded.
housing = train_set.drop("median_house_value", axis=1)
housing_test = test_set.drop("median_house_value", axis=1)

# Numeric-only view of the training features: the categorical
# "ocean_proximity" column is removed here.
housing_num = housing.drop("ocean_proximity", axis=1)

In [36]:
# Preview the first rows of the numeric training features.
housing_num.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income
14196,-117.03,32.71,33.0,3126.0,627.0,2300.0,623.0,3.2596
8267,-118.16,33.77,49.0,3382.0,787.0,1314.0,756.0,3.8125
17445,-120.48,34.66,4.0,1897.0,331.0,915.0,336.0,4.1563
14265,-117.11,32.69,36.0,1421.0,367.0,1418.0,355.0,1.9425
2271,-119.8,36.78,43.0,2382.0,431.0,874.0,380.0,3.5542


**Creating our own Transformer**

In [37]:
from pickle import NONE
from sklearn.base import BaseEstimator, TransformerMixin
# Positional indices of the columns used below, counted in the column order
# of the numeric feature array handed to the transformer.
rooms_ix, bedrooms_ix, population_ix, households_ix = 3, 4, 5, 6


class CombineAttributeAdder(BaseEstimator, TransformerMixin):
    """Compute ratio features from a 2-D numeric feature array.

    The transformer is stateless: ``fit`` learns nothing and ``transform``
    derives the following columns by element-wise division:

    * rooms per household       (column ``rooms_ix`` / column ``households_ix``)
    * population per household  (column ``population_ix`` / column ``households_ix``)
    * bedrooms per room         (column ``bedrooms_ix`` / column ``rooms_ix``),
      only when ``add_bedrooms_per_room`` is truthy

    NOTE(review): the output contains ONLY the derived ratio columns; the
    input columns are not passed through. Confirm that dropping the original
    features is intended.

    Parameters
    ----------
    add_bedrooms_per_room : bool, default=True
        Whether to append the bedrooms-per-room ratio as a third column.
    """

    def __init__(self, add_bedrooms_per_room=True):
        self.add_bedrooms_per_room = add_bedrooms_per_room

    def fit(self, X, y=None):
        """Return ``self`` unchanged; there is nothing to learn from ``X``.

        ``y`` is accepted only for scikit-learn API compatibility and is
        ignored. Its default is the built-in ``None`` (not ``pickle.NONE``,
        which is the bytes opcode ``b'N'``).
        """
        return self

    def transform(self, X):
        """Return the derived ratio columns for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Must have at least ``households_ix + 1`` columns. DataFrames and
            nested lists are accepted; they are converted to an ndarray first.

        Returns
        -------
        numpy.ndarray of shape (n_samples, 3) if ``add_bedrooms_per_room``
        is truthy, otherwise (n_samples, 2).
        """
        # Positional slicing (X[:, i]) requires an ndarray; a DataFrame passed
        # directly would otherwise fail on this indexing.
        X = np.asarray(X)

        rooms_per_household = X[:, rooms_ix] / X[:, households_ix]
        population_per_household = X[:, population_ix] / X[:, households_ix]

        if self.add_bedrooms_per_room:
            bedroom_per_room = X[:, bedrooms_ix] / X[:, rooms_ix]
            return np.c_[rooms_per_household, population_per_household, bedroom_per_room]

        return np.c_[rooms_per_household, population_per_household]

**Numeric Pipeline = Raqamli konveyer**

In [38]:
from sklearn.pipeline import Pipeline # Konveyer yasash
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Numeric preprocessing chain, applied in order:
#   1. fill missing values with each column's median,
#   2. replace the columns with the derived ratio features,
#   3. standardize the resulting columns.
num_pipeline = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("attribs_adder", CombineAttributeAdder()),
        ("std_scaler", StandardScaler()),
    ]
)

# Fit on the numeric training features and display the transformed array.
num_pipeline.fit_transform(housing_num)

array([[-0.17491646,  0.05137609, -0.2117846 ],
       [-0.40283542, -0.11736222,  0.34218528],
       [ 0.08821601, -0.03227969, -0.66165785],
       ...,
       [-0.60675918,  0.02030568,  0.99951387],
       [ 0.40217517,  0.00707608, -0.79086209],
       [-0.85144571, -0.08535429,  1.69520292]])

**Text Pipeline = Matnli konveyer**

In [48]:
from sklearn.compose import ColumnTransformer

# Column groups: the single categorical column, and every column of the
# numeric-only frame.
cat_attribs = ["ocean_proximity"]
num_attribs = housing_num.columns.tolist()

# Route each column group to its own preprocessing: the numeric pipeline for
# the numeric columns, one-hot encoding for the categorical column.
full_pipline = ColumnTransformer(
    transformers=[
        ("num", num_pipeline, num_attribs),
        ("cat", OneHotEncoder(), cat_attribs),
    ]
)

# Fit on the training features and keep the transformed result.
housing_prepared = full_pipline.fit_transform(housing)

In [53]:
# Show the first five rows (all columns) of the prepared training data.
housing_prepared[0:5, :]

array([[-0.17491646,  0.05137609, -0.2117846 ,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ],
       [-0.40283542, -0.11736222,  0.34218528,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ],
       [ 0.08821601, -0.03227969, -0.66165785,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ],
       [-0.60001532,  0.07750687,  0.78303162,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ],
       [ 0.3490073 , -0.06883176, -0.55036364,  0.        ,  1.        ,
         0.        ,  0.        ,  0.        ]])