In [9]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

In [23]:
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(random_state = 0)
)

In [24]:
X,y = load_iris(return_X_y = True)

In [25]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = .3)

In [28]:
pipe.fit(X_train,y_train)
#Pipeline(steps=[('standardscaler', StandardScaler()),('logisticregression', LogisticRegression(random_state=0))])

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('logisticregression', LogisticRegression(random_state=0))])

In [30]:
y_pred = pipe.predict(X_test)
print('Accuracy score',accuracy_score(y_test,y_pred))

Accuracy score 0.9333333333333333


# # cross validation

In [32]:
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_validate

In [43]:
X,y = make_regression(n_samples = 200000,random_state = 0)

In [44]:
lr = LinearRegression(fit_intercept = True)
result = cross_validate(lr,X,y)

In [45]:
result['test_score']

array([1., 1., 1., 1., 1.])

## Automatic parameter searches

# Randomized search

In [55]:
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

In [78]:
df = pd.read_csv("C:/Users/VEDIT/Desktop/datascience/datasets/linear_regression/house_prices_datasets/house_price_dataset_original_v2_cleaned.csv")

In [79]:
df.head()

Unnamed: 0,land_size_sqm,house_size_sqm,no_of_rooms,no_of_bathrooms,large_living_room,parking_space,front_garden,swimming_pool,distance_to_school,wall_fence,house_age,water_front,distance_to_supermarket_km,crime_rate_index,room_size,property_value
0,201,177,3,1,0,1,1,0,3.3,1,10,0,6.8,0.9,0,165432
1,196,182,4,3,1,1,0,1,1.2,1,11,0,4.1,1.42,1,187043
2,198,182,4,4,1,1,0,1,5.9,0,20,0,2.1,4.12,1,148658
3,178,166,2,3,0,1,0,0,5.9,0,5,0,0.7,4.36,0,123785
4,183,165,3,1,1,1,0,0,3.8,1,8,0,0.7,0.42,0,156470


In [80]:
y = df.pop('property_value')

In [81]:
df

Unnamed: 0,land_size_sqm,house_size_sqm,no_of_rooms,no_of_bathrooms,large_living_room,parking_space,front_garden,swimming_pool,distance_to_school,wall_fence,house_age,water_front,distance_to_supermarket_km,crime_rate_index,room_size
0,201,177,3,1,0,1,1,0,3.3,1,10,0,6.8,0.90,0
1,196,182,4,3,1,1,0,1,1.2,1,11,0,4.1,1.42,1
2,198,182,4,4,1,1,0,1,5.9,0,20,0,2.1,4.12,1
3,178,166,2,3,0,1,0,0,5.9,0,5,0,0.7,4.36,0
4,183,165,3,1,1,1,0,0,3.8,1,8,0,0.7,0.42,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4949,297,261,4,4,1,1,1,1,2.9,1,20,0,4.1,3.99,2
4950,302,282,4,2,1,1,1,1,3.8,1,12,1,4.8,2.09,2
4951,153,147,3,4,1,1,0,0,3.3,1,7,0,4.7,3.98,3
4952,180,158,2,1,0,1,0,1,2.2,0,1,0,3.4,3.75,2


In [82]:
X = df.values

In [87]:
param_distributions = {'n_estimators':randint(1,5),
                         'max_depth':randint(5,10)}

In [88]:
search = RandomizedSearchCV(estimator = RandomForestRegressor(random_state = 0),
                           n_iter = 5,
                           param_distributions = param_distributions,
                           random_state = 0
                           )

In [89]:
search.fit(X_train,y_train)

RandomizedSearchCV(estimator=RandomForestRegressor(random_state=0), n_iter=5,
                   param_distributions={'max_depth': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001DE16773670>,
                                        'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001DE166B57C0>},
                   random_state=0)

In [90]:
search.best_params_

{'max_depth': 7, 'n_estimators': 1}

In [91]:
search.score(X_test,y_test)

0.9031563845050216

In [92]:
search.score(X_train,y_train)

0.9855052457205964

## bagging classfier

In [96]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier

In [103]:
X, y = make_classification(n_samples=100, n_features=4,n_informative=2, n_redundant=0,random_state=0, shuffle=False)

In [106]:
clf = BaggingClassifier(base_estimator = SVC(),n_estimators = 10,random_state = 0)
clf.fit(X,y)

BaggingClassifier(base_estimator=SVC(), random_state=0)

In [111]:
clf.predict([[30, 20, 90, 1100]])

array([1])