In [1]:
# Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Fetch the dataset

from sklearn.datasets import fetch_california_housing

In [2]:
dataset = fetch_california_housing()
dataset

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'frame': None,
 'target_names': ['MedHouseVal'],
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': '.. _california_housing_dataset:\n

In [3]:
# Make a dataset of the data
housing_data = pd.DataFrame(dataset['data'], columns=dataset['feature_names'])

# Add the target column - MedHouseVal
housing_data['Target'] = dataset['target']

## Test SVR method

In [4]:
# Imports TestTrain & Estimator/Model
from sklearn.model_selection import train_test_split
from sklearn import svm

# Split the data in X features and y target
X = housing_data.drop('Target', axis =1)
y = housing_data['Target']

# Split the date into test and train 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


# Train the model
regr = svm.SVR()
regr.fit(X_train,y_train)

# Test the model
regr.score(X_test,y_test)

-0.025634680323543213

## Testing Ensemble Regressor Method

In [5]:
# Import Estimator/Model
from sklearn.ensemble import BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor

# Train the model

bagging = BaggingRegressor(KNeighborsRegressor(),
                          max_samples=0.5, max_features=0.5)
bagging.fit(X_train,y_train)

# Test the model

bagging.score(X_test,y_test)

0.4640060456113786

## SVR Method with kernel - 'rbf'

In [6]:
# Imports TestTrain & Estimator/Model
from sklearn.model_selection import train_test_split
from sklearn import svm

# Split the data in X features and y target
X = housing_data.drop('Target', axis =1)
y = housing_data['Target']

# Split the date into test and train 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


# Train the model
regr = svm.SVR(kernel='rbf')
regr.fit(X_train,y_train)

# Test the model
regr.score(X_test,y_test)

-0.024809759432054213

## Lasso Method

In [7]:
# Imports TestTrain & Estimator/Model
from sklearn.linear_model import Lasso

# Train the model
regr = Lasso(alpha=0.01)
regr.fit(X_train,y_train)

# Test the model
regr.score(X_test,y_test)

0.608232562494603

## Using the Random Forest Regressor

In [15]:
# Import Model
from sklearn.ensemble import RandomForestRegressor

# Setup Random Seed
np.random.seed(42)

# Create the data
X = housing_data.drop('Target', axis=1)
y = housing_data['Target']

# Split into test and train
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

# Create random forest model
regr = RandomForestRegressor()
regr.fit(X_train,y_train)

# Check the score
regr.score(X_test,y_test)

0.8065734772187598

## Testing a classification problem

In [21]:
# Import a dataset

from sklearn.datasets import load_iris

dataset = load_iris()
dataset.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [36]:
# Prepare data

X = pd.DataFrame(dataset['data'],columns=dataset['feature_names'])

y = pd.Series(dataset['target'])

In [37]:
# Split into train & test

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

In [39]:
# Import the model and fit it

from sklearn.svm import SVC



clf = SVC()
clf.fit(X_train,y_train)

In [40]:
clf.score(X_test,y_test)

0.9333333333333333