In [None]:

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.grid_search import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from numpy import genfromtxt, savetxt

from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

# File locations used by this cell. os.path.join() with a single argument
# returns that argument unchanged, so a plain string is equivalent and avoids
# depending on `os`, which this cell never imports.
turbo_csv_filename = 'turbo_predict.csv'
train_csv_filename = 'AllFeatures_Train.csv'

# Column 0 of the training CSV is read as the (string) label; the file is then
# parsed again as floats and column 0 is dropped to obtain the feature matrix.
# Passing the path instead of an open() handle lets genfromtxt open and close
# the file itself, so no file descriptor is leaked.
y = genfromtxt(train_csv_filename, delimiter=',', usecols=0, dtype=str)
X = genfromtxt(train_csv_filename, delimiter=',', dtype='f8')[:, 1:]

# Build one estimator from PCA and univariate selection; FeatureUnion
# concatenates the outputs of both transformers.
pca = PCA(n_components=2)
selection = SelectKBest(k=1)
combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

# Use the combined features to transform the dataset and persist the result.
# `savetxt` is the name imported from numpy at the top of this cell; `np` is
# only imported in a later cell, so `np.savetxt` would fail on a fresh kernel.
X_features = combined_features.fit(X, y).transform(X)
savetxt("transformed_dataset.csv", X_features, delimiter=",")

# Chain the feature union with a linear SVM so that feature extraction is
# re-fitted on every fit of the pipeline.
svm = SVC(kernel="linear")
pipeline = Pipeline([("features", combined_features), ("svm", svm)])

# Do a grid search over k, n_components and C
# (3 * 2 * 3 = 18 parameter combinations).
param_grid = dict(features__pca__n_components=[1, 2, 3],
                  features__univ_select__k=[1, 2],
                  svm__C=[0.1, 1, 10])

grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10)
grid_search.fit(X, y)
print(grid_search.best_estimator_)

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1 
[CV]  features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1, score=0.992141 -   1.2s
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1 


[Parallel(n_jobs=1)]: Done   1 tasks       | elapsed:    1.2s


[CV]  features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1, score=0.994083 -   0.9s
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1 
[CV]  features__pca__n_components=1, svm__C=0.1, features__univ_select__k=1, score=0.992079 -   0.4s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=1 
[CV]  features__pca__n_components=1, svm__C=1, features__univ_select__k=1, score=0.992141 -   5.7s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=1 


[Parallel(n_jobs=1)]: Done   4 tasks       | elapsed:    8.5s


[CV]  features__pca__n_components=1, svm__C=1, features__univ_select__k=1, score=0.994083 -   0.4s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=1 
[CV]  features__pca__n_components=1, svm__C=1, features__univ_select__k=1, score=0.992079 -   0.3s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=1 
[CV]  features__pca__n_components=1, svm__C=10, features__univ_select__k=1, score=0.992141 -   4.9s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=1 


[Parallel(n_jobs=1)]: Done   7 tasks       | elapsed:   14.4s


[CV]  features__pca__n_components=1, svm__C=10, features__univ_select__k=1, score=0.994083 -   0.5s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=1 
[CV]  features__pca__n_components=1, svm__C=10, features__univ_select__k=1, score=0.992079 -   0.3s
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2 
[CV]  features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2, score=0.990177 -   2.3s
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2 
[CV]  features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2, score=0.988166 -   1.1s
[CV] features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2 
[CV]  features__pca__n_components=1, svm__C=0.1, features__univ_select__k=2, score=0.994059 -   0.8s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=2 


[Parallel(n_jobs=1)]: Done  12 tasks       | elapsed:   19.9s


[CV]  features__pca__n_components=1, svm__C=1, features__univ_select__k=2, score=0.990177 -   1.7s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=2 
[CV]  features__pca__n_components=1, svm__C=1, features__univ_select__k=2, score=0.986193 -   1.3s
[CV] features__pca__n_components=1, svm__C=1, features__univ_select__k=2 
[CV]  features__pca__n_components=1, svm__C=1, features__univ_select__k=2, score=0.994059 -   0.8s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=2 
[CV]  features__pca__n_components=1, svm__C=10, features__univ_select__k=2, score=0.990177 -   1.6s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=2 
[CV]  features__pca__n_components=1, svm__C=10, features__univ_select__k=2, score=0.986193 -   1.2s
[CV] features__pca__n_components=1, svm__C=10, features__univ_select__k=2 


[Parallel(n_jobs=1)]: Done  17 tasks       | elapsed:   26.9s


[CV]  features__pca__n_components=1, svm__C=10, features__univ_select__k=2, score=0.994059 -   0.8s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1 
[CV]  features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1, score=0.986248 -   0.5s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1 
[CV]  features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1, score=0.994083 -   0.5s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1 
[CV]  features__pca__n_components=2, svm__C=0.1, features__univ_select__k=1, score=0.998020 -   0.3s
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=1 
[CV]  features__pca__n_components=2, svm__C=1, features__univ_select__k=1, score=0.988212 -   0.4s
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=1 
[CV]  features__pca__n_components=2, svm__C=1, features__univ_select__k=1, score=0.994083 -   0.4s
[CV] features__pca__n_c

[Parallel(n_jobs=1)]: Done  24 tasks       | elapsed:   31.2s


[CV]  features__pca__n_components=2, svm__C=10, features__univ_select__k=1, score=0.988212 -   0.4s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=1 
[CV]  features__pca__n_components=2, svm__C=10, features__univ_select__k=1, score=0.994083 -   0.9s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=1 
[CV]  features__pca__n_components=2, svm__C=10, features__univ_select__k=1, score=0.998020 -   0.7s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2 
[CV]  features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2, score=0.988212 -   6.7s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2 
[CV]  features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2, score=0.986193 -   0.4s
[CV] features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2 
[CV]  features__pca__n_components=2, svm__C=0.1, features__univ_select__k=2, score=0.994059 -   0.8s
[CV] features__pca_

[Parallel(n_jobs=1)]: Done  31 tasks       | elapsed:   42.7s


[CV]  features__pca__n_components=2, svm__C=1, features__univ_select__k=2, score=0.986193 -  16.2s
[CV] features__pca__n_components=2, svm__C=1, features__univ_select__k=2 
[CV]  features__pca__n_components=2, svm__C=1, features__univ_select__k=2, score=0.994059 -   1.1s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=2 
[CV]  features__pca__n_components=2, svm__C=10, features__univ_select__k=2, score=0.988212 -   0.9s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=2 
[CV]  features__pca__n_components=2, svm__C=10, features__univ_select__k=2, score=0.986193 -  16.2s
[CV] features__pca__n_components=2, svm__C=10, features__univ_select__k=2 
[CV]  features__pca__n_components=2, svm__C=10, features__univ_select__k=2, score=0.994059 -   1.1s
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=1 
[CV]  features__pca__n_components=3, svm__C=0.1, features__univ_select__k=1, score=0.988212 -   0.5s
[CV] features__pca__n_comp

[Parallel(n_jobs=1)]: Done  40 tasks       | elapsed:  1.3min


[CV]  features__pca__n_components=3, svm__C=1, features__univ_select__k=1, score=0.986193 -   8.7s
[CV] features__pca__n_components=3, svm__C=1, features__univ_select__k=1 
[CV]  features__pca__n_components=3, svm__C=1, features__univ_select__k=1, score=0.996040 -   0.2s
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=1 
[CV]  features__pca__n_components=3, svm__C=10, features__univ_select__k=1, score=0.988212 -   0.3s
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=1 
[CV]  features__pca__n_components=3, svm__C=10, features__univ_select__k=1, score=0.986193 -   8.7s
[CV] features__pca__n_components=3, svm__C=10, features__univ_select__k=1 
[CV]  features__pca__n_components=3, svm__C=10, features__univ_select__k=1, score=0.996040 -   0.2s
[CV] features__pca__n_components=3, svm__C=0.1, features__univ_select__k=2 
[CV]  features__pca__n_components=3, svm__C=0.1, features__univ_select__k=2, score=0.982318 -   0.2s
[CV] features__pca__n_comp

[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:  1.7min


In [None]:
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
import numpy as np
# Rank the features of the training CSV by random-forest importance.
# Column 0 of the CSV is read as the target; the remaining columns are the
# features. `np.genfromtxt` is used so this cell relies only on its own
# `import numpy as np`, and the path is passed directly so no file handle is
# left open.
train_csv_filename = 'AllFeatures_Train.csv'
Y = np.genfromtxt(train_csv_filename, delimiter=',', usecols=0, dtype=str)
X = np.genfromtxt(train_csv_filename, delimiter=',', dtype='f8')[:, 1:]

# Identify each feature by its column position in the CSV (column 0 is the
# target, so features start at 1). X[0] is the first row of parsed float
# values, not a list of names, so it cannot be used to label the importances.
names = ["col_%d" % col for col in range(1, X.shape[1] + 1)]

# NOTE(review): Y is loaded as strings and passed to a regressor; this only
# works if the labels are numeric strings -- confirm, or consider a classifier.
# A fixed random_state makes the importance ranking reproducible across runs.
rf = RandomForestRegressor(random_state=0)
rf.fit(X, Y)

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("Features sorted by their score:")
importances = [round(float(score), 4) for score in rf.feature_importances_]
print(sorted(zip(importances, names), reverse=True))