# Classification - How does a school's environment affect students' Math and ELA Assessment Scores?

Predict whether a school will not meet (1), approach (2), meet (3), or exceed (4) target

## Data Preprocessing

**Import Packages**

In [1]:
import pandas as pd
import numpy as np

# libraries for cleaning and preprocessing data
from sklearn.preprocessing import StandardScaler

# libraries for modeling
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import lightgbm as lgb

# libraries for evaluating models
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, roc_auc_score

# libraries for visualizations
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', message='numpy.dtype size changed')

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


**Read Data**

In [2]:
nyc_schools = pd.read_csv('Data/nyc_schools.csv')
nyc_schools.head()

Unnamed: 0,DBN,School Name,Enrollment,Supportive Environment Rating,Student Achievement Rating,Interesting and challenging curriculum,Effective teaching and learning,Effective school assessment,Clear communication - high expectations,Teacher collaboration,...,Student Attendance Rate,Percent of Students Chronically Absent,Teacher Attendance Rate,Collaborative Teachers Score,Effective School Leadership Score,Rigorous Instruction Score,Strong Family-Community Ties Score,Trust Score,Borough,District
0,01M015,P.S. 015 Roberto Clemente,173,4,4,4,3,3,3,3,...,0.939,0.186,0.985,4.12,4.09,3.91,3.8,3.99,Manhattan,01M
1,01M019,P.S. 019 Asher Levy,244,3,4,4,4,4,4,4,...,0.914,0.353,0.968,4.27,4.25,4.64,4.1,3.55,Manhattan,01M
2,01M020,P.S. 020 Anna Silver,463,2,2,2,2,2,3,2,...,0.93,0.274,0.967,2.76,3.04,1.94,3.64,3.12,Manhattan,01M
3,01M034,P.S. 034 Franklin D. Roosevelt,320,2,2,2,2,2,3,3,...,0.901,0.391,0.976,2.4,2.45,2.04,3.33,2.32,Manhattan,01M
4,01M063,The STAR Academy - P.S.63,177,3,4,4,3,4,3,4,...,0.927,0.261,0.98,4.53,4.07,4.14,4.01,3.95,Manhattan,01M


In [4]:
nyc_schools['District'] = nyc_schools['District'].apply(lambda x:
                                                       841 if x == '84M'
                                                       else (842 if x == '84X'
                                                            else (843 if x == '84K'
                                                                 else(844 if x == '84Q'
                                                                     else (845 if x == '84R'
                                                                          else x[:2])))))

**Define features and target**

In [8]:
X = nyc_schools.drop('Student Achievement Rating',
                    axis=1).iloc[:, 2:]
y = nyc_schools['Student Achievement Rating']

**Encode categorical variables**

In [9]:
X = pd.get_dummies(X, drop_first=True)

**Train-test split**

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=220)

**Standardize variables**

In [11]:
scale = StandardScaler()
X_train_scale = pd.DataFrame(scale.fit_transform(X_train),
                             columns=X.columns)
X_test_scale = pd.DataFrame(scale.transform(X_test),
                            columns=X.columns)

## Modeling

### Random Forest

In [16]:
# Vanilla Model
rf_v = RandomForestClassifier(random_state=220)

rf_v.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=220,
                       verbose=0, warm_start=False)

In [17]:
rf_v.score(X_test, y_test)

0.51171875

**Tuning hyperparameters**

In [23]:
# parameter grid
rf_grid = {'n_estimators': [120, 300, 500],
           'min_samples_split': [2, 3, 4],
           'min_samples_leaf': [1, 2, 3],
           'max_features': ['log2', 'auto']}

In [204]:
rf_gs = GridSearchCV(estimator=RandomForestClassifier(random_state=220),
                     param_grid=rf_grid,
                     scoring='f1_micro',
                     cv=5, verbose=1, n_jobs=-1)

rf_gs.fit(X_train, y_train)

Fitting 5 folds for each of 54 candidates, totalling 270 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   29.0s
[Parallel(n_jobs=-1)]: Done 270 out of 270 | elapsed:   42.6s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=RandomForestClassifier(bootstrap=True, class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators='warn', n_jobs=None,
                                              oob_score=False, random_state=220,
                                              verbose=0, warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid={'m

In [205]:
print('Best params:', rf_gs.best_params_)
print('Train score: %.3f' % rf_gs.best_score_)
print('Test score: %.3f' % rf_gs.score(X_test, y_test))

Best params: {'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Train score: 0.534
Test score: 0.590


### LightGBM

In [26]:
# Vanilla Model
lgb_v = lgb.LGBMClassifier(random_state=220)

lgb_v.fit(X_train, y_train)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.1, max_depth=-1,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
               random_state=220, reg_alpha=0.0, reg_lambda=0.0, silent=True,
               subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [27]:
rf_v.score(X_test, y_test)

0.51171875

**Tuning hyperparameters**

In [38]:
# parameter grid
lgb_grid = {'learning_rate': [0.025, 0.015],
            'max_depth': [15, 17, 20],
            'min_child_weight': [7, 8, 9],
            'subsample': [0.1, 0.3, 0.5]}

In [206]:
lgb_gs = GridSearchCV(estimator=lgb.LGBMClassifier(random_state=220),
                      param_grid=lgb_grid,
                      scoring='f1_micro',
                      cv=5, verbose=1, n_jobs=-1)

lgb_gs.fit(X_train, y_train)

Fitting 5 folds for each of 54 candidates, totalling 270 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   24.7s
[Parallel(n_jobs=-1)]: Done 270 out of 270 | elapsed:   36.1s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=LGBMClassifier(boosting_type='gbdt', class_weight=None,
                                      colsample_bytree=1.0,
                                      importance_type='split',
                                      learning_rate=0.1, max_depth=-1,
                                      min_child_samples=20,
                                      min_child_weight=0.001,
                                      min_split_gain=0.0, n_estimators=100,
                                      n_jobs=-1, num_leaves=31, objective=None,
                                      random_state=220, reg_alpha=0.0,
                                      reg_lambda=0.0, silent=True,
                                      subsample=1.0, subsample_for_bin=200000,
                                      subsample_freq=0),
             iid='warn', n_jobs=-1,
             param_grid={'learning_rate': [0.025, 0.015],
                         'max

In [207]:
print('Best params:', lgb_gs.best_params_)
print('Train score: %.3f' % lgb_gs.best_score_)
print('Test score: %.3f' % lgb_gs.score(X_test, y_test))

Best params: {'learning_rate': 0.025, 'max_depth': 17, 'min_child_weight': 8, 'subsample': 0.1}
Train score: 0.529
Test score: 0.562


### Logistic Regression

In [41]:
# Vanilla Model
log_v = LogisticRegression(random_state=220)

log_v.fit(X_train_scale, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=220, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [42]:
log_v.score(X_test_scale, y_test)

0.57421875

**Tuning hyperparameters**

In [218]:
# parameter grid
log_grid = {'penalty': ['l1', 'l2'],
           'C': [0.001, 0.01, 1]}

In [219]:
log_gs = GridSearchCV(estimator=LogisticRegression(random_state=220),
                      param_grid=log_grid,
                      scoring='f1_micro',
                      cv=5, verbose=1, n_jobs=-1)

log_gs.fit(X_train_scale, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  30 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    0.9s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='warn',
                                          n_jobs=None, penalty='l2',
                                          random_state=220, solver='warn',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid={'C': [0.001, 0.01, 1], 'penalty': ['l1', 'l2']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='f1_micro', verbose=1)

In [220]:
print('Best params:', log_gs.best_params_)
print('Train score: %.3f' % log_gs.best_score_)
print('Test score: %.3f' % log_gs.score(X_test_scale, y_test))

Best params: {'C': 0.01, 'penalty': 'l2'}
Train score: 0.518
Test score: 0.590


### KNN

In [60]:
# Vanilla Model
knn_v = KNeighborsClassifier()

knn_v.fit(X_train_scale, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [61]:
knn_v.score(X_test_scale, y_test)

0.453125

**Tuning hyperparameters**

In [75]:
# parameter grid
knn_grid = {'n_neighbors': [34, 36, 38],
           'p': [2, 3]}

In [210]:
knn_gs = GridSearchCV(estimator=KNeighborsClassifier(),
                     param_grid=knn_grid,
                     scoring='f1_micro',
                     cv=5, verbose=1, n_jobs=-1)

knn_gs.fit(X_train_scale, y_train)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  15 out of  30 | elapsed:    0.4s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:    1.1s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='warn', n_jobs=-1,
             param_grid={'n_neighbors': [34, 36, 38], 'p': [2, 3]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='f1_micro', verbose=1)

In [211]:
print('Best params:', knn_gs.best_params_)
print('Train score: %.3f' % knn_gs.best_score_)
print('Test score: %.3f' % knn_gs.score(X_test_scale, y_test))

Best params: {'n_neighbors': 36, 'p': 2}
Train score: 0.504
Test score: 0.531


### SVM

In [12]:
# Vanilla Model
svm_v = SVC(kernel='linear', random_state=220)

svm_v.fit(X_train_scale, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=220,
    shrinking=True, tol=0.001, verbose=False)

In [13]:
svm_v.score(X_test_scale, y_test)

0.58203125

**Tuning hyperparameters**

In [14]:
# parameter grid
svm_grid = {'C': [0.01, 0.1, 0.5],
           'class_weight': ['balanced', None],
           'kernel': ['rbf', 'linear', 'poly']}

In [15]:
svm_gs = GridSearchCV(estimator=SVC(random_state=220),
                     param_grid=svm_grid,
                     scoring='f1_micro',
                     cv=5, verbose=1, n_jobs=-1)

svm_gs.fit(X_train_scale, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done  90 out of  90 | elapsed:    4.0s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=220, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=-1,
             param_grid={'C': [0.01, 0.1, 0.5],
                         'class_weight': ['balanced', None],
                         'kernel': ['rbf', 'linear', 'poly']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='f1_micro', verbose=1)

In [16]:
print('Best params:', svm_gs.best_params_)
print('Train score: %.3f' % svm_gs.best_score_)
print('Test score: %.3f' % svm_gs.score(X_test_scale, y_test))

Best params: {'C': 0.5, 'class_weight': None, 'kernel': 'linear'}
Train score: 0.517
Test score: 0.594


## Modeling with balanced classes

In [99]:
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE

### Undersampling

In [121]:
nyc_schools['Student Achievement Rating'].value_counts()

3    531
2    358
4    347
1     40
Name: Student Achievement Rating, dtype: int64

In [108]:
undersampled = resample(nyc_schools[nyc_schools['Student Achievement Rating']
                                    != 3],
                        replace=False,  # sample without replacement
                        # match number in majority class
                        n_samples=sum(nyc_schools['Student Achievement Rating']
                                      == 3),
                        random_state=220)  # reproducible results
output_df = pd.concat([nyc_schools.loc[nyc_schools['Student Achievement Rating']
                                       == 3],
                       undersampled])

In [109]:
output_df['Student Achievement Rating'].value_counts()

3    531
2    256
4    242
1     33
Name: Student Achievement Rating, dtype: int64

In [156]:
X_u = output_df.drop(columns='Student Achievement Rating',
                          axis=1)
y_u = output_df['Student Achievement Rating']

In [157]:
X_u = pd.get_dummies(X_u, drop_first=True)

In [163]:
X_train_u, X_test_u, y_train_u, y_test_u = train_test_split(X_u, y_u, test_size=0.2,
                                                            random_state=220)

In [165]:
X_train_scale = pd.DataFrame(scale.fit_transform(X_train_u),
                             columns=X_u.columns)
X_test_scale = pd.DataFrame(scale.transform(X_test_u),
                            columns=X_u.columns)

### SVM

In [166]:
# Vanilla Model
svm_v = SVC(kernel='linear', random_state=220)

svm_v.fit(X_train_scale, y_train_u)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=220,
    shrinking=True, tol=0.001, verbose=False)

In [167]:
svm_v.score(X_test_scale, y_test_u)

0.49295774647887325

**Tuning hyperparameters**

In [171]:
# parameter grid
svm_grid = {'C': [0.001, 0.005, 0.01],
           'class_weight': ['balanced', None]}

In [172]:
svm_gs = GridSearchCV(estimator=SVC(kernel='linear', random_state=220),
                     param_grid=svm_grid,
                     scoring='recall_micro',
                     cv=5, verbose=1, n_jobs=-1)

svm_gs.fit(X_train_scale, y_train_u)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:   19.5s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='linear',
                           max_iter=-1, probability=False, random_state=220,
                           shrinking=True, tol=0.001, verbose=False),
             iid='warn', n_jobs=-1,
             param_grid={'C': [0.001, 0.01, 0.1],
                         'class_weight': ['balanced', None]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='recall_micro', verbose=1)

In [173]:
print('Best params:', svm_gs.best_params_)
print('Train score: %.3f' % svm_gs.best_score_)
print('Test score: %.3f' % svm_gs.score(X_test_scale, y_test_u))

Best params: {'C': 0.001, 'class_weight': 'balanced'}
Train score: 0.535
Test score: 0.516


### Oversampling

In [174]:
oversampled = resample(nyc_schools[nyc_schools['Student Achievement Rating']
                                   != 1],
                       replace=True,  # sample without replacement
                       # match number in majority class
                       n_samples=sum(nyc_schools['Student Achievement Rating']
                                      == 1),
                       random_state=220)  # reproducible results
output_df_over = pd.concat([nyc_schools.loc[nyc_schools['Student Achievement Rating']
                                            == 1],
                            oversampled])
X_train_o, y_train_o = output_df_over.drop(columns='Student Achievement Rating',
                                           axis=1
                                           ), output_df['Student Achievement Rating']

In [175]:
output_df_over['Student Achievement Rating'].value_counts()

1    40
3    17
4    14
2     9
Name: Student Achievement Rating, dtype: int64

In [176]:
X_o = output_df.drop(columns='Student Achievement Rating',
                          axis=1)
y_o = output_df['Student Achievement Rating']

In [177]:
X_o = pd.get_dummies(X_u, drop_first=True)

In [178]:
X_train_o, X_test_o, y_train_o, y_test_o = train_test_split(X_o, y_o, test_size=0.2,
                                                            random_state=220)

In [179]:
X_train_scale = pd.DataFrame(scale.fit_transform(X_train_o),
                             columns=X_u.columns)
X_test_scale = pd.DataFrame(scale.transform(X_test_o),
                            columns=X_u.columns)

### SVM

In [180]:
# Vanilla Model
svm_v = SVC(kernel='linear', random_state=220)

svm_v.fit(X_train_scale, y_train_o)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=220,
    shrinking=True, tol=0.001, verbose=False)

In [181]:
svm_v.score(X_test_scale, y_test_o)

0.49295774647887325

**Tuning hyperparameters**

In [191]:
# parameter grid
svm_grid = {'C': [0.001, 0.005, 0.01],
           'class_weight': ['balanced', None]}

In [192]:
svm_gs = GridSearchCV(estimator=SVC(kernel='linear', random_state=220),
                     param_grid=svm_grid,
                     scoring='recall_micro',
                     cv=5, verbose=1, n_jobs=-1)

svm_gs.fit(X_train_scale, y_train_o)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:   19.9s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='linear',
                           max_iter=-1, probability=False, random_state=220,
                           shrinking=True, tol=0.001, verbose=False),
             iid='warn', n_jobs=-1,
             param_grid={'C': [0.001, 0.005, 0.01],
                         'class_weight': ['balanced', None]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='recall_micro', verbose=1)

In [193]:
print('Best params:', svm_gs.best_params_)
print('Train score: %.3f' % svm_gs.best_score_)
print('Test score: %.3f' % svm_gs.score(X_test_scale, y_test_o))

Best params: {'C': 0.001, 'class_weight': 'balanced'}
Train score: 0.535
Test score: 0.516


### SMOTE

In [139]:
sm_model = SMOTE(sampling_strategy='minority',
                 random_state=220)
X_train_s, y_train_s = sm_model.fit_sample(X_train_scale, y_train)

### SVM

In [140]:
# Vanilla Model
svm_v = SVC(kernel='linear', random_state=220)

svm_v.fit(X_train_s, y_train_s)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=220,
    shrinking=True, tol=0.001, verbose=False)

In [141]:
svm_v.score(X_test_scale, y_test)

0.5390625

**Tuning hyperparameters**

In [153]:
# parameter grid
svm_grid = {'C': [5, 10, 20],
           'class_weight': ['balanced', None]}

In [154]:
svm_gs = GridSearchCV(estimator=SVC(kernel='linear', random_state=220),
                     param_grid=svm_grid,
                     scoring='recall_micro',
                     cv=5, verbose=1, n_jobs=-1)

svm_gs.fit(X_train_s, y_train_s)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:   27.9s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='linear',
                           max_iter=-1, probability=False, random_state=220,
                           shrinking=True, tol=0.001, verbose=False),
             iid='warn', n_jobs=-1,
             param_grid={'C': [5, 10, 20], 'class_weight': ['balanced', None]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='recall_micro', verbose=1)

In [155]:
print('Best params:', svm_gs.best_params_)
print('Train score: %.3f' % svm_gs.best_score_)
print('Test score: %.3f' % svm_gs.score(X_test_scale, y_test))

Best params: {'C': 10, 'class_weight': None}
Train score: 0.641
Test score: 0.547


## Findings

In [17]:
svm_gs.best_estimator_.coef_[0]

array([-4.73018381e-01, -1.45318580e-01, -8.96877068e-02, -3.02415008e-01,
        1.49335269e-01,  8.65988148e-02,  3.30389850e-01,  3.28626443e-02,
       -2.30741517e-01, -5.40368810e-01,  1.98860590e-01, -4.92866396e-03,
       -2.64893857e-01, -3.79053251e-01,  2.74759351e-02,  5.79614319e-02,
        2.39510239e-01, -4.56401952e-01,  2.35774056e-01, -2.43315519e-01,
       -2.40624743e-01, -4.48404243e-02, -2.72438432e-01,  6.16450047e-02,
       -6.17384178e-01, -2.62124816e-01, -4.17254457e-02,  3.04247340e-01,
        1.45005506e-01, -2.17288080e-01, -2.06371642e-01,  3.81384079e-02,
       -1.23587407e-01,  5.25391689e-01, -1.09413456e-01, -1.45626836e-01,
        8.88178420e-16,  7.58013985e-02, -2.22044605e-16,  0.00000000e+00,
       -1.38576378e-01, -3.63122491e-01, -3.76021548e-01, -2.96599585e-01,
       -2.38613965e-01, -1.49179300e-01, -5.41401322e-02, -2.70765913e-01,
       -2.09949002e-02, -2.53765538e-02,  8.25403558e-02, -1.28937212e-01,
       -3.17293104e-03,  

In [18]:
# create a DataFrame of the features and coefficients 
svm_feat_imp = pd.DataFrame(zip(X_train_scale.columns,
                                svm_gs.best_estimator_.coef_[0],
                                np.abs(svm_gs.best_estimator_.coef_[0])),
                            columns=['Feature', 'Coefficient',
                                     'Absolute Coefficient'])

# sort values by the absolute coefficient
svm_feat_imp.sort_values(by=['Absolute Coefficient'],
                         ascending=False, inplace=True)

svm_feat_imp.head(10)

Unnamed: 0,Feature,Coefficient,Absolute Coefficient
24,Student Attendance Rate,-0.617384,0.617384
9,"Identifying, tracking, and meeting goals",-0.540369,0.540369
33,Borough_Manhattan,0.525392,0.525392
0,Enrollment,-0.473018,0.473018
17,Percent HRA Eligible,-0.456402,0.456402
13,Percent Students with Disabilities,-0.379053,0.379053
42,District_03,-0.376022,0.376022
41,District_02,-0.363122,0.363122
6,Teacher collaboration,0.33039,0.33039
27,Collaborative Teachers Score,0.304247,0.304247
