# Summary

* Alternative coarse grid search
* Grid: `np.logspace(-5, 2, 5)`, i.e. 5 log-spaced values of C between 1e-5 and 100
* The results suggest that the best values of C lie roughly in the range [1e-3, 1]

![](../img/results-2.png)

## Load Data

In [None]:
import os
import re
import sys
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.utils import shuffle
from sklearn.utils.fixes import loguniform

# The CSV is read with header=None, so the three columns are named explicitly
# right after loading.
column_names = ["index", "url", "label"]
dataframe = pd.read_csv('../URL_Classification.csv', header=None)
dataframe.columns = column_names

## Preprocessing

In [None]:
# Coerce every entry of the 'url' column to a NumPy string, element by element.
url_column = dataframe['url']
dataframe['url'] = url_column.apply(np.str_)

## Prepare labels

In [None]:
# Distinct label values in sorted order; a label's position in this list is
# its integer id.
labels = sorted(set(dataframe['label']))
label2id = dict(zip(labels, range(len(labels))))

# Replace each label in the frame with its integer id.
dataframe['label'] = dataframe['label'].map(label2id)

## Transform with count vectorizer

In [None]:
# Bag of character 5-grams ('char_wb' analyzer, ngram_range fixed at 5) over
# the URL strings.
count_vectorizer = CountVectorizer(analyzer='char_wb', ngram_range=(5, 5))
X = count_vectorizer.fit_transform(dataframe['url'])
y = dataframe['label'].values

# Shuffle X and y together. The fixed seed makes the resulting row order the
# same on every run, so results computed from it can be reproduced.
X, y = shuffle(X, y, random_state=0)

## Optimize C parameter with GridSearchCV

In [None]:
# Silence warnings unless the interpreter was started with explicit -W options.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    os.environ["PYTHONWARNINGS"] = "ignore" # Also affect subprocesses


# Base estimator whose C is tuned below. LinearSVC is imported explicitly
# rather than reached through the bare `sklearn` package, which only works
# when some other import has already loaded the `sklearn.svm` submodule.
base_svc = LinearSVC(class_weight='balanced')

# Coarse grid: 5 log-spaced values of C from 1e-5 to 100.
grid = {'C': np.logspace(-5, 2, 5)}
# The search object keeps the name `clf`: the reporting cell reads
# `clf.cv_results_`.
clf = GridSearchCV(base_svc, param_grid=grid, n_jobs=5, cv=2, verbose=1)

# perf_counter() is a monotonic clock meant for measuring elapsed time.
start = time.perf_counter()
search = clf.fit(X, y)
print(f'\nCV Time: {time.perf_counter()-start}')
search.best_params_

## Report results

In [24]:
# Tabulate the per-candidate CV results and show them ordered by rank_test_score.
cv_table = pd.DataFrame(clf.cv_results_)
cv_table.sort_values(by='rank_test_score')

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,params,split0_test_score,split1_test_score,mean_test_score,std_test_score,rank_test_score
2,539.761316,12.990654,1.414075,0.03118,0.031623,{'C': 0.03162277660168379},0.600692,0.601333,0.601012,0.000321,1
3,1924.483413,13.34467,1.189416,0.087414,1.778279,{'C': 1.7782794100389228},0.559065,0.559444,0.559254,0.000189,2
4,2291.973409,94.544122,0.809129,0.114329,100.0,{'C': 100.0},0.526994,0.527645,0.52732,0.000326,3
1,146.29209,7.7084,1.486314,0.009537,0.000562,{'C': 0.0005623413251903491},0.518797,0.51854,0.518668,0.000129,4
0,115.905974,1.056549,1.418416,0.042932,1e-05,{'C': 1e-05},0.362953,0.36396,0.363457,0.000504,5
