## Mathews notebook for Machine Learning SVC modeling

### Load All Necessary Model Modules

In [1]:
# Load Starter Modules

# Python package for plotting charts
import matplotlib.pyplot as plt
# Python package for dataframe creation and manipulation
import pandas as pd
# Python package for low level math functions
import numpy as np
# Python package for third party operating systems
import os

In [2]:
# Load Random Forest Modules

# Python package for statistical modeling of RandomForests Grid Search
from sklearn.datasets import make_classification
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Load SVC Modules

# Python package for statistical modeling: support vector classifier
from sklearn.svm import SVC 
# Python package for statistical modeling: 
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Python package for statistical modeling: split dataset
from sklearn.model_selection import train_test_split
# Python package for statistical modeling: print report
from sklearn.metrics import classification_report

### Create Dataset

In [4]:
# Read File

honeybee_csv = pd.read_csv(os.path.join('Resources', 'Data', 'growth_designation.csv'))
honeybee_csv.head()

Unnamed: 0,Region,FIPS,ColonyCount,ColonyGrowth,GrowthOutcome,TotalProduction,YieldPerColony,PricePerLB,ProductionValue,Stocks,YearRecorded,ClothianidinLB,ImidaclopridLB,ThiamethoxamLB,AcetamipridLB,ThiaclopridLB,CombinedNeonicLB
0,3,1,15000,-6.25,0.0,960000,64,0.87,835000,96000,1996,0.0,819.24,0.0,0.0,0.0,819.24
1,3,1,14000,-6.67,0.0,924000,66,0.81,748000,92000,1997,0.0,14781.55,0.0,0.0,0.0,14781.55
2,3,1,16000,14.29,1.0,1136000,71,0.72,818000,159000,1998,0.0,4048.35,0.0,0.0,0.0,4048.35
3,3,1,17000,6.25,1.0,1156000,68,0.56,647000,185000,1999,0.0,2758.42,0.0,0.0,0.0,2758.42
4,3,1,16000,-5.88,0.0,1248000,78,0.59,736000,187000,2000,0.0,3305.17,0.0,0.0,0.0,3305.17


In [5]:
# Drop Columns

honeybee_csv = honeybee_csv.drop(columns=['ColonyGrowth', 'YearRecorded', 'Region', 'FIPS'])

In [6]:
# Target Outcome Column

target = honeybee_csv['GrowthOutcome']
target_names = ['Growth', 'No-Growth']

In [7]:
# Drop Column

honeybee_data = honeybee_csv.drop(['GrowthOutcome'], axis=1)
honeybee_names = honeybee_data.columns
honeybee_data.head()

Unnamed: 0,ColonyCount,TotalProduction,YieldPerColony,PricePerLB,ProductionValue,Stocks,ClothianidinLB,ImidaclopridLB,ThiamethoxamLB,AcetamipridLB,ThiaclopridLB,CombinedNeonicLB
0,15000,960000,64,0.87,835000,96000,0.0,819.24,0.0,0.0,0.0,819.24
1,14000,924000,66,0.81,748000,92000,0.0,14781.55,0.0,0.0,0.0,14781.55
2,16000,1136000,71,0.72,818000,159000,0.0,4048.35,0.0,0.0,0.0,4048.35
3,17000,1156000,68,0.56,647000,185000,0.0,2758.42,0.0,0.0,0.0,2758.42
4,16000,1248000,78,0.59,736000,187000,0.0,3305.17,0.0,0.0,0.0,3305.17


In [8]:
# Split Data

x_train, x_test, y_train, y_test = train_test_split(honeybee_data, target, random_state=1)

### Perform Grid Search

In [9]:
# Create Grid Search

param_grid = {'max_depth': [3, 5, 10],
              'min_samples_split': [2, 5, 10]}
base_estimator = RandomForestClassifier(random_state=0)
x, y = make_classification(n_samples=1000, random_state=0)
sh = HalvingGridSearchCV(base_estimator, param_grid, cv=5,
                         factor=2, resource='n_estimators',
                        max_resources=30).fit(x, y)
sh.best_estimator_
RandomForestClassifier(max_depth=5, n_estimators=800, random_state=0)

RandomForestClassifier(max_depth=5, n_estimators=800, random_state=0)

### Create Random Forest

In [10]:
# Create RandomForest Classifier

kepler_186f_RFClassifier = RandomForestClassifier(max_depth=5, n_estimators=24, random_state=0)
# Fit the RandomForestClassifier to train data
kepler_186f_RFClassifier = kepler_186f_RFClassifier.fit(x_train, y_train)
# Score the RandomForestClassifier with test data
print(kepler_186f_RFClassifier.score(x_test, y_test))
sorted(zip(kepler_186f_RFClassifier.feature_importances_, honeybee_names), reverse=True)

0.6383928571428571


[(0.13506942629527685, 'ProductionValue'),
 (0.1293348610954676, 'ColonyCount'),
 (0.11959049754245596, 'TotalProduction'),
 (0.10503215132491568, 'ImidaclopridLB'),
 (0.09825509207201129, 'YieldPerColony'),
 (0.09762632018383137, 'PricePerLB'),
 (0.09355024536478085, 'Stocks'),
 (0.06944012096727122, 'ThiamethoxamLB'),
 (0.06782750821269004, 'CombinedNeonicLB'),
 (0.03763874372935174, 'ClothianidinLB'),
 (0.03213234856403119, 'AcetamipridLB'),
 (0.014502684647916072, 'ThiaclopridLB')]

### Build SVC Model 1

In [11]:
# Create SVC Classifier

honeybee_svc_model = SVC(kernel='rbf', cache_size=5000)
#.fit(x_train, y_train)

In [12]:
# Fit Model

honeybee_svc_model.fit(x_train, y_train)

SVC(cache_size=5000)

In [13]:
# Scale Data

scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [14]:
# SVC Model Accuracy

print(f'Test accuracy: %.3f' % honeybee_svc_model.score(x_test, y_test))

Test accuracy: 0.647


In [15]:
# Calculate Report

honeybee_prediction = honeybee_svc_model.predict(x_test)
print(classification_report(y_test, honeybee_prediction,
                            target_names = target_names))

              precision    recall  f1-score   support

      Growth       0.67      0.93      0.78       149
   No-Growth       0.38      0.08      0.13        75

    accuracy                           0.65       224
   macro avg       0.52      0.51      0.46       224
weighted avg       0.57      0.65      0.56       224



### Build SVC Model 2

In [16]:
# Keep Only

honeybee_data2 = honeybee_csv[['ProductionValue', 'TotalProduction', 'ImidaclopridLB','Stocks','PricePerLB']]
honeybee_names = honeybee_data.columns
honeybee_data2.head()

Unnamed: 0,ProductionValue,TotalProduction,ImidaclopridLB,Stocks,PricePerLB
0,835000,960000,819.24,96000,0.87
1,748000,924000,14781.55,92000,0.81
2,818000,1136000,4048.35,159000,0.72
3,647000,1156000,2758.42,185000,0.56
4,736000,1248000,3305.17,187000,0.59


In [17]:
# Split Data

x_train, x_test, y_train, y_test = train_test_split(honeybee_data2, target, random_state=1)

In [18]:
# Create SVC Classifier

honeybee_svc_model2 = SVC(kernel='rbf', cache_size=5000).fit(x_train, y_train)

In [19]:
# Scale Data

scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [20]:
# SVC Model Accuracy

print(f'Test accuracy: %.3f' % honeybee_svc_model2.score(x_test, y_test))

Test accuracy: 0.652


In [21]:
# Calculate Report

honeybee_prediction = honeybee_svc_model2.predict(x_test)
print(classification_report(y_test, honeybee_prediction,
                            target_names = target_names))

              precision    recall  f1-score   support

      Growth       0.67      0.93      0.78       149
   No-Growth       0.41      0.09      0.15        75

    accuracy                           0.65       224
   macro avg       0.54      0.51      0.47       224
weighted avg       0.58      0.65      0.57       224



### Build SVC Model 3

In [22]:
# Keep Only

honeybee_data3 = honeybee_csv[['ClothianidinLB', 'ImidaclopridLB', 'ThiamethoxamLB','AcetamipridLB','ThiaclopridLB']]
honeybee_names = honeybee_data.columns
honeybee_data2.head()

Unnamed: 0,ProductionValue,TotalProduction,ImidaclopridLB,Stocks,PricePerLB
0,835000,960000,819.24,96000,0.87
1,748000,924000,14781.55,92000,0.81
2,818000,1136000,4048.35,159000,0.72
3,647000,1156000,2758.42,185000,0.56
4,736000,1248000,3305.17,187000,0.59


In [23]:
# Split Data

x_train, x_test, y_train, y_test = train_test_split(honeybee_data3, target, random_state=1)

In [24]:
# Create SVC Classifier

honeybee_svc_model3 = SVC(kernel='rbf', cache_size=5000).fit(x_train, y_train)

In [25]:
# Scale Data

scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [26]:
# SVC Model Accuracy

print(f'Test accuracy: %.3f' % honeybee_svc_model3.score(x_test, y_test))

Test accuracy: 0.661


In [27]:
# Calculate Report

honeybee_prediction = honeybee_svc_model3.predict(x_test)
print(classification_report(y_test, honeybee_prediction,
                            target_names = target_names))

              precision    recall  f1-score   support

      Growth       0.67      0.96      0.79       149
   No-Growth       0.45      0.07      0.12        75

    accuracy                           0.66       224
   macro avg       0.56      0.51      0.45       224
weighted avg       0.60      0.66      0.56       224



In [None]:
### Build SVC Model 4