## Mathews notebook for Machine Learning SVC modeling

In [1]:
# Load Modules

# Python package for plotting charts
import matplotlib.pyplot as plt
# Python package for dataframe creation and manipulation
import pandas as pd
# Python package for low level math functions
import numpy as np
# Python package for third party operating systems
import os
# Python package for statistical modeling: split dataset
from sklearn.model_selection import train_test_split
# Python package for statistical modeling: support vector classifier
from sklearn.svm import SVC 
# Python package for statistical modeling: 
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

In [2]:
# Read File

honeybee_csv = pd.read_csv(os.path.join('Resources', 'Output', 'HoneybeeDatasetSVCModel.csv'))
honeybee_csv.head()

Unnamed: 0,Region,FIPS,ColonyCount,ColonyGrowth,GrowthOutcome,TotalProduction,YieldPerColony,PricePerLB,ProductionValue,Stocks,YearRecorded,ClothianidinLB,ImidaclopridLB,ThiamethoxamLB,AcetamipridLB,ThiaclopridLB,CombinedNeonicLB
0,3,1,16000,,,928000,58,0.69,640000,28000,1995,0.0,1579.61,0.0,0.0,0.0,1579.61
1,3,1,15000,-6.25,0.0,960000,64,0.87,835000,96000,1996,0.0,819.24,0.0,0.0,0.0,819.24
2,3,1,14000,-6.67,0.0,924000,66,0.81,748000,92000,1997,0.0,14781.55,0.0,0.0,0.0,14781.55
3,3,1,16000,14.29,1.0,1136000,71,0.72,818000,159000,1998,0.0,4048.35,0.0,0.0,0.0,4048.35
4,3,1,17000,6.25,1.0,1156000,68,0.56,647000,185000,1999,0.0,2758.42,0.0,0.0,0.0,2758.42


In [3]:
# Drop NaN Rows

honeybee_csv = honeybee_csv.apply(pd.to_numeric, errors='coerce')
honeybee_csv = honeybee_csv.dropna()
honeybee_csv.head()

Unnamed: 0,Region,FIPS,ColonyCount,ColonyGrowth,GrowthOutcome,TotalProduction,YieldPerColony,PricePerLB,ProductionValue,Stocks,YearRecorded,ClothianidinLB,ImidaclopridLB,ThiamethoxamLB,AcetamipridLB,ThiaclopridLB,CombinedNeonicLB
1,3,1,15000,-6.25,0.0,960000,64,0.87,835000,96000,1996,0.0,819.24,0.0,0.0,0.0,819.24
2,3,1,14000,-6.67,0.0,924000,66,0.81,748000,92000,1997,0.0,14781.55,0.0,0.0,0.0,14781.55
3,3,1,16000,14.29,1.0,1136000,71,0.72,818000,159000,1998,0.0,4048.35,0.0,0.0,0.0,4048.35
4,3,1,17000,6.25,1.0,1156000,68,0.56,647000,185000,1999,0.0,2758.42,0.0,0.0,0.0,2758.42
5,3,1,16000,-5.88,0.0,1248000,78,0.59,736000,187000,2000,0.0,3305.17,0.0,0.0,0.0,3305.17


In [4]:
# Target Outcome Column

target = honeybee_csv['GrowthOutcome']
target_names = ['Growth', 'No-Growth']

In [5]:
# Drop Column

honeybee_data = honeybee_csv.drop(['GrowthOutcome', 'ColonyGrowth', 'YearRecorded', 'Region', 'FIPS'], axis=1)
honeybee_names = honeybee_data.columns
honeybee_data.head()

Unnamed: 0,ColonyCount,TotalProduction,YieldPerColony,PricePerLB,ProductionValue,Stocks,ClothianidinLB,ImidaclopridLB,ThiamethoxamLB,AcetamipridLB,ThiaclopridLB,CombinedNeonicLB
1,15000,960000,64,0.87,835000,96000,0.0,819.24,0.0,0.0,0.0,819.24
2,14000,924000,66,0.81,748000,92000,0.0,14781.55,0.0,0.0,0.0,14781.55
3,16000,1136000,71,0.72,818000,159000,0.0,4048.35,0.0,0.0,0.0,4048.35
4,17000,1156000,68,0.56,647000,185000,0.0,2758.42,0.0,0.0,0.0,2758.42
5,16000,1248000,78,0.59,736000,187000,0.0,3305.17,0.0,0.0,0.0,3305.17


In [6]:
# Split Data

x_train, x_test, y_train, y_test = train_test_split(honeybee_data, target, random_state=42)

In [7]:
# Scale Data

# X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
# scaler = StandardScaler().fit(x_train)
# x_train_scaled = scaler.transform(x_train)
# x_test_scaled = scaler.transform(x_test)

In [18]:
# Create SVC Classifier

honeybee_svc_model = SVC(kernel='rbf', cache_size=5000)
#.fit(x_train, y_train)

In [19]:
# Fit Model

honeybee_svc_model.fit(x_train, y_train)

SVC(cache_size=5000)

In [20]:
# SVC Model Accuracy

print(f'Test accuracy: %.3f' % honeybee_svc_model.score(x_test, y_test))

Test accuracy: 0.683


In [21]:
# Calculate Report

honeybee_prediction = honeybee_svc_model.predict(x_test)
print(classification_report(y_test, honeybee_prediction,
                            target_names = target_names))

              precision    recall  f1-score   support

      Growth       0.68      0.97      0.80       146
   No-Growth       0.71      0.15      0.25        78

    accuracy                           0.68       224
   macro avg       0.69      0.56      0.53       224
weighted avg       0.69      0.68      0.61       224

