<a href="https://colab.research.google.com/github/pravincoder/Machine-Learning-Models-Tutorial/blob/main/SVC_GolfData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Support vector classifier on Golf Data (Club Type)

In [5]:
# Imports
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline

In [6]:
# Read Csv
df = pd.read_csv('golf_data_processed.csv')

In [7]:
# Dataframe info and describe
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 78 entries, 0 to 77
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Hole                    78 non-null     int64  
 1   Score                   78 non-null     int64  
 2   Shot Distance in Yards  78 non-null     int64  
 3   Shot Outcome in Yards   78 non-null     float64
 4   Club Type               78 non-null     object 
 5   Break                   78 non-null     object 
 6   Tournament Yards        78 non-null     float64
 7   Condition               78 non-null     object 
 8   Outcome                 78 non-null     object 
dtypes: float64(2), int64(3), object(4)
memory usage: 5.6+ KB


Unnamed: 0,Hole,Score,Shot Distance in Yards,Shot Outcome in Yards,Tournament Yards
count,78.0,78.0,78.0,78.0,78.0
mean,4.974359,4.012821,53.551282,47.858974,113.230769
std,2.623443,1.012822,52.883024,48.448587,13.527195
min,1.0,2.0,1.0,1.0,85.0
25%,3.0,3.0,7.25,6.0,107.0
50%,5.0,4.0,16.0,12.5,110.0
75%,7.0,5.0,110.0,98.0,122.0
max,9.0,6.0,143.0,128.0,135.0


In [15]:
df.drop('Condition',axis=1)

Unnamed: 0,Hole,Score,Shot Distance in Yards,Shot Outcome in Yards,Club Type,Break,Tournament Yards,Outcome
0,1,5,116,108.0,4,3,121.0,2
1,1,5,8,5.0,9,1,121.0,1
2,1,5,3,3.0,8,5,121.0,1
3,2,6,112,70.0,6,3,123.0,0
4,2,6,42,50.0,7,3,123.0,1
...,...,...,...,...,...,...,...,...
73,8,4,8,7.0,8,5,122.0,1
74,8,4,1,1.0,8,5,122.0,1
75,9,3,136,125.0,4,4,135.0,2
76,9,3,11,9.0,8,5,135.0,1


In [16]:
# check for nan in dataframe
df.isna().sum()

Unnamed: 0,0
Hole,0
Score,0
Shot Distance in Yards,0
Shot Outcome in Yards,0
Club Type,0
Break,0
Tournament Yards,0
Condition,0
Outcome,0


In [17]:
# Data Preprocessing for categorical values
# Encode target variable (Club Type) and other categorical features
encoder_club = LabelEncoder()
df['Club Type'] = encoder_club.fit_transform(df['Club Type'])  # putter = 1 , Sw =2....9

encoder_break = LabelEncoder()
df['Break'] = encoder_break.fit_transform(df['Break'])

encoder_outcome = LabelEncoder()
df['Outcome'] = encoder_outcome.fit_transform(df['Outcome'])


In [18]:
# Define features (X) and target (y)
X = df.drop('Club Type', axis=1)  # Exclude Club Type from features
y = df['Club Type']  # Target variable


In [19]:
y.value_counts()

Unnamed: 0_level_0,count
Club Type,Unnamed: 1_level_1
8,32
6,17
4,7
7,7
9,5
0,3
5,3
2,2
1,1
3,1


In [22]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
X_train.shape

(66, 8)

In [21]:
# Train
clf = svm.SVC(kernel='rbf',random_state=42)
clf.fit(X_train, y_train)

ValueError: could not convert string to float: 'Haze'

In [None]:
# Test
y_pred = clf.predict(X_test)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


# Support Vector Classifier (for Break)

In [None]:
# Read Csv
df = pd.read_csv('/content/drive/MyDrive/golf_data_processed.csv')

In [None]:
# Dataframe info and describe
df.info()
df.describe()

In [None]:
# check for nan in dataframe
df.isna().sum()

In [None]:
# Data Preprocessing for categorical values
# Encode target variable (Club Type) and other categorical features
encoder_club = LabelEncoder()
df['Club Type'] = encoder_club.fit_transform(df['Club Type'])

encoder_break = LabelEncoder()
df['Break'] = encoder_break.fit_transform(df['Break'])

encoder_outcome = LabelEncoder()
df['Outcome'] = encoder_outcome.fit_transform(df['Outcome'])


In [None]:
# Define features (X) and target (y)
X = df.drop('Break', axis=1)  # Exclude Club Type from features
y = df['Break']  # Target variable


In [None]:
y.value_counts()

In [None]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)


In [None]:
# Train
clf = svm.SVC(kernel='linear',random_state=42)
clf.fit(X_train, y_train)

In [None]:
# Test
y_pred = clf.predict(X_test)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)
metrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", metrix)

## Linear Regression model (Score)

In [None]:
X = df.drop(['Score'],axis=1)
y = df['Score']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)


In [None]:
# Linear model
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)

In [None]:
y_pred=model.predict(X_test)

In [None]:
# Metric
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("R-squared:", r2)