# Name: Harsh Siddhapura
# ASU ID: 1230169813

# Lab 11: Cross Validation with Confusion Matrix Evaluation



In [1]:
from sklearn.datasets import load_svmlight_file
from sklearn import svm
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

In [2]:
# Baseline experiment: score a linear-kernel SVM on the "a9a" file with
# 5-fold cross-validation and report the mean and spread of the fold scores.
print("Loading Dataset...")
X, y = load_svmlight_file("a9a")

print("Creating classifier object...")
clf = svm.SVC(C=1, kernel='linear', random_state=42)

# cv=5 requests five folds; `scores` holds one score per fold.
print("Training classifier with cross validation, k=5")
scores = cross_val_score(estimator=clf, X=X, y=y, cv=5)

print("Training Complete!")

# Collapse the per-fold scores into the two summary statistics printed below.
acc, stdiv = scores.mean(), scores.std()
print("Cross Validation Mean Accuracy = %0.2f" % (acc,))
print("Standard Deviation of the Mean Accuracy across all runs = %0.5f" % (stdiv,))

Loading Dataset...
Creating classifier object...
Training classifier with cross validation, k=5
Training Complete!
Cross Validation Mean Accuracy = 0.85
Standard Deviation of the Mean Accuracy across all runs = 0.00179


#### What is the obtained result?  
The mean accuracy over the 5 cross-validation folds is 0.85 (85%), with a standard deviation of 0.00179 across the folds.

### Linear Kernel with Confusion Matrix

In [2]:
# Linear-kernel SVM: collect cross-validated predictions (k=5), build the
# confusion matrix against the true labels, and report the derived metrics.
print("Loading Dataset...")
X, y = load_svmlight_file("a9a")

print("Creating classifier object...")
clf = svm.SVC(C=1, kernel='linear', random_state=42)

print("Training classifier with cross validation, k=5")
y_pred = cross_val_predict(estimator=clf, X=X, y=y, cv=5)

print("Training Complete. Computing Confusion Matrix...\n")
matrix = confusion_matrix(y_true=y, y_pred=y_pred)
print(f'Confusion Matrix: \n{matrix}')

# Rows are true classes and columns are predicted classes, so the 2x2 matrix
# unpacks row by row as (tn, fp) followed by (fn, tp).
(tn, fp), (fn, tp) = matrix
print(f'\nTrue Positive (TP) = {tp}')
print(f'True Negative (TN) = {tn}')
print(f'False Positive (FP) = {fp}')
print(f'False Negative (FN) = {fn}')

# Rates come straight from the four counts; recall, precision and F-measure
# are taken from the scikit-learn scorers on the same label vectors.
tpr = tp / (fn + tp)
tnr = tn / (fp + tn)
recall = recall_score(y_true=y, y_pred=y_pred)
precision = precision_score(y_true=y, y_pred=y_pred)
f_measure = f1_score(y_true=y, y_pred=y_pred)

# Report the five metrics in the numbered order used by the write-up.
print('\n1. TPR (True Positive Rate) = %0.2f' % (tpr,))
print('2. TNR (True Negative Rate) = %0.2f' % (tnr,))
print('3. Recall = %0.2f' % (recall,))
print('4. Precision = %0.2f' % (precision,))
print('5. F-Measure = %0.2f' % (f_measure,))

Loading Dataset...
Creating classifier object...
Training classifier with cross validation, k=5
Training Complete. Computing Confusion Matrix...

Confusion Matrix: 
[[22941  1779]
 [ 3195  4646]]

True Positive (TP) = 4646
True Negative (TN) = 22941
False Positive (FP) = 1779
False Negative (FN) = 3195

1. TPR (True Positive Rate) = 0.59
2. TNR (True Negative Rate) = 0.93
3. Recall = 0.59
4. Precision = 0.72
5. F-Measure = 0.65


#### Calculations:

- Confusion Matrix:
    - True Positive (TP) = 4646
    - True Negative (TN) = 22941
    - False Positive (FP) = 1779
    - False Negative (FN) = 3195

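1. TPR (True Positive Rate): TP / ( TP + FN ) = 4646 / ( 4646 + 3195 ) = 0.59
2. TNR (True Negative Rate): TN / ( TN + FP ) = 22941 / ( 22941 + 1779 ) = 0.93
3. Recall: TP / ( TP + FN ) = 4646 / ( 4646 + 3195 ) = 0.59
4. Precision: TP / ( TP + FP ) = 4646 / ( 4646 + 1779 ) = 0.72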
5. F-Measure: 2 * ( (Precision * Recall) / (Precision + Recall) ) = 0.65


### Modify the code to use an rbf kernel with gamma = 0.1. Print the obtained confusion matrix.

In [3]:
# RBF-kernel SVM with gamma = 0.1: collect cross-validated predictions (k=5),
# build the confusion matrix against the true labels, and report the metrics.
print("Loading Dataset...")
X, y = load_svmlight_file("a9a")

print("Creating classifier object...")
clf = svm.SVC(C=1, kernel='rbf', gamma=0.1, random_state=42)

print("Training classifier with cross validation, k=5")
y_pred = cross_val_predict(estimator=clf, X=X, y=y, cv=5)

print("Training Complete. Computing Confusion Matrix...\n")
matrix = confusion_matrix(y_true=y, y_pred=y_pred)
print(f'Confusion Matrix: \n{matrix}')

# Rows are true classes and columns are predicted classes, so the 2x2 matrix
# unpacks row by row as (tn, fp) followed by (fn, tp).
(tn, fp), (fn, tp) = matrix
print(f'\nTrue Positive (TP) = {tp}')
print(f'True Negative (TN) = {tn}')
print(f'False Positive (FP) = {fp}')
print(f'False Negative (FN) = {fn}')

# Rates come straight from the four counts; recall, precision and F-measure
# are taken from the scikit-learn scorers on the same label vectors.
tpr = tp / (fn + tp)
tnr = tn / (fp + tn)
recall = recall_score(y_true=y, y_pred=y_pred)
precision = precision_score(y_true=y, y_pred=y_pred)
f_measure = f1_score(y_true=y, y_pred=y_pred)

# Report the five metrics in the numbered order used by the write-up.
print('\n1. TPR (True Positive Rate) = %0.2f' % (tpr,))
print('2. TNR (True Negative Rate) = %0.2f' % (tnr,))
print('3. Recall = %0.2f' % (recall,))
print('4. Precision = %0.2f' % (precision,))
print('5. F-Measure = %0.2f' % (f_measure,))

Loading Dataset...
Creating classifier object...
Training classifier with cross validation, k=5
Training Complete. Computing Confusion Matrix...

Confusion Matrix: 
[[22987  1733]
 [ 3263  4578]]

True Positive (TP) = 4578
True Negative (TN) = 22987
False Positive (FP) = 1733
False Negative (FN) = 3263

1. TPR (True Positive Rate) = 0.58
2. TNR (True Negative Rate) = 0.93
3. Recall = 0.58
4. Precision = 0.73
5. F-Measure = 0.65


#### Calculations:

- Confusion Matrix:
    - True Positive (TP) = 4578
    - True Negative (TN) = 22987
    - False Positive (FP) = 1733
    - False Negative (FN) = 3263

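1. TPR (True Positive Rate): TP / ( TP + FN ) = 4578 / ( 4578 + 3263 ) = 0.58
2. TNR (True Negative Rate): TN / ( TN + FP ) = 22987 / ( 22987 + 1733 ) = 0.93
3. Recall: TP / ( TP + FN ) = 4578 / ( 4578 + 3263 ) = 0.58
4. Precision: TP / ( TP + FP ) = 4578 / ( 4578 + 1733 ) = 0.73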
5. F-Measure: 2 * ( (Precision * Recall) / (Precision + Recall) ) = 0.65


### Modify the code to use an rbf kernel with gamma = 0.01. Print the obtained confusion matrix.

In [4]:
# RBF-kernel SVM with gamma = 0.01: collect cross-validated predictions (k=5),
# build the confusion matrix against the true labels, and report the metrics.
print("Loading Dataset...")
X, y = load_svmlight_file("a9a")

print("Creating classifier object...")
clf = svm.SVC(C=1, kernel='rbf', gamma=0.01, random_state=42)

print("Training classifier with cross validation, k=5")
y_pred = cross_val_predict(estimator=clf, X=X, y=y, cv=5)

print("Training Complete. Computing Confusion Matrix...\n")
matrix = confusion_matrix(y_true=y, y_pred=y_pred)
print(f'Confusion Matrix: \n{matrix}')

# Rows are true classes and columns are predicted classes, so the 2x2 matrix
# unpacks row by row as (tn, fp) followed by (fn, tp).
(tn, fp), (fn, tp) = matrix
print(f'\nTrue Positive (TP) = {tp}')
print(f'True Negative (TN) = {tn}')
print(f'False Positive (FP) = {fp}')
print(f'False Negative (FN) = {fn}')

# Rates come straight from the four counts; recall, precision and F-measure
# are taken from the scikit-learn scorers on the same label vectors.
tpr = tp / (fn + tp)
tnr = tn / (fp + tn)
recall = recall_score(y_true=y, y_pred=y_pred)
precision = precision_score(y_true=y, y_pred=y_pred)
f_measure = f1_score(y_true=y, y_pred=y_pred)

# Report the five metrics in the numbered order used by the write-up.
print('\n1. TPR (True Positive Rate) = %0.2f' % (tpr,))
print('2. TNR (True Negative Rate) = %0.2f' % (tnr,))
print('3. Recall = %0.2f' % (recall,))
print('4. Precision = %0.2f' % (precision,))
print('5. F-Measure = %0.2f' % (f_measure,))

Loading Dataset...
Creating classifier object...
Training classifier with cross validation, k=5
Training Complete. Computing Confusion Matrix...

Confusion Matrix: 
[[23082  1638]
 [ 3429  4412]]

True Positive (TP) = 4412
True Negative (TN) = 23082
False Positive (FP) = 1638
False Negative (FN) = 3429

1. TPR (True Positive Rate) = 0.56
2. TNR (True Negative Rate) = 0.93
3. Recall = 0.56
4. Precision = 0.73
5. F-Measure = 0.64


#### Calculations:

- Confusion Matrix:
    - True Positive (TP) = 4412
    - True Negative (TN) = 23082
    - False Positive (FP) = 1638
    - False Negative (FN) = 3429

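1. TPR (True Positive Rate): TP / ( TP + FN ) = 4412 / ( 4412 + 3429 ) = 0.56
2. TNR (True Negative Rate): TN / ( TN + FP ) = 23082 / ( 23082 + 1638 ) = 0.93
3. Recall: TP / ( TP + FN ) = 4412 / ( 4412 + 3429 ) = 0.56
4. Precision: TP / ( TP + FP ) = 4412 / ( 4412 + 1638 ) = 0.73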
5. F-Measure: 2 * ( (Precision * Recall) / (Precision + Recall) ) = 0.64


### Modify the code to use a polynomial kernel with a degree = 2. Print the obtained confusion matrix.

In [5]:
# Polynomial-kernel SVM of degree 2: collect cross-validated predictions (k=5),
# build the confusion matrix against the true labels, and report the metrics.
print("Loading Dataset...")
X, y = load_svmlight_file("a9a")

print("Creating classifier object...")
clf = svm.SVC(C=1, kernel='poly', degree=2, random_state=42)

print("Training classifier with cross validation, k=5")
y_pred = cross_val_predict(estimator=clf, X=X, y=y, cv=5)

print("Training Complete. Computing Confusion Matrix...\n")
matrix = confusion_matrix(y_true=y, y_pred=y_pred)
print(f'Confusion Matrix: \n{matrix}')

# Rows are true classes and columns are predicted classes, so the 2x2 matrix
# unpacks row by row as (tn, fp) followed by (fn, tp).
(tn, fp), (fn, tp) = matrix
print(f'\nTrue Positive (TP) = {tp}')
print(f'True Negative (TN) = {tn}')
print(f'False Positive (FP) = {fp}')
print(f'False Negative (FN) = {fn}')

# Rates come straight from the four counts; recall, precision and F-measure
# are taken from the scikit-learn scorers on the same label vectors.
tpr = tp / (fn + tp)
tnr = tn / (fp + tn)
recall = recall_score(y_true=y, y_pred=y_pred)
precision = precision_score(y_true=y, y_pred=y_pred)
f_measure = f1_score(y_true=y, y_pred=y_pred)

# Report the five metrics in the numbered order used by the write-up.
print('\n1. TPR (True Positive Rate) = %0.2f' % (tpr,))
print('2. TNR (True Negative Rate) = %0.2f' % (tnr,))
print('3. Recall = %0.2f' % (recall,))
print('4. Precision = %0.2f' % (precision,))
print('5. F-Measure = %0.2f' % (f_measure,))

Loading Dataset...
Creating classifier object...
Training classifier with cross validation, k=5
Training Complete. Computing Confusion Matrix...

Confusion Matrix: 
[[23054  1666]
 [ 3284  4557]]

True Positive (TP) = 4557
True Negative (TN) = 23054
False Positive (FP) = 1666
False Negative (FN) = 3284

1. TPR (True Positive Rate) = 0.58
2. TNR (True Negative Rate) = 0.93
3. Recall = 0.58
4. Precision = 0.73
5. F-Measure = 0.65


#### Calculations:

- Confusion Matrix:
    - True Positive (TP) = 4557
    - True Negative (TN) = 23054
    - False Positive (FP) = 1666
    - False Negative (FN) = 3284

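1. TPR (True Positive Rate): TP / ( TP + FN ) = 4557 / ( 4557 + 3284 ) = 0.58
2. TNR (True Negative Rate): TN / ( TN + FP ) = 23054 / ( 23054 + 1666 ) = 0.93
3. Recall: TP / ( TP + FN ) = 4557 / ( 4557 + 3284 ) = 0.58
4. Precision: TP / ( TP + FP ) = 4557 / ( 4557 + 1666 ) = 0.73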
5. F-Measure: 2 * ( (Precision * Recall) / (Precision + Recall) ) = 0.65
