# Linear SVC Assignment

In [None]:
import numpy as np
import pandas as pd

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

### Import the admissions data set (admissions.csv).

In [None]:
# Remote location of the admissions data set.
ADMISSIONS_URL = 'https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/admissions.csv'

# Read the CSV into a DataFrame and preview its first rows.
data = pd.read_csv(ADMISSIONS_URL)
data.head()

Unnamed: 0,GRE,TOEFL,SchoolRank,SOP,LOR,GPA,Research,Admitted
0,337,118,4,4.5,4.5,9.65,1,1
1,324,107,4,4.0,4.5,8.87,1,1
2,316,104,3,3.0,3.5,8.0,1,1
3,322,110,3,3.5,2.5,8.67,1,1
4,314,103,2,2.0,3.0,8.21,0,0


### Split the data into training and test sets, with the test set comprising 30% of the data.  Use `'Admitted'` as the target.

In [None]:
# Target is the admission decision; features are every remaining column.
y = data['Admitted']
X = data.drop(columns=['Admitted'])

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

### Generate an SVC model with a linear kernel. Set the regularization parameter (C) = 10. Check the score for both train and test sets. 

In [None]:
# Linear-kernel SVC with C=10, fitted on the training split (fit returns the estimator).
svm = SVC(kernel='linear', C=10).fit(X_train, y_train)

# Accuracy on the training split; shown as the cell's output.
svm.score(X_train, y_train)

0.8678571428571429

In [None]:
# Accuracy of the same fitted model on the held-out test split.
svm.score(X=X_test, y=y_test)

0.8333333333333334

### Choose some other values for C and show the difference between the scores for the train and test sets.

In [None]:
# Sweep several regularization strengths (the task asks for more than one value)
# and tabulate train accuracy, test accuracy, and the gap between them.
c_values = [0.01, 0.027, 0.1, 1, 10]
c_scores = []
for c in c_values:
    candidate = SVC(C=c, kernel='linear').fit(X_train, y_train)
    train_acc = candidate.score(X_train, y_train)
    test_acc = candidate.score(X_test, y_test)
    c_scores.append({
        'C': c,
        'train': train_acc,
        'test': test_acc,
        'train - test': train_acc - test_acc,  # positive gap => fits train better than test
    })
print(pd.DataFrame(c_scores).to_string(index=False), '\n')

# Leave `svm` bound to the C=.027 model so the following test-score cell
# reports on the same model as before.
svm = SVC(C=.027, kernel='linear')
svm.fit(X_train, y_train)
svm.score(X_train, y_train)

0.8678571428571429

In [None]:
# Test-split accuracy for the model fitted in the previous cell.
svm.score(X=X_test, y=y_test)

0.85

### What if we switched the target variable? Let's assume that we know whether a student was admitted, and try to predict what their SchoolRank was.

Create an SVC model with a linear kernel with the SchoolRank field as the target variable. Report both the train and the test scores.

In [None]:
# Re-target: predict SchoolRank from all other columns (Admitted is now a feature).
y = data['SchoolRank']
X = data.drop(columns=['SchoolRank'])

In [None]:
# Same 70/30 split and seed as before, now applied to the SchoolRank features/target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Linear-kernel SVC (C=10) fitted on the SchoolRank training split.
svm = SVC(kernel='linear', C=10).fit(X_train, y_train)

# Accuracy on the training split; shown as the cell's output.
svm.score(X_train, y_train)

0.6035714285714285

In [None]:
# Accuracy of the SchoolRank model on the held-out test split.
svm.score(X=X_test, y=y_test)

0.6333333333333333

### Show confusion matrices for the training and test sets, and a classification report for the test set. What trends do you notice?

In [None]:
# Predicted labels for each split from the fitted model.
train_pred, test_pred = (svm.predict(split) for split in (X_train, X_test))

# Confusion matrices: first argument is the true labels, second the predictions.
train_cm = confusion_matrix(y_train, train_pred)
test_cm = confusion_matrix(y_test, test_pred)

# Training matrix (followed by a blank line), then the test matrix.
print(train_cm, '\n')
print(test_cm)

# Per-class precision / recall / f1 on the test split.
print(classification_report(y_test, test_pred))

[[16  4  2  0  0]
 [ 6 34 24  1  0]
 [ 0 15 70  8  0]
 [ 0  3 15 29 13]
 [ 0  0  3 17 20]] 

[[ 2  2  0  0  0]
 [ 6 25  9  1  1]
 [ 1  7 29  3  0]
 [ 0  1  1  9  3]
 [ 0  0  4  5 11]]
              precision    recall  f1-score   support

           1       0.22      0.50      0.31         4
           2       0.71      0.60      0.65        42
           3       0.67      0.72      0.70        40
           4       0.50      0.64      0.56        14
           5       0.73      0.55      0.63        20

    accuracy                           0.63       120
   macro avg       0.57      0.60      0.57       120
weighted avg       0.66      0.63      0.64       120

