# Baseline Model for Question 1 (data from 2013, 2018, 2020)
## Multi-Label Classification using Classifier Chains model


In [47]:
#load dependencies
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from skmultilearn.problem_transform import ClassifierChain
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from scipy.sparse import csr_matrix

from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import recall_score, precision_score


**This is after importing preprocessing comments**


In [None]:
#### This code is from preprocessing to be used before 
#### running code below to create clean texts for fitting 
#----------------------------------------------------------------

# #Create class object
# c_pp = comment_preprocessor()

# #clean X training set
# clean_doc_train, vocab_train = c_pp.preprocess_text(list(X_train_Q1['Comment']))
# #clean X valid
# clean_doc_valid, vocab_valid = c_pp.preprocess_text(list(X_valid_Q1['Comment']))

# ## For baseline, convert list of lists into list of sentences using the following code

# clean_text = []
# for docs in clean_doc_train:
#     clean_text.append(' '.join(docs)) 

# #save clean text 
# X_train_Q1['clean_text'] = clean_text
# X_train_Q1.to_csv('data/X_train_Q1_clean.csv', index=False)

# #validation set preprocess
# clean_text_val = []
# for docs in clean_doc_valid:
#     clean_text_val.append(' '.join(docs))

# #save valid clean text 
# X_valid_Q1['clean_text'] = clean_text_val
# X_valid_Q1.to_csv('data/X_valid_Q1_clean.csv', index=False)

In [4]:
# Read the preprocessed comment tables and their label tables from disk.
# Paths are relative to the notebook's working directory; edit them if your
# copy of the data lives elsewhere.

# Comment tables (the cleaned text is read from their 'clean_text' column below).
X_train_Q1, X_valid_Q1 = (
    pd.read_csv(csv_path)
    for csv_path in ('data/X_train_Q1_clean.csv', 'data/X_valid_Q1_clean.csv')
)

# Label tables (theme and subtheme columns are sliced out of these below).
y_train_Q1, y_valid_Q1 = (
    pd.read_csv(csv_path)
    for csv_path in ('data/y_train_Q1.csv', 'data/y_valid_Q1.csv')
)

## TF-IDF Vectorizer Representation
First we'll use TF-IDF (scikit-learn's `TfidfVectorizer`) to vectorize the comments.


In [5]:
#Tfid Vectorizer Representation

def tfid_vectorizer(train, valid):
    """
    Build TF-IDF features for a training / validation pair of text collections.

    A TfidfVectorizer with default settings is fitted on `train` only;
    `valid` is then transformed with that already-fitted vectorizer.

    Returns the two matrices as a tuple: (train_matrix, valid_matrix).
    """
    vectorizer = TfidfVectorizer()
    train_matrix = vectorizer.fit_transform(train)
    return train_matrix, vectorizer.transform(valid)


#To view the representation
#bow = pd.DataFrame(X_train_tfid, columns=sorted(tfid.vocabulary_), index=final_comments)
#bow.head()

In [105]:
# Build the TF-IDF matrices from the cleaned comment text.
# astype('U') forces every entry to a unicode string before vectorizing; the
# conversion was found to be required (presumably some entries are not read
# back from the CSV as str -- TODO confirm).
train_text = X_train_Q1['clean_text'].values.astype('U')
valid_text = X_valid_Q1['clean_text'].values.astype('U')
X_train, X_valid = tfid_vectorizer(train_text, valid_text)

# Full training label matrix (all theme and subtheme columns) as a NumPy array.
Y_train = np.array(y_train_Q1)

In [126]:
# Split the label tables into theme-level and subtheme-level targets.

# Theme columns, selected by name (same list for train and validation).
theme_columns = ['CPD','CB', 'EWC', 'Exec', 'FEW', 'SP', 'RE', 'Sup', 'SW', 'TEPE',
                 'VMG', 'OTH', 'Unrelated']
# Subthemes are the label-based column slice between these two names, inclusive.
first_subtheme, last_subtheme = 'CPD_Improve_new_employee_orientation', 'OTH_Covid'

# Training labels
subthemes_ytrain = y_train_Q1.loc[:, first_subtheme:last_subtheme]
themes_ytrain = y_train_Q1[theme_columns]

# Validation labels
subthemes_yvalid = y_valid_Q1.loc[:, first_subtheme:last_subtheme]
themes_yvalid = y_valid_Q1[theme_columns]

# Column-count check (expected: 13 themes and 62 subthemes)
print('Theme columns:',themes_ytrain.shape[1])
print('Subtheme columns:', subthemes_ytrain.shape[1])

Theme columns: 13
Subtheme columns: 62


In [124]:
np.shape(subthemes_yvalid)

(2594, 62)

In [127]:
np.shape(themes_yvalid)

(2594, 13)

# Classifier Chain
## Subthemes only
### trying different models to choose best baseline classifier

**Starting with Subthemes Y only**

In [17]:
# Candidate base estimators to wrap in a ClassifierChain, keyed by display name.
# (Parts of this approach are adapted from DSCI 573 lab 4.)
#
# Not included in this dictionary:
#   'Random Forest' : RandomForestClassifier()  -- too slow here; run separately
#                                                  through the Classifier_Chain function
#   'KNeighborsClassifier': KNeighborsClassifier()
#   'Neural Net' : MLPClassifier()
models = dict([
    ('LinearSVC', LinearSVC()),
    ('MultinomialNB', MultinomialNB()),
    ('GaussianNB', GaussianNB()),
])


In [128]:
# Subthemes only: fit one ClassifierChain per base estimator in `models` and
# collect its metrics as one row of `results_dict`.
# Note: takes about 15 min to run.

# Scoring uses the array form of the label tables; convert them once up front.
subthemes_train_arr = np.array(subthemes_ytrain)
subthemes_valid_arr = np.array(subthemes_yvalid)

results_dict = []

for model_name, base_estimator in models.items():
    chain = ClassifierChain(base_estimator).fit(X_train, subthemes_ytrain)
    valid_pred = chain.predict(X_valid)

    results_dict.append({
        'Model': model_name,
        'Train Accuracy': chain.score(X_train, subthemes_train_arr),
        'Validation Accuracy': chain.score(X_valid, subthemes_valid_arr),
        # Recall and precision are micro-averaged over all subtheme labels.
        'Recall Score': recall_score(subthemes_valid_arr, valid_pred, average='micro'),
        'Precision Score': precision_score(subthemes_valid_arr, valid_pred, average='micro'),
    })

In [21]:
#inital look at df
pd.DataFrame(results_dict)

Unnamed: 0,Model,Train Accuracy,Validation Accuracy,Recall Score,Precision Score
0,LinearSVC,0.79867,0.328065,0.403465,0.709312
1,MultinomialNB,0.05185,0.043562,0.034901,0.933775
2,GaussianNB,0.448921,0.024672,0.291584,0.074228


In [28]:
#list-dictionary of basemodels
results_dict2 = []

In [187]:
def Classifier_Chain(Ytrain, Yvalid, base_model):
    """
    Fit a ClassifierChain around `base_model` and record how it scores.

    The chain is trained on the notebook-level TF-IDF matrix `X_train` against
    `Ytrain` and evaluated on `X_valid` against `Yvalid`. The label sets that
    are passed in are the ones that get used, so the same function serves
    themes or subthemes.

    Parameters
    ----------
    Ytrain : label table for the training rows (one column per label).
    Yvalid : label table for the validation rows, same columns as `Ytrain`.
    base_model : unfitted estimator to use as the chain's base classifier.

    Returns
    -------
    None. A dict with the keys 'Model', 'Train Accuracy',
    'Validation Accuracy', 'Recall Score' and 'Precision Score' is appended
    to the notebook-level list `results_dict2`.
    """
    # Scoring uses the array form of the labels; convert once.
    y_train_arr = np.array(Ytrain)
    y_valid_arr = np.array(Yvalid)

    model = ClassifierChain(base_model).fit(X_train, Ytrain)
    y_pred = model.predict(X_valid)

    case = {'Model': base_model,  # callers overwrite this with a short display name
            'Train Accuracy': model.score(X_train, y_train_arr),
            'Validation Accuracy': model.score(X_valid, y_valid_arr),
            # Recall and precision are micro-averaged over all labels.
            'Recall Score': recall_score(y_valid_arr, y_pred, average='micro'),
            'Precision Score': precision_score(y_valid_arr, y_pred, average='micro')}

    results_dict2.append(case)

In [29]:
#Random Forest' : RandomForestClassifier()
#Note takes quite some time ~15 min on my computer
Classifier_Chain(subthemes_ytrain, subthemes_yvalid, RandomForestClassifier())
results_dict2

[{'Model': RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                         criterion='gini', max_depth=None, max_features='auto',
                         max_leaf_nodes=None, max_samples=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=1, min_samples_split=2,
                         min_weight_fraction_leaf=0.0, n_estimators=100,
                         n_jobs=None, oob_score=False, random_state=None,
                         verbose=0, warm_start=False),
  'Train Accuracy': 0.9966268311488049,
  'Validation Accuracy': 0.19082498072474943,
  'Recall Score': 0.1948019801980198,
  'Precision Score': 0.8619934282584885}]

In [44]:
results_dict2[0]['Model'] = 'RandomForest' #rename

In [48]:
#Decision Tree' : DecisionTreeClassifier()
#Note takes quite some time ~15 min on my computer
Classifier_Chain(subthemes_ytrain, subthemes_yvalid, DecisionTreeClassifier())
results_dict2

[{'Model': 'RandomForest',
  'Train Accuracy': 0.9966268311488049,
  'Validation Accuracy': 0.19082498072474943,
  'Recall Score': 0.1948019801980198,
  'Precision Score': 0.8619934282584885},
 {'Model': DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                         max_depth=None, max_features=None, max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=1, min_samples_split=2,
                         min_weight_fraction_leaf=0.0, presort='deprecated',
                         random_state=None, splitter='best'),
  'Train Accuracy': 0.9979760986892829,
  'Validation Accuracy': 0.22629144178874325,
  'Recall Score': 0.4373762376237624,
  'Precision Score': 0.4723336006415397}]

In [51]:
results_dict2[1]['Model'] = 'DecisionTree' #rename

In [53]:
#LinearSVC(multi_class= "crammer_singer")
#"crammer_singer" optimizes a joint objective over all classes.
Classifier_Chain(subthemes_ytrain, subthemes_yvalid, LinearSVC(multi_class= "crammer_singer"))


In [55]:
results_dict2[2]['Model'] = 'LinearSVC(multi_class= "crammer_singer")' #rename

In [188]:
#Trying last year's base classifier LinearSVC(C=0.5, tol=0.2)

# results_dict2 is deliberately NOT reset here: it already holds the
# RandomForest, DecisionTree and crammer_singer rows (indices 0-2). The rename
# cell that follows addresses this run as results_dict2[3], and all four rows
# are later appended to results_dict for the summary table.
Classifier_Chain(subthemes_ytrain, subthemes_yvalid, LinearSVC(C=0.5, tol=0.2))

In [191]:
results_dict2[3]['Model'] = 'LinearSVC(C=0.5, tol=0.2) 2019 capstone base' #rename

In [193]:
results_dict.extend(results_dict2)

In [194]:
my_results =pd.DataFrame(results_dict)
my_results

Unnamed: 0,Model,Train Accuracy,Validation Accuracy,Recall Score,Precision Score
0,LinearSVC,0.79867,0.328065,0.403465,0.709312
1,MultinomialNB,0.05185,0.043562,0.034901,0.933775
2,GaussianNB,0.448921,0.024672,0.291584,0.074228
3,RandomForest,0.996627,0.190825,0.194802,0.861993
4,DecisionTree,0.997976,0.226291,0.437376,0.472334
5,"LinearSVC(multi_class= ""crammer_singer"")",0.707209,0.343099,0.42401,0.655067
6,"LinearSVC(C=0.5, tol=0.2) 2019 capstone base",0.562355,0.315343,0.372772,0.758308


***All models are overfitting, especially the tree models. The best one overall is LinearSVC with the default `multi_class='ovr'`. It is still overfitting though, so next we try tuning the regularization parameter C (a larger C means less regularization).***

In [97]:
# Sweep the LinearSVC regularization parameter C over 1e-4 ... 1e3 (subthemes)
# and keep the validation accuracy of each chain, in the same order as the grid.
c_grid = 10.0**np.arange(-4,4)
subthemes_valid_arr = np.array(subthemes_yvalid)

scores = []
for c_value in c_grid:
    # max_iter is raised well above LinearSVC's default of 1000.
    svc_chain = ClassifierChain(LinearSVC(C=c_value, max_iter=100000)).fit(X_train, subthemes_ytrain)
    scores.append(svc_chain.score(X_valid, subthemes_valid_arr))

In [99]:
10.0**np.arange(-4,4)

array([1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02, 1.e+03])

In [98]:
scores

[0.0007710100231303007,
 0.0007710100231303007,
 0.04741711642251349,
 0.2459521973785659,
 0.32806476484194297,
 0.2848882035466461,
 0.2579028527370856,
 0.24710871241326138]

> Regularization doesn't help here. The best validation score is given by the default hyperparameter C = 1.0. Let's try the same sweep on LinearSVC(multi_class='crammer_singer').

In [101]:
# Same C sweep (1e-4 ... 1e3, subthemes), this time with
# LinearSVC(multi_class="crammer_singer") as the base estimator.
# `scores` ends up with one validation accuracy per C value, in grid order.
subthemes_valid_arr = np.array(subthemes_yvalid)

scores = []
for c_value in 10.0**np.arange(-4,4):
    base_svc = LinearSVC(multi_class="crammer_singer", C=c_value, max_iter=100000)
    cs_chain = ClassifierChain(base_svc).fit(X_train, subthemes_ytrain)
    scores.append(cs_chain.score(X_valid, subthemes_valid_arr))

In [102]:
scores

[0.0007710100231303007,
 0.0007710100231303007,
 0.009252120277563608,
 0.19275250578257516,
 0.3430994602929838,
 0.2679259830377795,
 0.25212027756360833,
 0.24710871241326138]

> Same pattern for the C values with crammer_singer: `LinearSVC` with its default regularization (C = 1.0) gives the highest validation accuracy.

> Due to lack of resources, we could not find a proper way to use GridSearchCV with a multi-label classifier. See https://stackoverflow.com/questions/26018543/gridsearch-for-multi-label-classification-in-scikit-learn

In [112]:
models

{'LinearSVC': LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
           intercept_scaling=1, loss='squared_hinge', max_iter=1000,
           multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
           verbose=0),
 'MultinomialNB': MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True),
 'GaussianNB': GaussianNB(priors=None, var_smoothing=1e-09)}

## For themes Y 

## working the same steps on Themes to find base classifier

In [129]:
# Themes only: fit one ClassifierChain per base estimator in `models` and
# collect its metrics as one row of `results_dict_themes`.
# Note: takes about ~15 min to run.

# Scoring uses the array form of the label tables; convert them once up front.
themes_train_arr = np.array(themes_ytrain)
themes_valid_arr = np.array(themes_yvalid)

results_dict_themes = []

for model_name, base_estimator in models.items():
    chain = ClassifierChain(base_estimator).fit(X_train, themes_ytrain)
    valid_pred = chain.predict(X_valid)

    results_dict_themes.append({
        'Model': model_name,
        'Train Accuracy': chain.score(X_train, themes_train_arr),
        'Validation Accuracy': chain.score(X_valid, themes_valid_arr),
        # Recall and precision are micro-averaged over all theme labels.
        'Recall Score': recall_score(themes_valid_arr, valid_pred, average='micro'),
        'Precision Score': precision_score(themes_valid_arr, valid_pred, average='micro'),
    })

In [130]:
pd.DataFrame(results_dict_themes)

Unnamed: 0,Model,Train Accuracy,Validation Accuracy,Recall Score,Precision Score
0,LinearSVC,0.816596,0.484194,0.61194,0.759259
1,MultinomialNB,0.246338,0.20586,0.175235,0.949102
2,GaussianNB,0.363724,0.04973,0.588447,0.143599


In [150]:
#empty out list
results_dict3 = []

In [151]:
def Classifier_Chain(Ytrain, Yvalid, base_model):
    """
    Fit a ClassifierChain around `base_model` and record how it scores.

    The chain is trained on the notebook-level TF-IDF matrix `X_train` against
    `Ytrain` and evaluated on `X_valid` against `Yvalid`. The label sets that
    are passed in are the ones that get used, so the same function serves
    themes or subthemes.

    Note: this redefines the `Classifier_Chain` from the subthemes section;
    the only difference is that results are collected in `results_dict3`.

    Parameters
    ----------
    Ytrain : label table for the training rows (one column per label).
    Yvalid : label table for the validation rows, same columns as `Ytrain`.
    base_model : unfitted estimator to use as the chain's base classifier.

    Returns
    -------
    None. A dict with the keys 'Model', 'Train Accuracy',
    'Validation Accuracy', 'Recall Score' and 'Precision Score' is appended
    to the notebook-level list `results_dict3`.
    """
    # Scoring uses the array form of the labels; convert once.
    y_train_arr = np.array(Ytrain)
    y_valid_arr = np.array(Yvalid)

    model = ClassifierChain(base_model).fit(X_train, Ytrain)
    y_pred = model.predict(X_valid)

    case = {'Model': base_model,  # callers overwrite this with a short display name
            'Train Accuracy': model.score(X_train, y_train_arr),
            'Validation Accuracy': model.score(X_valid, y_valid_arr),
            # Recall and precision are micro-averaged over all labels.
            'Recall Score': recall_score(y_valid_arr, y_pred, average='micro'),
            'Precision Score': precision_score(y_valid_arr, y_pred, average='micro')}

    results_dict3.append(case)

In [152]:
#Random Forest' : RandomForestClassifier()
#Note takes quite some time ~15 min on my computer
Classifier_Chain(themes_ytrain, themes_yvalid, RandomForestClassifier())

In [153]:
results_dict3[0]['Model'] = 'RandomForest' #rename

In [154]:
#Decision Tree' : DecisionTreeClassifier()
#Note takes quite some time ~15 min on my computer
Classifier_Chain(themes_ytrain, themes_yvalid, DecisionTreeClassifier())

In [155]:
results_dict3[1]['Model'] = 'DecisionTree' #rename

In [157]:
#LinearSVC(multi_class= "crammer_singer")
#"crammer_singer" optimizes a joint objective over all classes.
Classifier_Chain(themes_ytrain, themes_yvalid, LinearSVC(multi_class= "crammer_singer"))


In [158]:
results_dict3[2]['Model'] = 'LinearSVC(multi_class= "crammer_singer")' #rename

In [162]:
results_dict_themes.extend(results_dict3)

In [166]:
pd.DataFrame(results_dict_themes)

Unnamed: 0,Model,Train Accuracy,Validation Accuracy,Recall Score,Precision Score
0,LinearSVC,0.816596,0.484194,0.61194,0.759259
1,MultinomialNB,0.246338,0.20586,0.175235,0.949102
2,GaussianNB,0.363724,0.04973,0.588447,0.143599
3,RandomForest,0.998554,0.427525,0.466556,0.857724
4,DecisionTree,0.998554,0.371241,0.604478,0.606826
5,"LinearSVC(multi_class= ""crammer_singer"")",0.757806,0.49229,0.624378,0.716688


> For themes, also looks like LinearSVC() is the best. Hypothesizing that the best regularization parameter C for this model is their default values. Let's see if regularizing Decision Tree will help with overfitting and get better validation results

In [174]:
# Sweep max_depth (10, 20, ..., 90) for a DecisionTreeClassifier base
# estimator on the themes labels and keep each chain's validation accuracy.
themes_valid_arr = np.array(themes_yvalid)

scores = []
for depth in range(10,100,10):
    tree_chain = ClassifierChain(DecisionTreeClassifier(max_depth=depth)).fit(X_train, themes_ytrain)
    scores.append(tree_chain.score(X_valid, themes_valid_arr))

In [175]:
scores

[0.3797224363916731,
 0.4055512721665382,
 0.4109483423284503,
 0.40670778720123363,
 0.3982266769468003,
 0.40400925212027755,
 0.39552814186584423,
 0.39167309175019277,
 0.38781804163454126]

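> Limiting max_depth peaks at about 0.41 validation accuracy (max_depth = 30). That is higher than the unrestricted DecisionTree run recorded in the table above (0.37), but still below RandomForest (0.43) and LinearSVC (0.48), so a depth-limited tree does not change the choice of base classifier.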

In [177]:
#reset list
results_dict3 = []

In [178]:
#Trying last year's base classifier LinearSVC(C=0.5, tol=0.2)
Classifier_Chain(themes_ytrain, themes_yvalid, LinearSVC(C=0.5, tol=0.2))

In [181]:
results_dict3[0]['Model'] = 'LinearSVC(C=0.5, tol=0.2) 2019 capstone base' #rename

In [183]:
results_dict_themes.extend(results_dict3)

In [185]:
themes_results = pd.DataFrame(results_dict_themes)

# Summary of results:
## Subthemes

In [176]:
#SUBTHEMES RESULTS 
print("SUBTHEMES RESULTS:")
my_results

SUBTHEMES RESULTS:


Unnamed: 0,Model,Train Accuracy,Validation Accuracy,Recall Score,Precision Score
0,LinearSVC,0.79867,0.328065,0.403465,0.709312
1,MultinomialNB,0.05185,0.043562,0.034901,0.933775
2,GaussianNB,0.448921,0.024672,0.291584,0.074228
3,RandomForest,0.996627,0.190825,0.194802,0.861993
4,DecisionTree,0.997976,0.226291,0.437376,0.472334
5,"LinearSVC(multi_class= ""crammer_singer"")",0.707209,0.343099,0.42401,0.655067


> LinearSVC() worked the best with their default C regularization parameter. 

    * train score: 0.798
    * validation score: 0.328
    * recall score: 0.403
    * precision score: 0.709

## Themes

In [186]:
#THEMES RESULTS 
print("THEMES RESULTS:")

themes_results

THEMES RESULTS:


Unnamed: 0,Model,Train Accuracy,Validation Accuracy,Recall Score,Precision Score
0,LinearSVC,0.816596,0.484194,0.61194,0.759259
1,MultinomialNB,0.246338,0.20586,0.175235,0.949102
2,GaussianNB,0.363724,0.04973,0.588447,0.143599
3,RandomForest,0.998554,0.427525,0.466556,0.857724
4,DecisionTree,0.998554,0.371241,0.604478,0.606826
5,"LinearSVC(multi_class= ""crammer_singer"")",0.757806,0.49229,0.624378,0.716688
6,"LinearSVC(C=0.5, tol=0.2) 2019 capstone base",0.710678,0.48882,0.594804,0.787701


> LAST YEAR'S RESULTS: 2019 Capstone results on test data for the baseline model (BoW CountVectorizer and BinaryRelevance(LinearSVC(C=0.5, tol=0.2))):

     * Accuracy: 45%
     * Recall: 0.64
     * Precision: 0.74
  
> OUR RESULTS: Using the 2019 capstone base classifier LinearSVC(C=0.5, tol=0.2), overfitting went down and only recall decreased. Our best baseline results (themes, validation set) are:

     * Accuracy: 49%
     * Recall: 0.60
     * Precision: 0.79

<br>
<br>
<br>
<br>
<br>
<br>

***Previous work:***

### Using CountVectorizer for comparison vs TF-IDF

In [85]:
#CountVectorizer Vectorizer Representation

def count_vectorizer(train, valid):
    """
    Build bag-of-words count features for a training / validation pair.

    A CountVectorizer with default settings is fitted on `train` only;
    `valid` is then transformed with that already-fitted vectorizer.

    Returns the two matrices as a tuple: (train_matrix, valid_matrix).
    """
    vectorizer = CountVectorizer()
    train_matrix = vectorizer.fit_transform(train)
    return train_matrix, vectorizer.transform(valid)

#bow = pd.DataFrame(X_train_tfid, columns=sorted(tfid.vocabulary_), index=final_comments)
#X_valid_tfid = tfid.transform(X_valid)

In [86]:
# Build the CountVectorizer matrices from the same cleaned comment text.
# astype('U') forces every entry to a unicode string before vectorizing.
cv_train_text = X_train_Q1['clean_text'].values.astype('U')
cv_valid_text = X_valid_Q1['clean_text'].values.astype('U')
cvX_train, cvX_valid = count_vectorizer(cv_train_text, cv_valid_text)


In [87]:
# Classifier_Chain as defined in this notebook accepts only
# (Ytrain, Yvalid, base_model), always trains on the TF-IDF matrices and prints
# nothing, so the CountVectorizer run is fitted and reported inline here.
print("CountVectorizer- FOR THEMES ONLY using LinearSVC():\n")
cv_chain = ClassifierChain(LinearSVC()).fit(cvX_train, themes_ytrain)
cv_pred = cv_chain.predict(cvX_valid)
print("Training Score for Classifer Chain:", cv_chain.score(cvX_train, np.array(themes_ytrain)))
print("Validation Score for Classifer Chain:", cv_chain.score(cvX_valid, np.array(themes_yvalid)))
print("Validation Recall:", recall_score(np.array(themes_yvalid), cv_pred, average='micro'))
print("Validation Precision:", precision_score(np.array(themes_yvalid), cv_pred, average='micro'))

CountVectorizer- FOR THEMES ONLY using LinearSVC():

Training Score for Classifer Chain: 0.9588473400154202
Validation Score for Classifer Chain: 0.41518889745566695
Validation Recall: 0.6337755666113875
Validation Precision: 0.6297720406481736


In [88]:
# Classifier_Chain as defined in this notebook accepts only
# (Ytrain, Yvalid, base_model), always trains on the TF-IDF matrices and prints
# nothing, so the CountVectorizer run is fitted and reported inline here.
print("CountVectorizer-FOR SUBTHEMES ONLY using LinearSVC():\n")
cv_chain = ClassifierChain(LinearSVC()).fit(cvX_train, subthemes_ytrain)
cv_pred = cv_chain.predict(cvX_valid)
print("Training Score for Classifer Chain:", cv_chain.score(cvX_train, np.array(subthemes_ytrain)))
print("Validation Score for Classifer Chain:", cv_chain.score(cvX_valid, np.array(subthemes_yvalid)))
print("Validation Recall:", recall_score(np.array(subthemes_yvalid), cv_pred, average='micro'))
print("Validation Precision:", precision_score(np.array(subthemes_yvalid), cv_pred, average='micro'))


CountVectorizer-FOR SUBTHEMES ONLY using LinearSVC():

Training Score for Classifer Chain: 0.968195836545875
Validation Score for Classifer Chain: 0.27833461835003853
Validation Recall: 0.4777227722772277
Validation Precision: 0.5130249867091973


In [89]:
# Classifier_Chain as defined in this notebook accepts only
# (Ytrain, Yvalid, base_model), always trains on the TF-IDF matrices and prints
# nothing, so the CountVectorizer run is fitted and reported inline here.
print("CountVectorizer- FOR THEMES ONLY using Decision Tree():\n")

cv_chain = ClassifierChain(DecisionTreeClassifier()).fit(cvX_train, themes_ytrain)
cv_pred = cv_chain.predict(cvX_valid)
print("Training Score for Classifer Chain:", cv_chain.score(cvX_train, np.array(themes_ytrain)))
print("Validation Score for Classifer Chain:", cv_chain.score(cvX_valid, np.array(themes_yvalid)))
print("Validation Recall:", recall_score(np.array(themes_yvalid), cv_pred, average='micro'))
print("Validation Precision:", precision_score(np.array(themes_yvalid), cv_pred, average='micro'))

CountVectorizer- FOR THEMES ONLY using Decision Tree():

Training Score for Classifer Chain: 0.9985543562066307
Validation Score for Classifer Chain: 0.39321511179645335
Validation Recall: 0.6252072968490879
Validation Precision: 0.5751334858886347


In [90]:
# Classifier_Chain as defined in this notebook accepts only
# (Ytrain, Yvalid, base_model), always trains on the TF-IDF matrices and prints
# nothing, so the CountVectorizer run is fitted and reported inline here.
print("CountVectorizer- FOR SUBTHEMES ONLY using Decision Tree:\n")

cv_chain = ClassifierChain(DecisionTreeClassifier()).fit(cvX_train, subthemes_ytrain)
cv_pred = cv_chain.predict(cvX_valid)
print("Training Score for Classifer Chain:", cv_chain.score(cvX_train, np.array(subthemes_ytrain)))
print("Validation Score for Classifer Chain:", cv_chain.score(cvX_valid, np.array(subthemes_yvalid)))
print("Validation Recall:", recall_score(np.array(subthemes_yvalid), cv_pred, average='micro'))
print("Validation Precision:", precision_score(np.array(subthemes_yvalid), cv_pred, average='micro'))

CountVectorizer- FOR SUBTHEMES ONLY using Decision Tree:

Training Score for Classifer Chain: 0.9979760986892829
Validation Score for Classifer Chain: 0.2243639167309175
Validation Recall: 0.45024752475247526
Validation Precision: 0.44376677238350815


### ***For all themes and subthemes:***

In [79]:
# Chain over the full label matrix (every theme and subtheme column at once),
# with LinearSVC(multi_class="ovr"), i.e. one-vs-the-rest, as the base estimator.
ovr_svc = LinearSVC(multi_class="ovr")
classifier_svc = ClassifierChain(classifier=ovr_svc)
classifier_svc.fit(X_train, Y_train)

ClassifierChain(classifier=LinearSVC(C=1.0, class_weight=None, dual=True,
                                     fit_intercept=True, intercept_scaling=1,
                                     loss='squared_hinge', max_iter=1000,
                                     multi_class='ovr', penalty='l2',
                                     random_state=None, tol=0.0001, verbose=0),
                order=None, require_dense=[True, True])

In [80]:
# Accuracy on the training and validation sets for the full-label chain,
# followed by micro-averaged recall and precision on the validation set.
y_valid_all = np.array(y_valid_Q1)

train_score = classifier_svc.score(X_train, Y_train)
print("Training Score for LinearSVC Classifer Chain:", train_score)
valid_score = classifier_svc.score(X_valid, y_valid_all)
print("Validation Score for LinearSVC Classifer Chain:", valid_score)

y_pred = classifier_svc.predict(X_valid)
valid_recall = recall_score(y_valid_all, y_pred, average='micro')
print("Validation Recall for LinearSVC Classifer Chain:", valid_recall)
valid_precision = precision_score(y_valid_all, y_pred, average='micro')
print("Validation Precision for LinearSVC Classifer Chain:", valid_precision)
      

Training Score for LinearSVC Classifer Chain: 0.7907671549730146
Validation Score for LinearSVC Classifer Chain: 0.3338473400154202
Validation Recall for LinearSVC Classifer Chain: 0.5321232697832332
Validation Precision for LinearSVC Classifer Chain: 0.6822367319604888


In [81]:
#Recall score for training 

y_pred = classifier_svc.predict(X_train)
recall_score(Y_train, y_pred, average= 'micro')

0.8729869538341413