# Language Models

In [2]:
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.utils import class_weight
from sklearn.model_selection import RandomizedSearchCV,StratifiedKFold
from sklearn.dummy import DummyClassifier
import pickle
import numpy as np

In [3]:
models=["bert","roberta","distill","xlm","xlnet","xlmroberta","albert"]
folds=["fold1","fold2"]
majorfolder= "../data/"
for mdl in models:
    for fld in folds:
        xtrain=pickle.load(open(majorfolder+mdl+"/"+fld+"/"+"xtrain.pkl",'rb'))
        xtest=pickle.load(open(majorfolder+mdl+"/"+fld+"/"+"xtest.pkl",'rb'))
        ytrain=pickle.load(open(majorfolder+mdl+"/"+fld+"/"+"ytrain.pkl",'rb'))
        ytest=pickle.load(open(majorfolder+mdl+"/"+fld+"/"+"ytest.pkl",'rb'))
        

        print("------------------------------------------")
        print("Model-",mdl,"Folds:",fld)
        print("------------------------------------------")

        class_weights = list(class_weight.compute_class_weight('balanced',np.unique(ytrain),np.array(ytrain)))
        w_array = np.ones(ytrain.shape[0], dtype = 'float')

        for i, val in enumerate(ytrain):
            w_array[i] = class_weights[val-1]

        clf= SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, max_iter=100, random_state=42,class_weight="balanced",warm_start=True)
        clf2=XGBClassifier(n_estimators=200,random_state=10,max_depth=3,learning_rate =0.1)

        clf.fit(np.array(xtrain),np.array(ytrain).astype(np.int32))
        clf2.fit(np.array(xtrain),np.array(ytrain).astype(np.int32))

        ypred=clf.predict(xtest)
        ypred2=clf2.predict(xtest)

        from sklearn.metrics import classification_report

        print("------------SVM----------------------------------------------")
        print(classification_report(np.array(ytest).ravel(),ypred,digits=5))


        print("------------XGB----------------------------------------------")
        print(classification_report(np.array(ytest).ravel(),ypred2,digits=5))


------------------------------------------
Model- bert Folds: fold1
------------------------------------------
------------SVM----------------------------------------------
              precision    recall  f1-score   support

           0    1.00000   0.05172   0.09836        58
           1    0.55200   0.76667   0.64186        90
           2    0.20000   0.08333   0.11765        12
           3    0.29412   0.62500   0.40000        24

    accuracy                        0.47826       184
   macro avg    0.51153   0.38168   0.31447       184
weighted avg    0.63662   0.47826   0.40481       184

------------XGB----------------------------------------------
              precision    recall  f1-score   support

           0    0.77419   0.41379   0.53933        58
           1    0.59574   0.93333   0.72727        90
           2    1.00000   0.08333   0.15385        12
           3    0.27273   0.12500   0.17143        24

    accuracy                        0.60870       184
   m

  _warn_prf(average, modifier, msg_start, len(result))


------------SVM----------------------------------------------
              precision    recall  f1-score   support

           0    0.80000   0.14286   0.24242        56
           1    0.53289   0.90000   0.66942        90
           2    0.00000   0.00000   0.00000        15
           3    0.22727   0.21739   0.22222        23

    accuracy                        0.51087       184
   macro avg    0.39004   0.31506   0.28352       184
weighted avg    0.53254   0.51087   0.42899       184

------------XGB----------------------------------------------
              precision    recall  f1-score   support

           0    0.62500   0.35714   0.45455        56
           1    0.56738   0.88889   0.69264        90
           2    0.00000   0.00000   0.00000        15
           3    0.27273   0.13043   0.17647        23

    accuracy                        0.55978       184
   macro avg    0.36628   0.34412   0.33091       184
weighted avg    0.50183   0.55978   0.49919       184

------

  _warn_prf(average, modifier, msg_start, len(result))


KeyboardInterrupt: 