# Language Models

In [7]:
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.utils import class_weight
from sklearn.model_selection import RandomizedSearchCV,StratifiedKFold
from sklearn.dummy import DummyClassifier
import pickle
import numpy as np

params = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5]
        }





In [8]:
models=["distill","xlm","xlnet","xlmroberta","albert"]
folds=["fold1","fold2"]

for mdl in models:
    for fld in folds:
        xtrain=pickle.load(open(mdl+"/"+fld+"/"+"xtrain.pkl",'rb'))
        xtest=pickle.load(open(mdl+"/"+fld+"/"+"xtest.pkl",'rb'))
        ytrain=pickle.load(open(mdl+"/"+fld+"/"+"ytrain.pkl",'rb'))
        ytest=pickle.load(open(mdl+"/"+fld+"/"+"ytest.pkl",'rb'))
        

        print("------------------------------------------")
        print("Model-",mdl,"Folds:",fld)
        print("------------------------------------------")

        class_weights = list(class_weight.compute_class_weight('balanced',np.unique(ytrain),np.array(ytrain)))
        w_array = np.ones(ytrain.shape[0], dtype = 'float')

        for i, val in enumerate(ytrain):
            w_array[i] = class_weights[val-1]

        clf= SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, max_iter=100, random_state=42,class_weight="balanced",warm_start=True)
        clf2=XGBClassifier(n_estimators=200,random_state=10,max_depth=3,learning_rate =0.1)

        clf.fit(np.array(xtrain),np.array(ytrain).astype(np.int32))
        clf2.fit(np.array(xtrain),np.array(ytrain).astype(np.int32))

        ypred=clf.predict(xtest)
        ypred2=clf2.predict(xtest)

        from sklearn.metrics import classification_report

        print("------------SVM----------------------------------------------")
        print(classification_report(np.array(ytest).ravel(),ypred,digits=5))


        print("------------XGB----------------------------------------------")
        print(classification_report(np.array(ytest).ravel(),ypred2,digits=5))


------------------------------------------
Model- distill Folds: fold1
------------------------------------------
------------SVM----------------------------------------------
              precision    recall  f1-score   support

           1    0.71053   0.46552   0.56250        58
           2    0.61364   0.90000   0.72973        90
           3    0.16667   0.08333   0.11111        12
           4    0.37500   0.12500   0.18750        24

    accuracy                        0.60870       184
   macro avg    0.46646   0.39346   0.39771       184
weighted avg    0.58390   0.60870   0.56595       184

------------XGB----------------------------------------------
              precision    recall  f1-score   support

           1    0.71429   0.43103   0.53763        58
           2    0.61029   0.92222   0.73451        90
           3    0.00000   0.00000   0.00000        12
           4    0.20000   0.08333   0.11765        24

    accuracy                        0.59783       184
 

  _warn_prf(average, modifier, msg_start, len(result))


------------SVM----------------------------------------------
              precision    recall  f1-score   support

           1    0.64516   0.68966   0.66667        58
           2    0.68000   0.75556   0.71579        90
           3    0.37500   0.25000   0.30000        12
           4    0.42857   0.25000   0.31579        24

    accuracy                        0.63587       184
   macro avg    0.53218   0.48630   0.49956       184
weighted avg    0.61633   0.63587   0.62101       184

------------XGB----------------------------------------------
              precision    recall  f1-score   support

           1    0.63889   0.39655   0.48936        58
           2    0.57343   0.91111   0.70386        90
           3    0.00000   0.00000   0.00000        12
           4    0.50000   0.08333   0.14286        24

    accuracy                        0.58152       184
   macro avg    0.42808   0.34775   0.33402       184
weighted avg    0.54709   0.58152   0.51717       184

------

  _warn_prf(average, modifier, msg_start, len(result))


------------SVM----------------------------------------------
              precision    recall  f1-score   support

           1    0.54167   0.46429   0.50000        56
           2    0.63158   0.80000   0.70588        90
           3    0.66667   0.13333   0.22222        15
           4    0.26316   0.21739   0.23810        23

    accuracy                        0.57065       184
   macro avg    0.52577   0.40375   0.41655       184
weighted avg    0.56102   0.57065   0.54532       184

------------XGB----------------------------------------------
              precision    recall  f1-score   support

           1    0.63636   0.37500   0.47191        56
           2    0.57931   0.93333   0.71489        90
           3    0.00000   0.00000   0.00000        15
           4    0.50000   0.13043   0.20690        23

    accuracy                        0.58696       184
   macro avg    0.42892   0.35969   0.34843       184
weighted avg    0.53953   0.58696   0.51916       184

------

  _warn_prf(average, modifier, msg_start, len(result))


------------SVM----------------------------------------------
              precision    recall  f1-score   support

           1    0.31522   1.00000   0.47934        58
           2    0.00000   0.00000   0.00000        90
           3    0.00000   0.00000   0.00000        12
           4    0.00000   0.00000   0.00000        24

    accuracy                        0.31522       184
   macro avg    0.07880   0.25000   0.11983       184
weighted avg    0.09936   0.31522   0.15110       184

------------XGB----------------------------------------------
              precision    recall  f1-score   support

           1    0.53571   0.25862   0.34884        58
           2    0.56081   0.92222   0.69748        90
           3    0.00000   0.00000   0.00000        12
           4    0.57143   0.16667   0.25806        24

    accuracy                        0.55435       184
   macro avg    0.41699   0.33688   0.32610       184
weighted avg    0.51771   0.55435   0.48478       184

------

  _warn_prf(average, modifier, msg_start, len(result))


------------SVM----------------------------------------------
              precision    recall  f1-score   support

           1    0.00000   0.00000   0.00000        56
           2    0.48913   1.00000   0.65693        90
           3    0.00000   0.00000   0.00000        15
           4    0.00000   0.00000   0.00000        23

    accuracy                        0.48913       184
   macro avg    0.12228   0.25000   0.16423       184
weighted avg    0.23925   0.48913   0.32133       184

------------XGB----------------------------------------------
              precision    recall  f1-score   support

           1    0.52941   0.32143   0.40000        56
           2    0.55319   0.86667   0.67532        90
           3    0.00000   0.00000   0.00000        15
           4    0.42857   0.13043   0.20000        23

    accuracy                        0.53804       184
   macro avg    0.37779   0.32963   0.31883       184
weighted avg    0.48528   0.53804   0.47706       184

------

  _warn_prf(average, modifier, msg_start, len(result))


------------SVM----------------------------------------------
              precision    recall  f1-score   support

           1    0.62500   0.25862   0.36585        58
           2    0.57522   0.72222   0.64039        90
           3    0.18519   0.41667   0.25641        12
           4    0.25000   0.20833   0.22727        24

    accuracy                        0.48913       184
   macro avg    0.40885   0.40146   0.37248       184
weighted avg    0.52306   0.48913   0.47493       184

------------XGB----------------------------------------------
              precision    recall  f1-score   support

           1    0.51515   0.29310   0.37363        58
           2    0.56934   0.86667   0.68722        90
           3    0.20000   0.08333   0.11765        12
           4    0.44444   0.16667   0.24242        24

    accuracy                        0.54348       184
   macro avg    0.43223   0.35244   0.35523       184
weighted avg    0.51188   0.54348   0.49321       184

------