In [1]:
import pandas as pd

import utils
from tfidf_model import TFIDFModel

In [26]:
# Names of the custom features passed to TFIDFModel as `custom_feature_names`.
# Order matters: the coefficient analysis later in this notebook pairs these
# names positionally with the first len(custom_names) coefficient columns.
custom_names = [
    utils.COL_BOOK_SIMILARITY,
    utils.COL_CONTAINS_EMOTICON,
    utils.COL_CONTAINS_LINK,
    utils.COL_WORD_COUNT,
    utils.COL_CHAR_COUNT,
    utils.COL_UPPERCASE_COUNT,
    utils.COL_QUESTION_COUNT,
]

In [3]:
# Load the discussions dataset. The helper returns two objects, unpacked here
# as the inputs and the targets that are later handed to `tfidf.fit`.
# NOTE(review): presumably pandas DataFrames (per the `_df` suffix) — confirm in utils.
data_df, target_df = utils.load_discussions_data()

In [None]:
# Build the classifier with its hyperparameters listed one per line, then fit
# it on the loaded discussions data.
# NOTE(review): "sag" is presumably forwarded to a scikit-learn logistic
# regression, where it is a stochastic solver — confirm, and consider fixing a
# seed if TFIDFModel exposes one so the coefficients below are reproducible.
tfidf = TFIDFModel(
    max_iter=2500,
    regularization=0.15,
    solver="sag",
    custom_feature_names=custom_names,
)
tfidf.fit(data_df, target_df)

In [44]:
# For every custom feature, find the largest coefficient over all classes and
# the class that coefficient belongs to, then collect the result in a table.
# NOTE(review): this assumes the custom features occupy the first
# len(custom_names) columns of `coef_` — confirm against TFIDFModel's feature
# ordering. It also reaches into the private `_log_reg` attribute.
log_reg = tfidf._log_reg
# Restrict to the custom-feature columns once, then reduce over the class axis.
custom_coefs = log_reg.coef_[:, : len(custom_names)]
max_coefs = custom_coefs.max(axis=0)
best_class_idxs = custom_coefs.argmax(axis=0)
best_classes = log_reg.classes_[best_class_idxs]

feature_rows = list(zip(custom_names, max_coefs, best_classes))
custom_features_df = pd.DataFrame(
    data=feature_rows,
    columns=["Custom Feature", "Max Coefficient", "Predictive Class"],
)

In [54]:
# Rank the custom features from the largest to the smallest maximum
# coefficient. The sorted frame is rebound instead of using inplace=True, so
# the frame built in the previous cell is not mutated behind the reader's back
# and the expression stays chainable.
custom_features_df = custom_features_df.sort_values(
    by="Max Coefficient", ascending=False
)
custom_features_df

Unnamed: 0,Custom Feature,Max Coefficient,Predictive Class
4,Char Count,3.032205,Content Discussion
0,Book Similarity,2.560155,Content Discussion
3,Word Count,1.202205,Assignment Instructions
5,Uppercase Count,1.118973,Greeting
6,Question Words Count,1.096541,Content Question
2,Contains Link,0.68751,Outside Material
1,Contains Emoticon,0.485253,Emoticon/Non-verbal


In [55]:
# Render the ranked table as a LaTeX `tabular` string.
# NOTE(review): as a bare last expression the notebook shows the escaped string
# repr (literal "\n" and doubled backslashes). Wrapping the call in print(...)
# would give copy-pasteable LaTeX, and index=False would drop the leftover row
# index column — left unchanged here to keep the saved output valid.
custom_features_df.to_latex()

'\\begin{tabular}{llrl}\n\\toprule\n{} &        Custom Feature &  Max Coefficient &         Predictive Class \\\\\n\\midrule\n4 &            Char Count &         3.032205 &       Content Discussion \\\\\n0 &       Book Similarity &         2.560155 &       Content Discussion \\\\\n3 &            Word Count &         1.202205 &  Assignment Instructions \\\\\n5 &       Uppercase Count &         1.118973 &                 Greeting \\\\\n6 &  Question Words Count &         1.096541 &         Content Question \\\\\n2 &         Contains Link &         0.687510 &         Outside Material \\\\\n1 &     Contains Emoticon &         0.485253 &      Emoticon/Non-verbal \\\\\n\\bottomrule\n\\end{tabular}\n'