In [1]:
import pandas as pd
import numpy as np

In [12]:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, make_scorer

from imblearn.over_sampling import SMOTE

In [13]:
class LogitKappaEstimator:
    """Cross-validated logistic regression for one course, scored with Cohen's kappa.

    The course table must contain an ``engaged`` column (the label); every
    other column is used as a feature after ``pd.get_dummies`` encoding.

    Attributes
    ----------
    course_name_ : label passed to the constructor.
    course_df_ : the course DataFrame passed to the constructor.
    kappa_train_ : mean cross-validation kappa at the best grid point.
    min_k_train_ / max_k_train_ : lowest / highest fold kappa at that grid point.
    kappa_test_ : kappa of the fitted model on the held-out test split.
    scores_ : raw ``LogisticRegressionCV.scores_`` from the last fit.

    All result attributes keep their initial values (0.0 / ``{}``) until
    ``compute_kappa`` has been called.
    """

    # One seed shared by the split, SMOTE and the solver so re-runs reproduce.
    RANDOM_STATE = 20130810
    TRAIN_SIZE = 0.8
    CV_FOLDS = 10
    N_JOBS = 3

    def __init__(self, name_str, data_df):
        """Store the course label and its DataFrame; no computation happens here."""
        self.course_name_ = name_str
        self.course_df_ = data_df
        self.kappa_train_ = 0.0
        self.min_k_train_ = 0.0
        self.max_k_train_ = 0.0
        self.kappa_test_ = 0.0
        self.scores_ = {}

    @staticmethod
    def _summarize_cv_scores(scores):
        """Return (mean, min, max) of the fold scores at the best grid point.

        ``scores`` is either the ``scores_`` dict of a fitted
        ``LogisticRegressionCV`` (class label -> array whose first axis is the
        fold) or such an array directly. The "best" grid point is the one with
        the highest mean score across folds.

        Raises
        ------
        ValueError
            If there are no scores to summarise.
        """
        if isinstance(scores, dict):
            if not scores:
                raise ValueError("no cross-validation scores to summarise")
            # Only the first entry is read (a binary problem is expected to
            # have a single one).
            scores = next(iter(scores.values()))

        fold_scores = np.atleast_1d(np.asarray(scores, dtype = float))
        if fold_scores.size == 0:
            raise ValueError("no cross-validation scores to summarise")

        # Flatten any extra grid axes so each column is one grid point.
        fold_scores = fold_scores.reshape(fold_scores.shape[0], -1)
        best = int(np.argmax(fold_scores.mean(axis = 0)))
        selected = fold_scores[:, best]
        return float(selected.mean()), float(selected.min()), float(selected.max())

    @classmethod
    def _oversample(cls, features, labels):
        """Balance the classes 1:1 with SMOTE and return the resampled (X, y)."""
        try:
            sampler = SMOTE(random_state = cls.RANDOM_STATE, sampling_strategy = 1.0)
        except TypeError:
            # Older imbalanced-learn releases only accept the `ratio` keyword.
            sampler = SMOTE(random_state = cls.RANDOM_STATE, ratio = 1.0)

        # `fit_resample` is the current name; `fit_sample` is the legacy alias.
        resample = getattr(sampler, "fit_resample", None)
        if resample is None:
            resample = sampler.fit_sample
        return resample(features, labels)

    def compute_kappa(self):
        """Fit the model and populate every kappa attribute on the instance.

        Steps: one-hot encode the table, split train/test, oversample the
        training split with SMOTE, fit ``LogisticRegressionCV`` scored by
        Cohen's kappa, then record the cross-validation summary and the
        kappa on the untouched test split.

        Raises
        ------
        KeyError
            If the encoded table has no ``engaged`` column.
        """
        data_clean = pd.get_dummies(self.course_df_)
        if "engaged" not in data_clean.columns:
            raise KeyError("course data for %r has no 'engaged' column"
                           % (self.course_name_,))

        labels = np.array(data_clean["engaged"])
        features = np.array(data_clean.drop("engaged", axis = 1))

        features_train, features_test, labels_train, labels_test = train_test_split(
            features, labels,
            train_size = self.TRAIN_SIZE,
            random_state = self.RANDOM_STATE)

        # NOTE(review): SMOTE runs before cross-validation, so validation folds
        # contain synthetic samples derived from other folds and the CV kappas
        # are optimistic. kappa_test_ below is computed on data SMOTE never saw.
        features_train_smote, labels_train_smote = self._oversample(features_train,
                                                                    labels_train)

        logit = LogisticRegressionCV(cv = self.CV_FOLDS,
                                     solver = "saga",
                                     scoring = make_scorer(cohen_kappa_score),
                                     n_jobs = self.N_JOBS,
                                     random_state = self.RANDOM_STATE)

        logit.fit(features_train_smote, labels_train_smote)

        self.scores_ = logit.scores_
        (self.kappa_train_,
         self.min_k_train_,
         self.max_k_train_) = self._summarize_cv_scores(logit.scores_)

        # The held-out split is scored as-is (no oversampling).
        self.kappa_test_ = float(cohen_kappa_score(labels_test,
                                                   logit.predict(features_test)))
        

### 1. CB22x - The Ancient Greek Hero

In [14]:
# Read the CB22x feather file into a DataFrame and wrap it in an estimator
# labelled "CB22x"; nothing is fitted until compute_kappa() is called.
cb22x = pd.read_feather("data/HarvardX_CB22x_2013_Spring.feather")
cb22x_logit = LogitKappaEstimator(name_str = "CB22x", data_df = cb22x)

In [None]:
# Run the split / SMOTE / 10-fold cross-validated logistic-regression fit for
# CB22x; the per-fold CV scores end up in cb22x_logit.scores_.
cb22x_logit.compute_kappa()




## 