# GLM via SKLearn

Please email Joshua_Zimmer@hms.harvard.edu with any questions.

In [31]:
import numpy as np
import pandas as pd
import sklearn.linear_model
from scipy import stats
import matplotlib.pyplot as plt

%matplotlib inline
%load_ext autoreload
%autoreload 2

# Example General SKLearn GLM Implementation

In [29]:
def create_glm(model_name=None, **kwargs):
    """
    Creates a scikit-learn GLM model.
    
    Parameters:
        * model_name: Distribution type for the GLM. One of 'Normal', 'Poisson',
          'Gamma', 'Logistic' or 'Multinomial'.
        * **kwargs: All other keyword arguments for the relevant scikit-learn model.
          For 'Normal', 'Poisson' and 'Gamma' any 'power' entry is overwritten
          with the value implied by model_name.
    
    Returns:
        * mdl: scikit-learn model constructed with relevant parameters
    
    Raises:
        * NotImplementedError: if model_name is not one of the supported names
    """

    # Value passed as TweedieRegressor's `power` for each supported distribution name.
    tweedie_lookup = {
        'Normal': 0,
        'Poisson': 1,
        'Gamma': 2
    }
    logistic_names = {'Logistic', 'Multinomial'}

    if model_name in tweedie_lookup:
        # kwargs is this call's own dict, so the caller's mapping is not modified.
        kwargs['power'] = tweedie_lookup[model_name]
        mdl = sklearn.linear_model.TweedieRegressor(**kwargs)
    elif model_name in logistic_names:
        # NOTE(review): 'Logistic' and 'Multinomial' build the same estimator;
        # no multi_class setting is forwarded. TODO confirm whether 'Multinomial'
        # should force one, and whether the installed scikit-learn accepts it.
        mdl = sklearn.linear_model.LogisticRegression(**kwargs)
    else:
        supported = sorted(set(tweedie_lookup) | logistic_names)
        raise NotImplementedError(
            'Distribution not yet implemented: {!r}. Supported options: {}'.format(
                model_name, ', '.join(supported)
            )
        )

    return mdl

# Example Usage

In [30]:
# Keyword arguments shared by every model constructed in this demo.
model_params = dict(fit_intercept=True)

# Build one GLM per supported distribution name and print its repr.
for model_name in ('Normal', 'Poisson', 'Gamma', 'Logistic', 'Multinomial'):
    print(
        create_glm(model_name=model_name, **model_params)
    )

TweedieRegressor(power=0)
TweedieRegressor(power=1)
TweedieRegressor(power=2)
LogisticRegression()
LogisticRegression()
