In [2]:
import numpy as np
import pandas as pd
from typing import Tuple, List
from scipy.special import expit

In [3]:
# import dataset

def import_dataset(path: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load the dataset from a CSV file and split it into features and target.

    Parameters
    ----------
    path : str
        Location of the CSV file (anything ``pd.read_csv`` accepts).
        The file must contain a ``Made`` column.

    Returns
    -------
    X : np.ndarray
        2-D array with one column per remaining feature column, in file order.
    y : np.ndarray
        1-D array holding the values of the ``Made`` column.

    Raises
    ------
    KeyError
        If the file has no ``Made`` column.
    """
    df = pd.read_csv(path)

    # A CSV saved with its index gets a header-less first column that pandas
    # reads back as "Unnamed: 0". It is a row counter, not a feature, so it
    # must not leak into X (the model downstream has one slope per feature).
    feature_and_target_cols = [
        col for col in df.columns if not str(col).startswith('Unnamed:')
    ]
    df = df[feature_and_target_cols]

    y = df['Made'].to_numpy()
    X = df.drop('Made', axis=1).to_numpy()

    return X, y


# Load the features and target once; later cells read the module-level X and y.
X, y = import_dataset('dataset.csv')

In [4]:
# Read the same CSV into a DataFrame as well, so the raw table can be
# displayed with its column names.

df = pd.read_csv('dataset.csv')

df

Unnamed: 0,Made,Angle,Distance
0,1,54.70,0.72
1,0,63.22,7.14
2,0,88.33,6.91
3,0,79.19,1.36
4,0,10.53,7.33
...,...,...,...
1366,1,0.00,0.52
1367,0,60.91,4.29
1368,1,26.86,0.83
1369,0,3.42,5.38


In [5]:
# Display the target array returned by import_dataset.
y

array([1, 0, 0, ..., 1, 0, 0])

In [22]:
import pymc3 as pm

# Suppressing warnings to keep the cell output readable.
# NOTE(review): this filter is global and stays active for every later cell,
# so it can also hide warnings that matter; consider narrowing it.
import warnings
warnings.filterwarnings('ignore')

# Fixed seed so that a fresh "Restart & Run All" reproduces the same draws.
RANDOM_SEED = 42

# The linear predictor below has exactly two slopes (beta1, beta2), so X must
# carry exactly two feature columns; fail early with a readable message
# instead of an opaque shape error from the tensor backend.
if X.ndim != 2 or X.shape[1] != 2:
    raise ValueError(
        f"Expected X with 2 feature columns (one per slope), got shape {X.shape}"
    )

# Bayesian logistic regression of y on the two columns of X.
with pm.Model() as logistic_model:
    # Priors: independent zero-mean Normals with standard deviation 10.
    # `sigma` is the current keyword; `sd` is its deprecated alias.
    intercept = pm.Normal("intercept", mu=0, sigma=10)
    beta1 = pm.Normal("beta1", mu=0, sigma=10)  # slope for X[:, 0]
    beta2 = pm.Normal("beta2", mu=0, sigma=10)  # slope for X[:, 1]

    # Logistic regression equation: one log-odds value per row of X.
    logit_p = intercept + pm.math.dot([beta1, beta2], X.T)

    # Likelihood of the observed 0/1 outcomes given the log-odds.
    likelihood = pm.Bernoulli("likelihood", logit_p=logit_p, observed=y)

    # Sampling from the posterior distribution (cores=1 runs chains sequentially).
    trace = pm.sample(10000, tune=1000, cores=1, random_seed=RANDOM_SEED)

    # Posterior predictive draws: simulated outcomes for every row of X,
    # returned as a dict keyed by the observed variable's name ("likelihood").
    posterior = pm.sample_posterior_predictive(trace, random_seed=RANDOM_SEED)

    print(posterior)


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (2 chains in 1 job)
NUTS: [beta2, beta1, intercept]


Sampling 2 chains for 1_000 tune and 10_000 draw iterations (2_000 + 20_000 draws total) took 70 seconds.


{'likelihood': array([[1, 0, 0, ..., 1, 0, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 1, 0],
       ...,
       [1, 0, 1, ..., 0, 1, 0],
       [0, 0, 1, ..., 1, 0, 1],
       [1, 1, 1, ..., 0, 1, 1]])}
