### Basic Bayesian Linear Regression with Lists

In [None]:
import matplotlib.pyplot as plt
import pymc3 as pm
import numpy as np

In [None]:
x = [1,2,3,4,5,6,7,8,9,10]
y = [1,4,9,16,25,36,49,64,81,100]

In [None]:
with pm.Model() as linear_model:
    intercept = pm.Normal('Intercept', mu = 0, sd = 10)
    slope = pm.Normal('slope', mu = 0, sd = 10)
    sigma = pm.HalfNormal('sigma', sd = 10)
    mean = intercept + slope * x
    Y_obs = pm.Normal('Y_obs', mu = mean, sd = sigma, observed = y)
    step = pm.NUTS()
    linear_trace = pm.sample(10, step)

In [None]:
plt.scatter(x, y)

pm.plot_posterior_predictive_glm(linear_trace, samples = 10, eval=np.linspace(2, 10, 3), linewidth = 1, 
                                 color = 'red', alpha = 0.8, label = 'Bayesian Posterior Fits',
                                lm = lambda x, linear_trace: linear_trace['Intercept'] + linear_trace['slope'] * x)
plt.show() 

In [None]:
pred = linear_trace['Intercept'] + linear_trace['slope'] * 13
print(max(pred))

### Bayesian Linear Regression using Pandas

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pymc3 as pm

In [None]:
data = pd.read_csv('datasets/lr_data.csv')

X = data.iloc[:, 0].values.reshape(-1, 1) 
Y = data.iloc[:, 1].values.reshape(-1, 1) 

with pm.Model() as linear_model:
    intercept = pm.Normal('Intercept', mu = 0, sd = 10)
    slope = pm.Normal('slope', mu = 0, sd = 10)
    sigma = pm.HalfNormal('sigma', sd = 10)
    mean = intercept + slope * X
    Y_obs = pm.Normal('Y_obs', mu = mean, sd = sigma, observed = Y)
    step = pm.NUTS()
    linear_trace = pm.sample(10, step)

In [None]:
plt.scatter(X, Y)
pm.plot_posterior_predictive_glm(linear_trace, samples = 30, eval=np.linspace(2, 70, 5), linewidth = 1, 
                                 color = 'red', alpha = 0.5, label = 'Bayesian Posterior Fits',
                                lm = lambda X, linear_trace: linear_trace['Intercept'] + linear_trace['slope'] * X)
plt.show()

### Bayesian Linear Regression on Images

In [1]:
import os, cv2
import numpy as np
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
import pymc3 as pm



In [2]:
data = []
labels = []
path1 = "datasets/dogs-vs-cats/cats/"
random.shuffle(os.listdir(path1))
for imagepaths in tqdm(os.listdir(path1)[:300]):
    imagepath = path1 + imagepaths
    image = cv2.imread(imagepath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image,(64,64)).flatten()
    data.append(list(image))
    labels.append(0) 

path2 = "datasets/dogs-vs-cats/dogs/"
random.shuffle(os.listdir(path2))
for imagepaths in tqdm(os.listdir(path2)[:300]):
    imagepath = path2 + imagepaths
    image = cv2.imread(imagepath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image,(64,64)).flatten()
    data.append(list(image))
    labels.append(1) 

100%|███████████████████████████████████████████████████████████████████████████████| 300/300 [00:00<00:00, 625.00it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 300/300 [00:00<00:00, 597.61it/s]


In [3]:
import statistics 

s = []
m = []
for i, d in enumerate(data):
    std = np.std(d)
    s.append(std)
    mean = np.mean(d)
    m.append(mean)


print(f"Mean : {sum(m)/len(m)}, SD : {sum(s)/len(s)}")

Mean : 117.73762410481771, SD : 56.11368981873027


In [4]:
data = np.array(data)
labels = np.array(labels)

In [5]:
print(f"Length of Dog images are : {sum(labels)}")
print(f"Length of Cat images are : {len(labels) - sum(labels)}")

Length of Dog images are : 300
Length of Cat images are : 300


In [6]:
data = data.reshape(data.shape[0]*data.shape[1])
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,

In [8]:
with pm.Model() as linear_model:
    intercept = pm.Normal('Intercept', mu = 0, sd = 100)
    slope = pm.Normal('slope', mu = 0, sd = 100)
    sigma = pm.HalfNormal('sigma', sd = 100)
    mean = intercept + slope * data[0]
    Y_obs = pm.Normal('Y_obs', mu = mean, sd = sigma, observed = labels[0])
    step = pm.NUTS()
    linear_trace = pm.sample(1000, step)

  linear_trace = pm.sample(1000, step)
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, slope, Intercept]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 28 seconds.
There were 132 divergences after tuning. Increase `target_accept` or reparameterize.
The acceptance probability does not match the target. It is 0.6595904194281307, but should be close to 0.8. Try to increase the number of tuning steps.
There were 86 divergences after tuning. Increase `target_accept` or reparameterize.
There were 114 divergences after tuning. Increase `target_accept` or reparameterize.
There were 116 divergences after tuning. Increase `target_accept` or reparameterize.
The estimated number of effective samples is smaller than 200 for some parameters.


In [12]:
import matplotlib.pyplot as plt

ypred1 = linear_trace['Intercept'] + linear_trace['slope'] * data[0] 
print(max(ypred1)*pow(10,15))
if max(ypred1)*pow(10,15) < 0.7:
    label1 = 'Predicted Label: Dog'
else:
    label1 = 'Predicted Label: Cat'
    
ypred2 = linear_trace['Intercept'] + linear_trace['slope'] * data[590] 
print(max(ypred2)*pow(10,15))
if max(ypred2)*pow(10,15) < 0.7:
    label2 = 'Predicted Label: Dog'
else:
    label2 = 'Predicted Label: Cat'

image1 = data[0].reshape(64,64)
image2 = data[590].reshape(64,64)
fig = plt.figure()
ax = fig.add_subplot(1, 2, 1)
plt.imshow(image1)
ax.set_title(label1)

ax = fig.add_subplot(1, 2, 2)
plt.imshow(image2)
ax.set_title(label2)

5.929622242350647e+17
6.940259776141893e+17
170


ValueError: cannot reshape array of size 1 into shape (64,64)

### Bayesian Linear Regression for NLP

In [None]:
import os,re
import numpy as np
from tqdm import tqdm
import pandas as pd
import pymc3 as pm

In [None]:
df = pd.read_excel("datasets/movie_review.xlsx")
df.head()

In [None]:
X = df["Reviews"][:1500]
y = df["Sentiment"][:1500]

In [None]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

In [None]:
print(sum(y), len(y))

In [None]:
data_list = []
for text in X:
        text = re.sub(r'[!@#$(),n"%^*?:;~`0-9]', ' ', text)
        text = re.sub(r'[[]]', ' ', text)
        text = text.lower()
        data_list.append(text)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer()
X = cv.fit_transform(data_list).toarray()
X.shape 

In [None]:
print(np.std(X), y.shape,y)

In [None]:
with pm.Model() as linear_model:
    intercept = pm.Normal('Intercept', mu = 0, sd = 1)
    slope = pm.Normal('slope', mu = 0, sd = 1)
    sigma = pm.HalfNormal('sigma', sd = 1)
    mean = intercept + slope * X.shape[0] 
    Y_obs = pm.Normal('Y_obs', mu = mean, sd = sigma, observed = y)
    step = pm.NUTS()
    linear_trace = pm.sample(1000, step)

In [None]:
def text_pred(text):
    text = cv.transform([text]).toarray()
    print(text)
    pred = linear_trace['Intercept'] + linear_trace['slope'] * text
    print(max(pred)*pow(10,16))
    preds = 0
    if max(pred)*pow(10,16) > 0.5:
        preds = 1
    label = le.inverse_transform([preds])[0]
    return label

In [None]:
text_pred("This movie sucks and the plot twist was already known")

In [None]:
text_pred("Nice movie and the actions are so good")