### Basic Linear Regression with Lists

In [None]:
import matplotlib.pyplot as plt
from scipy import stats

In [None]:
# Toy dataset: the integers 1..10 and their squares, so the true relationship
# between x and y is quadratic rather than linear.
x = list(range(1, 11))
y = [value ** 2 for value in x]

In [None]:
# Least-squares straight-line fit through the (x, y) points. Besides the slope
# and intercept, linregress reports the correlation coefficient, the p-value
# and the standard error of the slope (kept here as r, p and std_err).
slope, intercept, r, p, std_err = stats.linregress(x, y)


def myfunc(x):
    """Return the fitted line's value at ``x``, i.e. y = m*x + c."""
    return intercept + slope * x

In [None]:
# Evaluate the fitted line at every training x so it can be drawn over the
# raw points.
mymodel = [myfunc(value) for value in x]

plt.scatter(x, y)     # observed data
plt.plot(x, mymodel)  # fitted straight line
plt.show()

In [None]:
# Predict at x = 10, a point whose true value in the training data is 100.

pred = myfunc(10)
print(pred)  # a straight line cannot follow the quadratic trend, so this prints about 88 instead of 100

### Linear Regression using Pandas

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression

In [None]:
# Load the dataset; the first column is used as the feature and the second
# column as the target.
data = pd.read_csv('datasets/lr_data.csv')

# Reshape each column into an (n_samples, 1) array, the 2-D layout that
# scikit-learn estimators expect.
X, Y = (data.iloc[:, column].values.reshape(-1, 1) for column in (0, 1))

# fit() returns the estimator itself, so construction and fitting can be
# chained. Predictions are made on the training inputs to draw the fitted line.
clf = LinearRegression().fit(X, Y)
Y_pred = clf.predict(X)

In [None]:
# Raw observations as points, with the model's predictions drawn as a red line.
plt.scatter(X, Y)
plt.plot(X, Y_pred, color='red')
plt.show()

### Linear Regression on Images

In [None]:
import os, cv2
import numpy as np
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [None]:
def _load_images(folder, label, limit=200):
    """Load up to ``limit`` randomly chosen images from ``folder``.

    Each image is converted to grayscale, resized to 64x64 and flattened into
    a 4096-element pixel vector.

    Args:
        folder: Directory containing the image files.
        label: Class label attached to every image loaded from ``folder``.
        limit: Maximum number of files to read from the directory.

    Returns:
        A ``(images, targets)`` pair of equal-length lists.
    """
    # random.shuffle works in place and returns None, so the listing must be
    # bound to a name first; shuffling a temporary list has no effect.
    filenames = os.listdir(folder)
    random.shuffle(filenames)

    images, targets = [], []
    for filename in tqdm(filenames[:limit]):
        image = cv2.imread(os.path.join(folder, filename))
        if image is None:
            # cv2.imread returns None for files it cannot decode (for example
            # stray non-image files); skip them instead of crashing below.
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        images.append(cv2.resize(image, (64, 64)).flatten())
        targets.append(label)
    return images, targets


data = []
labels = []

# Cats are labelled 0.
path1 = "datasets/dogs-vs-cats/cats/"
cat_images, cat_labels = _load_images(path1, 0)
data.extend(cat_images)
labels.extend(cat_labels)

# Dogs are labelled 1.
path2 = "datasets/dogs-vs-cats/dogs/"
dog_images, dog_labels = _load_images(path2, 1)
data.extend(dog_images)
labels.extend(dog_labels)

In [None]:
# Stack the flattened pixel vectors into one array (one row per image) and
# convert the label list to an array as well.
data = np.array(data)
labels = np.array(labels)

In [None]:
# Dogs are labelled 1 and cats 0, so the label sum is the number of dog images
# and the remainder is the number of cat images.
print(f"Length of Dog images are : {sum(labels)}")
print(f"Length of Cat images are : {len(labels) - sum(labels)}")

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the samples for evaluation. The [:5000] slices only cap the
# dataset size; with at most 200 images loaded per class they keep everything.
# NOTE(review): no random_state is passed, so the split differs between runs.
xtrain, xtest, ytrain, ytest = train_test_split(data[:5000], labels[:5000], test_size=0.25)
print(xtrain.shape, xtest.shape, ytrain.shape, ytest.shape)

In [None]:
# Fit ordinary least squares on the raw pixel values, regressing directly on
# the 0/1 class labels.
clf = LinearRegression()
clf.fit(xtrain, ytrain)

In [None]:
from sklearn.metrics import accuracy_score

# The regressor outputs continuous scores; anything above 0.5 is treated as
# class 1 (dog) and everything else as class 0 (cat).
ypred = clf.predict(xtest) > 0.5
acc = accuracy_score(ytest, ypred)
print(acc)

In [None]:
import matplotlib.pyplot as plt

# Score two individual test images. predict() expects a 2-D input, hence the
# single-element list wrapped around each sample.
ypred1 = clf.predict([xtest[0]])
ypred2 = clf.predict([xtest[59]])

# Same 0.5 cut-off as the accuracy computation: above it means dog.
label1 = 'Predicted Label: Dog' if ypred1 > 0.5 else 'Predicted Label: Cat'
label2 = 'Predicted Label: Dog' if ypred2 > 0.5 else 'Predicted Label: Cat'

# Restore the flattened pixel vectors to 64x64 images for display.
image1 = xtest[0].reshape(64, 64)
image2 = xtest[59].reshape(64, 64)

fig = plt.figure()

# Left panel: first sample.
ax = fig.add_subplot(1, 2, 1)
plt.imshow(image1)
ax.set_title(label1)

# Right panel: second sample.
ax = fig.add_subplot(1, 2, 2)
plt.imshow(image2)
ax.set_title(label2)

### Linear Regression for NLP

In [None]:
import os,re
import numpy as np
from tqdm import tqdm
import pandas as pd
from sklearn.linear_model import LinearRegression

In [None]:
# Load the movie-review spreadsheet and preview its first rows.
df = pd.read_excel("datasets/movie_review.xlsx")
df.head()

In [None]:
# Use only the first 2500 rows: review text as input, sentiment as target.
X = df["Reviews"][:2500]
y = df["Sentiment"][:2500]

In [None]:
from sklearn.preprocessing import LabelEncoder
# Replace the sentiment labels with integer codes. The fitted encoder is kept
# so that predictions can be mapped back to the original labels.
# NOTE(review): the 0.5 threshold used later presumably assumes exactly two
# sentiment classes -- confirm against the dataset.
le = LabelEncoder()
y = le.fit_transform(y)

In [None]:
# Characters replaced by a space in every review: punctuation, symbols, digits,
# newlines and square brackets. The newline must be written as "\n": a bare
# "n" inside the character class would delete every letter n from the text.
# The brackets are escaped so that each one is matched individually.
_NOISE_PATTERN = re.compile(r'[!@#$(),\n"%^*?:;~`0-9\[\]]')


def _clean_text(text):
    """Return ``text`` lower-cased, with noise characters replaced by spaces."""
    return _NOISE_PATTERN.sub(' ', text).lower()


# Cleaned review texts, in the same order as X.
data_list = [_clean_text(text) for text in X]

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words features: one row per review and one column per vocabulary
# word, holding word counts. toarray() converts the sparse result to a dense
# array. The fitted vectorizer is kept so new text can be encoded the same way.
cv = CountVectorizer()
X = cv.fit_transform(data_list).toarray()
X.shape 

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the reviews for evaluation.
# NOTE(review): no random_state is passed, so the split differs between runs.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)

In [None]:
# Fit ordinary least squares on the word-count features, regressing on the
# integer-encoded sentiment labels.
model = LinearRegression()
model.fit(x_train, y_train)

In [None]:
# Continuous regression scores for the held-out reviews (thresholded later).
y_pred = model.predict(x_test)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Turn the continuous regression scores into class decisions with a 0.5 cut-off.
y_pred = (y_pred > 0.5)
ac = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)  # computed but not displayed in this cell

print("Accuracy is :",ac)

In [None]:
def text_pred(text):
    """Predict the sentiment label of a single review string.

    The text is encoded with the fitted vectorizer ``cv`` and scored by
    ``model``. The raw score is printed, then thresholded at 0.5 and mapped
    back to the original label through ``le``.
    """
    features = cv.transform([text]).toarray()
    score = model.predict(features)
    print(score)
    class_index = 1 if score > 0.5 else 0
    return le.inverse_transform([class_index])[0]

In [None]:
# Example: a review with negative wording.
text_pred("This movie sucks and the plot twist was already known")

In [None]:
# Example: a review with positive wording.
text_pred("Nice movie and the actions are so good")