In [None]:
!pip install nltk==3.4
import nltk
import os
import glob
from nltk import word_tokenize
from nltk.util import ngrams
from nltk.corpus import stopwords
from nltk.stem.porter import *
import pandas as pd
import csv
import string
import numpy as np
from google.colab import drive
nltk.download('punkt')
nltk.download('stopwords')



In [None]:
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


# Preprocessing


In [None]:
df1 = pd.read_csv('/content/drive/MyDrive/fake_reviews_detection/dataset_files/true_positive.txt',delimiter='\n')
df1.to_csv('/content/drive/MyDrive/fake_reviews_detection/dataset_files/pos_true.csv', index = True)
df2 = pd.read_csv('/content/drive/MyDrive/fake_reviews_detection/dataset_files/neg_true.txt',delimiter='\n')
df2.to_csv('/content/drive/MyDrive/fake_reviews_detection/dataset_files/neg_true.csv', index = True)
df3 = pd.read_csv('/content/drive/MyDrive/fake_reviews_detection/dataset_files/false_positive.txt',delimiter='\n')
df3.to_csv('/content/drive/MyDrive/fake_reviews_detection/dataset_files/pos_fake.csv', index = True)
df4 = pd.read_csv('/content/drive/MyDrive/fake_reviews_detection/dataset_files/false_negative.txt',delimiter='\n')
df4.to_csv('/content/drive/MyDrive/fake_reviews_detection/dataset_files/neg_fake.csv', index = True)

In [None]:
#Since machine learning models cannot take a string as input, the reviews will have to be converted into a suitable data type like a vectorized list

# Feature Engineering

In [None]:
def create_unigram(review): 
	"""
	Input: review (string)
	Output: unigrams (list)
	"""
	token = word_tokenize(review)
	unigrams = ngrams(token,1)
	return list(unigrams)

In [None]:
def create_complete_unigram_list(dataset):
	"""
	Input:  dataset (dataframe)
	Output: final_unigram_list (list)
	"""
	final_unigram_list=[]
	
	for row in dataset:
		review=row[0]
		review=remove_stopwords(review)
		review=get_string_stem(review)
		final_unigram_list+=create_unigram(review)
		final_unigram_list=list(set(final_unigram_list))
	return final_unigram_list

In [None]:
def create_bigram(review):
	"""
  Input: review (string)
	Output: bigrams (list)
	"""
	token = word_tokenize(review)
	bigrams = ngrams(token,2)
	return list(bigrams)

In [None]:
def create_complete_bigram_list(dataset):
	"""
	Input:  dataset (dataframe)  
	Output: final_bigram_list (list)
	"""
	final_bigram_list=[]
	for row in dataset:
		review=row[0]
		review=remove_stopwords(review)
		review=get_string_stem(review)
		final_bigram_list+=create_bigram(review)
		final_bigram_list=list(set(final_bigram_list))
	return final_bigram_list

In [None]:
def create_trigram(review):
	"""
	Input: review (string)
	Output: trigrams (list)
	"""
	token = word_tokenize(review)
	trigrams = ngrams(token,3)
	return list(trigrams)

In [None]:
def create_complete_trigram_list(dataset):
	"""
	Input:  dataset (dataframe)
	Output: final_trigram_list (list)
	"""
	final_trigram_list=[]
	for row in dataset:
		review=row[0]
		review=remove_stopwords(review)
		review=get_string_stem(review)
		final_trigram_list+=create_trigram(review)
		final_trigram_list=list(set(final_trigram_list))
	return final_trigram_list

In [None]:
def get_top_ngram(ngrams_list,top_max_count): #returns a list of the most common ngrams
    """
	Input:  ngrams_list (list) 
          top_max_count (int) 
	Output: most_common_ngram_list (list)
	"""
    most_common_ngram_list=[]
    fdist = nltk.FreqDist(ngrams_list)
    for ngram_count in fdist.most_common(top_max_count):
        most_common_ngram_list.append(ngram_count[0])
    return most_common_ngram_list

In [None]:
def create_vector(row,final_ngram_list): #creates a vector array for every review in the dataset
	"""
	Input: row (dataframe row)
				 final_ngram_list (list)
	Output: review vector (list)
	"""
	review=row[0]
	review=remove_stopwords(review)
	review=get_string_stem(review)

	review_unigram=create_unigram(review)
	review_bigram=create_bigram(review)
	review_trigram=create_trigram(review)

	review_ngram=review_unigram+review_bigram+review_trigram

	review_vector=[0]*len(final_ngram_list)

	#Creates a 1/0 representation
	for ngram in review_ngram:
		if ngram in final_ngram_list:
			review_vector[final_ngram_list.index(ngram)]=1

	#Vector representation : [ngram_counts] + char_count+ punc_count +exclamation_count + question_count+[label from csv about pos/neg]
	review_vector+=[get_character_count(row[0])]+[get_punctuation_count(row[0])]+[get_exclamation_count(row[0])]+[get_question_count(row[0])]+[row[1]]
	return review_vector

In [None]:
def get_selected_ngrams(uni_count,bi_count,tri_count,dataset): 
	"""
	Input: uni_count (int)
				 bi_count (int)
				 tri_count (int)	
				 dataset   (dataframe)
	Output: final_ngram_list (list)
	"""
	final_unigram_list= create_complete_unigram_list(dataset)
	final_bigram_list= create_complete_bigram_list(dataset)
	final_trigram_list= create_complete_trigram_list(dataset)

	final_ngram_list=get_top_ngram(final_unigram_list,uni_count)+get_top_ngram(final_bigram_list,bi_count)+get_top_ngram(final_trigram_list,tri_count)

	return final_ngram_list

In [None]:
def create_vector_list(final_ngram_list, dataset): #returns a list of the vectorized rows
	"""
	Input: final_ngram_list (list)
		     dataset (dataframe)
	Output: vector_list (list)
	"""
	vector_list=[]

	for index,row in dataset.iterrows():
		vector_list.append(create_vector(row,final_ngram_list))
	
	return vector_list

In [None]:
def get_string_stem(input_string): #gets the root word from different variations of it (eg. 'like' is the root word for 'likes','liked','likely')
	"""
	Input: input_string (string)
	Output : ps.stem(input_string) (string)
	"""
	ps = PorterStemmer()
	return (ps.stem(input_string))

In [None]:
def remove_stopwords(review): #removes commonly used words like 'the', 'a', 'an'
	"""
	Input: review (string)
	Output: newTokens (string)
	"""
	tokens = word_tokenize(review)
	newTokens=[]
	for x in tokens:
		if x.lower() not in stopwords.words('english'):
			newTokens.append(x)
	return (" ".join(newTokens))

In [None]:
def get_punctuation_count(input_string): #gets number of punctuation marks 
	count = lambda l1, l2: len(list(filter(lambda c: c in l2, l1)))
	return count(input_string, string.punctuation)

In [None]:
def get_character_count(input_string): #gets number of characters
	count = lambda l1, l2: len(list(filter(lambda c: c in l2, l1)))
	return count(input_string, string.ascii_letters)

In [None]:
def get_exclamation_count(input_string): #gets number of exclamatory marks
	return input_string.count('!')

In [None]:
def get_question_count(input_string): #gets number of question marks
	return input_string.count('?')

# Preprocessing

In [None]:

def create_full_csv(): 
    with open("/content/drive/My Drive/fake_reviews_detection/dataset_files/neg_fake.csv") as f:
        neg_fake = [review for review in f.read()[:-1].split("\n")]
    with open("/content/drive/My Drive/fake_reviews_detection/dataset_files/pos_fake.csv") as f:
        pos_fake = [review for review in f.read()[:-1].split("\n")]
    with open("/content/drive/My Drive/fake_reviews_detection/dataset_files/pos_true.csv") as f:
        pos_true = [review for review in f.read()[:-1].split("\n")]
    with open("/content/drive/My Drive/fake_reviews_detection/dataset_files/neg_true.csv") as f:
        neg_true = [review for review in f.read()[:-1].split("\n")]
        
    data = pos_fake + neg_fake + pos_true + neg_true
    labels = [0] * (len(pos_fake) + len(neg_fake)) + [1] * (len(pos_true) + len(neg_true))
    pos_neg = [1] * len(pos_fake) + [0] * len(neg_fake) + [1] * len(pos_true) + [0] * len(neg_true)
    processed_data = zip(data, pos_neg, labels)

    data_file = open("data_full.csv", "w")
    wr_file = csv.writer(data_file, delimiter = ',')
    for row in processed_data:
        # print(row)
        wr_file.writerow(row)
    data_file.close()
    return processed_data


In [None]:
create_full_csv()

<zip at 0x7fb76a9b5e10>

In [None]:
df = pd.read_csv('/content/data_full.csv')
df.columns = ['Review','Pos or Neg','True or False']


In [None]:
df.shape

(1599, 3)

# Feature Engineering Results

## Punctuation Count

In [None]:
punctuation_count=[]
for i in range(1599):
  res=get_punctuation_count(df['Review'].iloc[i])
  punctuation_count.append(res)
print(len(punctuation_count))

1599


## Character Count

In [None]:
character_count=[]
for i in range(1599):
  res=get_character_count(df['Review'].iloc[i])
  character_count.append(res)
print(character_count)

[294, 304, 464, 452, 328, 379, 341, 292, 323, 696, 287, 489, 254, 705, 194, 300, 513, 431, 443, 1174, 364, 615, 452, 497, 238, 545, 1337, 614, 227, 468, 522, 439, 1226, 214, 287, 159, 322, 710, 143, 691, 744, 440, 183, 327, 749, 364, 343, 212, 461, 491, 770, 727, 588, 341, 330, 182, 305, 443, 918, 514, 480, 332, 316, 619, 1086, 518, 626, 329, 397, 560, 596, 719, 892, 612, 401, 458, 534, 203, 299, 292, 238, 522, 330, 242, 335, 338, 371, 251, 857, 1021, 310, 400, 342, 959, 642, 274, 564, 212, 548, 380, 456, 420, 711, 706, 610, 745, 595, 670, 429, 785, 253, 212, 284, 272, 479, 637, 399, 615, 658, 481, 657, 511, 291, 469, 417, 686, 301, 575, 1041, 521, 385, 686, 780, 406, 455, 443, 727, 874, 132, 475, 682, 401, 355, 527, 385, 230, 381, 577, 361, 170, 1146, 249, 582, 325, 526, 735, 147, 790, 144, 559, 518, 548, 213, 824, 820, 457, 259, 528, 253, 1321, 266, 264, 505, 371, 411, 535, 209, 168, 356, 623, 579, 368, 443, 493, 828, 349, 673, 436, 282, 816, 671, 481, 587, 214, 227, 903, 539, 1321, 

## Exclamation Count

In [None]:
exclamation_count=[]
for i in range(1599):
  res=get_exclamation_count(df['Review'].iloc[i])
  exclamation_count.append(res)
print(exclamation_count)

[6, 0, 2, 2, 0, 2, 0, 1, 2, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 6, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 4, 1, 0, 6, 1, 2, 0, 0, 0, 5, 0, 2, 1, 2, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 1, 4, 3, 0, 1, 1, 1, 0, 3, 1, 1, 0, 1, 0, 2, 0, 0, 1, 0, 1, 0, 0, 0, 3, 1, 1, 0, 5, 1, 0, 0, 2, 1, 2, 1, 0, 5, 2, 4, 0, 0, 0, 2, 0, 0, 0, 1, 6, 5, 0, 2, 3, 2, 1, 0, 1, 0, 4, 0, 2, 0, 0, 0, 1, 6, 0, 0, 1, 0, 2, 0, 1, 1, 0, 2, 1, 2, 1, 0, 0, 0, 0, 2, 1, 2, 2, 0, 0, 1, 6, 1, 4, 0, 1, 0, 4, 0, 0, 2, 2, 5, 0, 0, 0, 1, 0, 3, 1, 4, 0, 0, 0, 6, 1, 0, 1, 2, 0, 0, 2, 0, 0, 3, 0, 5, 0, 4, 5, 1, 0, 1, 0, 2, 1, 1, 1, 0, 1, 1, 2, 10, 0, 0, 1, 4, 2, 0, 0, 0, 0, 0, 1, 3, 0, 1, 1, 2, 0, 4, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 1, 0, 6, 3, 1, 3, 1, 0, 2, 1, 2, 0, 0, 4, 1, 0, 1, 3, 1, 2, 1, 0, 0, 0, 2, 0, 3, 7, 0, 1, 2, 0, 0, 2, 1, 0, 2, 0, 1, 2, 0, 3, 3, 0, 0, 1, 0, 0, 0, 2, 0, 2, 0, 2, 3, 1, 0, 1, 1, 1, 4, 0, 5, 0, 3, 0, 1, 1, 1, 0, 7, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 1, 2, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 2,

## Question Count

In [None]:
question_count=[]
for i in range(1599):
  res=get_question_count(df['Review'].iloc[i])
  question_count.append(res)
print(question_count)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 

# Adding the new features as columns to the dataframe

In [None]:
df['punctuation_count'] = punctuation_count
df['character_count'] = character_count
df['exclamation_count'] = exclamation_count
df['question_count'] = question_count

In [None]:
df

Unnamed: 0,Review,Pos or Neg,True or False,punctuation_count,character_count,exclamation_count,question_count
0,"0,""The Hilton Chicago was amazing!! It is clos...",1,0,20,294,6,0
1,"1,Excellent hotel in the heart of Chicago. The...",1,0,8,304,0,0
2,"2,""I stayed at the Hilton Chicago for my cousi...",1,0,15,464,2,0
3,"3,""The Downtown Chicago Hilton was the best co...",1,0,18,452,2,0
4,"4,The Chicago Hilton is a great hotel our stay...",1,0,6,328,0,0
...,...,...,...,...,...,...,...
1594,"394,""I have stayed in the Talbott a few times ...",0,1,21,875,0,0
1595,"395,""We reserved a room with a sleeper couch a...",0,1,22,752,0,0
1596,"396,""I can easily say this is one of the worst...",0,1,26,816,0,0
1597,"397,""Just back from spending Memorial Day week...",0,1,22,531,1,0


# Splitting the dataset

In [None]:
#copying the label to another array before removing it from the dataframe
y = df['True or False']
x = df
x.drop('True or False', axis=1, inplace=True)

In [None]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
final_ngram_list = get_selected_ngrams(2000, 0, 0, x_train)

In [None]:
x_train = create_vector_list(final_ngram_list, x_train)
x_test = create_vector_list(final_ngram_list, x_test)

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(x_train)
x_train_std = scaler.transform(x_train)
x_test_std = scaler.transform(x_test) 

In [None]:
print (x_train_std)

[[ 0.          0.          0.         ...  0.55460871 -0.21532761
  -1.00863777]
 [ 0.          0.          0.         ... -0.56423126 -0.21532761
   0.9914362 ]
 [ 0.          0.          0.         ... -0.00481127 -0.21532761
   0.9914362 ]
 ...
 [ 0.          0.          0.         ... -0.56423126 -0.21532761
   0.9914362 ]
 [ 0.          0.          0.         ...  0.55460871 -0.21532761
  -1.00863777]
 [ 0.          0.          0.         ...  3.91112864 -0.21532761
   0.9914362 ]]


# Machine Learning Models

## Linear Support Vector Classifier

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
clf = svm.SVC(kernel='linear')
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
print(accuracy_score(y_test,y_pred))



0.678125


## Naive Bayes Classifier

In [None]:
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(x_train_std,y_train)y_pred = clf.predict(x_test_std)
print (accuracy_score(y_test,y_pred))

0.553125


## KNeighborsClassifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=10)
clf.fit(x_train_std,y_train)
y_pred = clf.predict(x_test_std)
print (accuracy_score(y_test,y_pred))

0.603125


## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(x_train_std,y_train)
y_pred = clf.predict(x_test_std)
print (accuracy_score(y_test,y_pred))

0.68125


# Hyperparameter Tuning

## Tuning Linear SVC

In [None]:
clf = svm.SVC(kernel='linear')
clf.fit(x_train_std,y_train)

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform
param_distributions = {"C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(clf, param_distributions, n_iter=10, verbose=2, cv=3)
rnd_search_cv.fit(x_train_std, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .................................C=3.93889725208372; total time=   0.1s
[CV] END .................................C=3.93889725208372; total time=   0.1s
[CV] END .................................C=3.93889725208372; total time=   0.0s
[CV] END .................................C=9.65622790121861; total time=   0.1s
[CV] END .................................C=9.65622790121861; total time=   0.1s
[CV] END .................................C=9.65622790121861; total time=   0.1s
[CV] END ...............................C=2.5359394353820246; total time=   0.0s
[CV] END ...............................C=2.5359394353820246; total time=   0.0s
[CV] END ...............................C=2.5359394353820246; total time=   0.0s
[CV] END ................................C=3.263786453450758; total time=   0.1s
[CV] END ................................C=3.263786453450758; total time=   0.0s
[CV] END ................................C=3.263

RandomizedSearchCV(cv=3, estimator=SVC(kernel='linear'),
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fb769f88fd0>},
                   verbose=2)

In [None]:
rnd_search_cv.best_estimator_

SVC(C=4.507685232627908, kernel='linear')

In [None]:
rnd_search_cv.best_score_

0.6489336749092002

In [None]:
rnd_search_cv.best_estimator_.fit(x_train_std, y_train)

SVC(C=4.507685232627908, kernel='linear')

In [None]:
y_pred = rnd_search_cv.best_estimator_.predict(x_test_std)
accuracy_score(y_test, y_pred)

0.678125

## Tuning Logistic Regression

In [None]:
clf = LogisticRegression()
clf.fit(x_train_std,y_train)

In [None]:
from sklearn.model_selection import GridSearchCV
c_space = np.logspace(-5, 8, 15)
param_grid = {'C': c_space}
logreg_cv = GridSearchCV(clf, param_grid, cv = 5)
logreg_cv.fit(x_train_std, y_train)

GridSearchCV(cv=5, estimator=LogisticRegression(),
             param_grid={'C': array([1.00000000e-05, 8.48342898e-05, 7.19685673e-04, 6.10540230e-03,
       5.17947468e-02, 4.39397056e-01, 3.72759372e+00, 3.16227766e+01,
       2.68269580e+02, 2.27584593e+03, 1.93069773e+04, 1.63789371e+05,
       1.38949549e+06, 1.17876863e+07, 1.00000000e+08])})

In [None]:
logreg_cv.best_estimator_.fit(x_train_std, y_train)

LogisticRegression(C=3.727593720314938)

In [None]:
y_pred = logreg_cv.best_estimator_.predict(x_test_std)
accuracy_score(y_test, y_pred)

0.68125

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform
param_distributions = {"C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(clf, param_distributions, n_iter=10, verbose=2, cv=3)
rnd_search_cv.fit(x_train_std, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END ................................C=8.031788449927515; total time=   0.0s
[CV] END ................................C=8.031788449927515; total time=   0.0s
[CV] END ................................C=8.031788449927515; total time=   0.0s
[CV] END ...............................C=2.0090924817058164; total time=   0.0s
[CV] END ...............................C=2.0090924817058164; total time=   0.0s
[CV] END ...............................C=2.0090924817058164; total time=   0.0s
[CV] END ................................C=6.029083296345009; total time=   0.0s
[CV] END ................................C=6.029083296345009; total time=   0.0s
[CV] END ................................C=6.029083296345009; total time=   0.0s
[CV] END ................................C=5.898408204262755; total time=   0.0s
[CV] END ................................C=5.898408204262755; total time=   0.0s
[CV] END ................................C=5.898

RandomizedSearchCV(cv=3, estimator=LogisticRegression(),
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fb75fd1b3d0>},
                   verbose=2)

In [None]:
rnd_search_cv.best_estimator_.fit(x_train_std, y_train)

LogisticRegression(C=2.0090924817058164)

In [None]:
y_pred = rnd_search_cv.best_estimator_.predict(x_test_std)
accuracy_score(y_test, y_pred)

0.68125

#Evaluation Metrics

In [None]:
from sklearn.metrics import precision_score, f1_score, confusion_matrix

#Results

In [None]:
print("Accuracy score:",accuracy_score(y_test, y_pred))
print("Precision score:",precision_score(y_test, y_pred))
print("F1 score:",f1_score(y_test, y_pred))
print("Confusion Matrix:",confusion_matrix(y_test, y_pred))

Accuracy score: 0.68125
Precision score: 0.6710526315789473
F1 score: 0.6666666666666666
Confusion Matrix: [[116  50]
 [ 52 102]]
