In [None]:
# random forest
import json
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import datetime

In [None]:
# start time (wall-clock timestamp taken before any data is loaded)
start_time = datetime.datetime.now()

# load the reviews from the JSON file into a Pandas DataFrame;
# the file is read as one JSON object per line, keeping only the
# review text and its rating
reviews = []
# JSON text is UTF-8; pass the encoding explicitly so decoding does not
# depend on the platform's locale default (e.g. cp1252 on Windows)
with open('goodreads_reviews_young_adult.json', 'r', encoding='utf-8') as f:
    for line in f:
        # skip blank lines, which json.loads would reject
        if not line.strip():
            continue
        review = json.loads(line)
        reviews.append((review['review_text'], review['rating']))
df = pd.DataFrame(reviews, columns=['text', 'rating'])

In [None]:
# translate a numeric rating into a coarse two-class sentiment label
def map_rating_to_sentiment(rating):
    """Return 'positive' when ``rating`` is 3 or higher, otherwise 'negative'."""
    return 'positive' if rating >= 3 else 'negative'

In [None]:
# derive a sentiment label for every review from its rating
sentiment_labels = df['rating'].apply(map_rating_to_sentiment)
df['sentiment'] = sentiment_labels

# hold out 20% of the reviews for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['sentiment'],
    test_size=0.2,
    random_state=42,
)

# bag-of-words features: the vocabulary is learned from the training texts only,
# then the same vocabulary is applied to the held-out texts
vectorizer = CountVectorizer(stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [None]:
# fit a 100-tree random forest on the vectorized training reviews
# (fit() returns the estimator itself, so construction and training can be chained)
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_vec, y_train)

# score the fitted model on the held-out reviews and report it as a percentage
accuracy = clf.score(X_test_vec, y_test)
accuracy_pct = accuracy * 100
print("Accuracy of the classifier on the testing set: {:.2f}%".format(accuracy_pct))

# stop the clock and report the time elapsed since start_time was recorded
end_time = datetime.datetime.now()
delta_time = end_time - start_time
print(f"Elapsed time: {delta_time.total_seconds()} seconds")