# In [None]:

# Import libraries

import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

# Seed both NumPy's and PyTorch's global random generators with the same value.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)


# Professors whose reviews are requested from the Planet Terp API.
# Edit this list to look at a different set of professors.
professors = [
    "Ilchul Yoon",
    "Elias Gonzalez",
    "Roohollah Ebrahimian",
    "Larry Herman",
    "Timothy Pilachowski",
]

def get_professor_reviews(professor_name, timeout=10):
    """Fetch a professor's record, with reviews, from the Planet Terp API.

    Args:
        professor_name: Name of the professor, sent as the ``name`` query
            parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response. An empty dict is returned when
        the body is not valid JSON, so callers can still safely test
        ``'reviews' in data``.

    Raises:
        requests.RequestException: If the request itself fails (for example
            a connection error or a timeout).
    """
    url = "https://planetterp.com/api/v1/professor"
    params = {"name": professor_name, "reviews": "true"}

    response = requests.get(url, params=params, timeout=timeout)

    try:
        return response.json()
    except ValueError:
        # requests signals an undecodable body with a ValueError subclass.
        # Skip this professor instead of aborting the whole collection run.
        print(f"Warning: could not decode response for {professor_name!r} "
              f"(HTTP {response.status_code}); skipping.")
        return {}

# Collect all reviews, one dict per review, tagged with the professor's name
all_reviews = []

for prof in professors:
    data = get_professor_reviews(prof)

    # Extract reviews if they exist (the key may be absent or hold an empty list)
    if 'reviews' in data and data['reviews']:
        all_reviews.extend(
            {'professor': prof,
             'review': review['review'],
             'rating': review['rating']}
            for review in data['reviews']
        )

# Create DataFrame. Naming the columns explicitly keeps them present even when
# no reviews were collected; otherwise df['rating'] below raises KeyError.
df = pd.DataFrame(all_reviews, columns=['professor', 'review', 'rating'])

print(f"\nCollected {len(df)} reviews total!")
print("\nDataset preview:")
print(df.head())

print("\nRating distribution:")
print(df['rating'].value_counts().sort_index())

# Save raw data
df.to_csv('professor_reviews.csv', index=False)


# Prepare data for training

# Drop rows that lack a review text or a rating. The explicit copy makes df an
# independent frame, so the column assignment below cannot trigger pandas'
# SettingWithCopyWarning.
df = df.dropna(subset=['review', 'rating']).copy()

# Convert ratings to integers (1-5)
df['rating'] = df['rating'].astype(int)

# Only keep ratings 1-5. Boolean filtering returns a new object that pandas may
# track as derived from the original, so copy again before adding a column.
df = df[df['rating'].between(1, 5)].copy()

# Create labels (0-indexed for model: 0=1star, 1=2star, etc.)
df['label'] = df['rating'] - 1

print(f"\n✓ Cleaned dataset: {len(df)} reviews")

# Split into train and test sets (80/20); random_state fixes the shuffle
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

print(f"Training set: {len(train_df)} reviews")
print(f"Test set: {len(test_df)} reviews")


# Load the pretrained tokenizer and a sequence classifier with five output
# labels (one per star rating)
pretrained_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(pretrained_name)
model = BertForSequenceClassification.from_pretrained(pretrained_name, num_labels=5)


# Tokenize the reviews

# Both splits are encoded with the same options
tokenizer_options = {
    'truncation': True,
    'padding': True,
    'max_length': 512,
}

# Training split
train_encodings = tokenizer(list(train_df['review']), **tokenizer_options)

# Test split
test_encodings = tokenizer(list(test_df['review']), **tokenizer_options)


#Create PyTorch Datasets

class ReviewDataset(torch.utils.data.Dataset):
    """Dataset pairing encoded reviews with their labels.

    ``encodings`` is a mapping from field name to a per-sample sequence, and
    ``labels`` is a sequence with one entry per sample.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The label sequence defines how many samples there are.
        return len(self.labels)

    def __getitem__(self, idx):
        # Convert every encoded field of sample `idx` to a tensor, then add
        # the sample's label under the 'labels' key.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap each tokenized split together with its 0-indexed labels
train_dataset = ReviewDataset(train_encodings, list(train_df['label']))
test_dataset = ReviewDataset(test_encodings, list(test_df['label']))


# Training configuration, gathered in one place and passed on as keywords
training_config = {
    'output_dir': './results',
    'num_train_epochs': 3,
    'per_device_train_batch_size': 8,
    'per_device_eval_batch_size': 8,
    'warmup_steps': 100,
    'weight_decay': 0.01,
    'save_steps': 500,
}
training_args = TrainingArguments(**training_config)

# Build the Trainer around the model and both datasets
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Fine-tune the model
trainer.train()


# Write the fine-tuned model and its tokenizer to the same directory
model.save_pretrained('./fine_tuned_model')
tokenizer.save_pretrained('./fine_tuned_model')


# Run the trained model over the test dataset
predictions = trainer.predict(test_dataset)

# For each row, the index of the largest score is the predicted 0-indexed label
scores = predictions.predictions
pred_labels = scores.argmax(axis=1)

# Shift labels back to star ratings (label + 1) and fetch the true ratings
predicted_stars = pred_labels + 1
actual_stars = test_df['rating'].to_numpy()

# Fraction of test reviews whose predicted rating equals the actual rating
accuracy = accuracy_score(actual_stars, predicted_stars)
print(f"\nAccuracy: {accuracy:.2%}")

# Visualizations

print("\nCreating visualizations...")

# One figure with two panels: confusion matrix (left), distributions (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# 1. Confusion matrix: rows are actual stars, columns are predicted stars
star_values = [1, 2, 3, 4, 5]
cm = confusion_matrix(actual_stars, predicted_stars, labels=star_values)
im = axes[0].imshow(cm, cmap='Blues')
axes[0].set_title('Confusion Matrix', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Predicted Stars', fontsize=12)
axes[0].set_ylabel('Actual Stars', fontsize=12)
tick_positions = np.arange(len(star_values))
tick_labels = [str(star) for star in star_values]
axes[0].set_xticks(tick_positions)
axes[0].set_yticks(tick_positions)
axes[0].set_xticklabels(tick_labels)
axes[0].set_yticklabels(tick_labels)

# Add numbers to confusion matrix. Cells above half the maximum count are
# drawn dark by the 'Blues' colormap, so use white text there to stay legible.
dark_cell_threshold = cm.max() / 2
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        text_color = "white" if cm[i, j] > dark_cell_threshold else "black"
        axes[0].text(j, i, cm[i, j],
                     ha="center", va="center", color=text_color, fontsize=10)

plt.colorbar(im, ax=axes[0])

# 2. Prediction Distribution Comparison
star_labels = ['1★', '2★', '3★', '4★', '5★']
x = np.arange(len(star_labels))
width = 0.35

# Count reviews per star rating with NumPy rather than Python's built-in sum
actual_counts = [np.count_nonzero(actual_stars == star) for star in star_values]
predicted_counts = [np.count_nonzero(predicted_stars == star) for star in star_values]

# Side-by-side bars: actual to the left of each tick, predicted to the right
axes[1].bar(x - width/2, actual_counts, width, label='Actual', color='#3498db', alpha=0.8)
axes[1].bar(x + width/2, predicted_counts, width, label='Predicted', color='#e74c3c', alpha=0.8)
axes[1].set_title('Actual vs Predicted Distribution', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Star Rating', fontsize=12)
axes[1].set_ylabel('Number of Reviews', fontsize=12)
axes[1].set_xticks(x)
axes[1].set_xticklabels(star_labels)
axes[1].legend()
axes[1].grid(axis='y', alpha=0.3)

# Save before show(), so the file is written while the figure is still open
plt.tight_layout()
plt.savefig('results_visualization.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Visualizations saved!")

# Show example predictions

print("\n Example Predictions:")
print("=" * 80)

# Pick up to ten distinct positions in the test set
n_examples = min(10, len(test_df))
sample_indices = np.random.choice(len(test_df), n_examples, replace=False)

for idx in sample_indices:
    full_text = test_df['review'].iloc[idx]
    actual, predicted = actual_stars[idx], predicted_stars[idx]

    # Show at most the first 100 characters of a review
    snippet = full_text if len(full_text) <= 100 else full_text[:100] + "..."
    mark = '✓' if actual == predicted else '✗'

    print(f"\nReview: {snippet}")
    print(f"Actual: {actual}★  |  Predicted: {predicted}★  {mark}")
    print("-" * 80)

# Summary statistics
# --------------------------------------------

print("\n Summary Statistics:")
print("=" * 80)
print(f"Total Reviews: {len(df)}")
print(f"Training Reviews: {len(train_df)}")
print(f"Test Reviews: {len(test_df)}")
print(f"Model Accuracy: {accuracy:.2%}")

n_correct = (actual_stars == predicted_stars).sum()
print(f"Correct Predictions: {n_correct}/{len(actual_stars)}")

# Accuracy restricted to the reviews of each actual star rating
print("\nAccuracy by Star Rating:")
for star in range(1, 6):
    mask = actual_stars == star
    n_reviews = mask.sum()
    if n_reviews == 0:
        # No test reviews with this rating; nothing to report
        continue
    star_accuracy = accuracy_score(actual_stars[mask], predicted_stars[mask])
    print(f"  {star}★: {star_accuracy:.2%} ({n_reviews} reviews)")
