# Emotional Analysis of Student Feedback from RMP data

## Preprocessing and Set-Up

In [None]:
!pip install pandas matplotlib seaborn vaderSentiment transformers vertexai langchain_google_vertexai scikit-learn google-auth google-auth-oauthlib

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Collecting vertexai
  Downloading vertexai-1.71.1-py3-none-any.whl.metadata (10 kB)
Collecting langchain_google_vertexai
  Downloading langchain_google_vertexai-2.0.8-py3-none-any.whl.metadata (3.8 kB)
[0mCollecting google-cloud-storage<3.0.0dev,>=1.32.0 (from google-cloud-aiplatform==1.71.1->google-cloud-aiplatform[all]==1.71.1->vertexai)
  Downloading google_cloud_storage-2.18.2-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain_google_vertexai)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading vertexai-1.71.1-py3-none-any.whl (7.3 kB)
Downloading langchain_google_vertexai-2.0.8-py3-none-any.whl (90 kB)
[2K   [90m━━━━━━━━━━━━━━━━

In [None]:
# Mount Google Drive and load data
from google.colab import drive
import pandas as pd
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from langchain_google_vertexai import ChatVertexAI
from vertexai import init
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Expose Google Drive to the Colab runtime under /content/drive
drive.mount('/content/drive')

# Project folder on Drive; it ends with a slash because the CSV path below
# is formed by plain string concatenation.
data_dir = '/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/'
file_path = data_dir + "UMN_CS_Professors_Ratings.csv"

# Read the ratings export and show its first rows
data = pd.read_csv(file_path)
print(data.head())


Mounted at /content/drive
                Name  Teacher ID        Department  Rating Difficulty  \
0      Phillip Barry      410470  Computer Science     3.5        62%   
1       John Bartucz     2977279  Computer Science     4.7       100%   
2       Daniel Boley      309254  Computer Science     2.2         0%   
3  Stevie Chancellor     2783392  Computer Science     3.7        67%   
4   Abhishek Chandra     1692316  Computer Science     4.1        83%   

   Total Ratings Would Take Again Rating Class  \
0             45              62%         good   
1              3             100%         good   
2             24               0%         poor   
3              3              67%         good   
4             46              83%         good   

                                            Comments  \
0  {'CSCI2011': ["2011 was harder than it should'...   
1  {'CI4311W': ['Professor is an understanding gu...   
2  {'CSCI5302': ['Examples are given in class, bu...   
3  {'CSCI5

In [None]:
# Flatten comments into a structured DataFrame
def _parse_literal_cell(cell, default):
    """Parse a CSV cell that holds a Python literal; return `default` for blank or non-string cells."""
    import ast  # local import: the notebook's import cell does not bring in ast

    if not isinstance(cell, str) or not cell.strip():
        # pandas reads an empty CSV cell as NaN (a float), not as a string
        return default
    # literal_eval accepts literals only, so any other expression stored in the
    # CSV raises ValueError/SyntaxError instead of being executed like eval() would.
    return ast.literal_eval(cell)


def preprocess_comments(data):
    """Expand the per-professor ratings table into one row per comment.

    Args:
        data: DataFrame with the columns "Name", "Comments", "Rating",
            "Difficulty" and "Top Tags". "Comments" holds the string form of a
            {course: [comment, ...]} dict and "Top Tags" the string form of a
            list; blank cells are treated as "no comments" / "no tags".

    Returns:
        DataFrame with the columns Professor, Course, Comment, Rating,
        Difficulty and Top Tags. The columns are present even when no comment
        rows were produced.

    Raises:
        ValueError / SyntaxError: if a "Comments" or "Top Tags" cell is not a
            plain Python literal.
    """
    columns = ["Professor", "Course", "Comment", "Rating", "Difficulty", "Top Tags"]
    comments_data = []
    for _, row in data.iterrows():
        courses = _parse_literal_cell(row["Comments"], {})
        # Parsed once per professor rather than once per comment
        top_tags = _parse_literal_cell(row["Top Tags"], [])
        for course, comments in courses.items():
            for comment in comments:
                comments_data.append({
                    "Professor": row["Name"],
                    "Course": course,
                    "Comment": comment,
                    "Rating": row["Rating"],
                    "Difficulty": row["Difficulty"],
                    # give every row its own list so rows never share one mutable object
                    "Top Tags": list(top_tags) if isinstance(top_tags, list) else top_tags,
                })
    return pd.DataFrame(comments_data, columns=columns)

# Preprocess data
# Turns the per-professor table `data` into one row per comment and previews it.
comments_df = preprocess_comments(data)
print(comments_df.head())


       Professor    Course                                            Comment  \
0  Phillip Barry  CSCI2011  2011 was harder than it should've been. I foun...   
1  Phillip Barry  CSCI2011  I wholeheartedly agree with the previous revie...   
2  Phillip Barry  CSCI2011  He's a good professor and lecturer, it's just ...   
3  Phillip Barry  CSCI2011  He is a great lecturer, but the class itself i...   
4  Phillip Barry  CSCI2011  I liked this class. I think you'll dislike it ...   

   Rating Difficulty                                           Top Tags  
0     3.5        62%  [Lots of homework, Get ready to read, Clear gr...  
1     3.5        62%  [Lots of homework, Get ready to read, Clear gr...  
2     3.5        62%  [Lots of homework, Get ready to read, Clear gr...  
3     3.5        62%  [Lots of homework, Get ready to read, Clear gr...  
4     3.5        62%  [Lots of homework, Get ready to read, Clear gr...  


## Sentiment Analysis

In [None]:
# One shared VADER analyzer, created once and reused for every comment
analyzer = SentimentIntensityAnalyzer()

# Score a single comment with VADER
def analyze_sentiment(comment):
    """Return the VADER scores for `comment` as a (compound, pos, neu, neg) tuple."""
    polarity = analyzer.polarity_scores(comment)
    return tuple(polarity[key] for key in ("compound", "pos", "neu", "neg"))

# Perform sentiment analysis
def add_sentiment_analysis(comments_df):
    """Attach VADER scores to every comment.

    Adds the columns "Sentiment_Score", "Positive", "Neutral" and "Negative"
    to `comments_df` in place (from the compound, pos, neu and neg scores, in
    that order) and returns the same frame.
    """
    score_columns = ["Sentiment_Score", "Positive", "Neutral", "Negative"]

    def _scores_as_series(text):
        # One Series per comment; apply() stacks them into a four-column frame
        return pd.Series(analyze_sentiment(text))

    print("Starting sentiment analysis...")
    comments_df[score_columns] = comments_df["Comment"].apply(_scores_as_series)
    print("Sentiment analysis complete.")
    return comments_df

comments_df = add_sentiment_analysis(comments_df)

Starting sentiment analysis...
Sentiment analysis complete.


In [None]:
# Print all unique professors in comments_df
# (each distinct value of the "Professor" column is listed once)
print("List of professors in the dataset:")
print(comments_df["Professor"].unique())

List of professors in the dataset:
['Phillip Barry' 'John Bartucz' 'Daniel Boley' 'Stevie Chancellor'
 'Abhishek Chandra' 'Zhu-Tian Chen' 'Yao-Yi Chiang' 'Karthik Desingh'
 'Chris Dovolis' 'Andrew Exley' 'Kuen-Bang Favonia' 'Mattia Fazzini'
 'Samuel Fountain' 'Shonal Gangopadhyay' 'Maria Gini' 'Stephen Guy'
 'Mats Heimdahl' 'Nick Hopper' 'Victoria Interrante' 'Volkan Isler'
 'Ravi Janardan' 'Elizabeth Jensen' 'George Karypis' 'Daniel Keefe'
 'Daniel Kluver' 'Dan Knights' 'Jack Kolb' 'Joseph Konstan' 'Rui Kuang'
 'Vipin Kumar' 'Kangjie Lu' 'Stephen McCamant' 'James Moen'
 'Mohamed Mokbel' 'Chad Myers' 'Gopalan Nadathur' 'Daniel Orban'
 'Nikolaos Papanikolopoulos' 'Hyun Soo Park' 'Yousef Saad' 'Junaed Sattar'
 'Paul Schrater' 'Shashi Shekhar' 'Jaideep Srivastava' 'Carl Sturtivant'
 'Ju Sun' 'Nathan Taylor' 'Loren Terveen' 'Anand Tripathi' 'Eric Van Wyk'
 'Shana Watters' 'Jon Weissman' 'Kevin Wendt' 'Pen-Chung Yew'
 'Antonia Zhai' 'Zhi-Li Zhang' 'Catherine Zhao']


## Emotion Analysis

In [None]:
# Hugging Face text-classification pipeline configured to return a score for every label
emotion_classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    return_all_scores=True,
)

def detect_emotions(comment):
    """Classify one comment and return a {label: score} dict.

    The dict is built from the first element of the classifier's output,
    which is a list of {'label': ..., 'score': ...} entries.
    """
    label_entries = emotion_classifier(comment)[0]
    scores = {}
    for entry in label_entries:
        scores[entry['label']] = entry['score']
    return scores

def add_emotion_analysis(comments_df):
    """Run emotion detection on every comment and add one score column per emotion.

    Adds an "Emotions" column (the {label: score} dict from detect_emotions)
    plus the columns anger, disgust, fear, joy, neutral, sadness and surprise
    to `comments_df` in place, then returns the same frame.
    """
    print("Starting emotion analysis...")
    comments_df["Emotions"] = comments_df["Comment"].apply(detect_emotions)

    print("Flattening emotions into separate columns...")
    for label in ("anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"):
        # A label missing from the classifier output is recorded as 0
        comments_df[label] = comments_df["Emotions"].apply(
            lambda scores, key=label: scores.get(key, 0)
        )
    return comments_df

comments_df = add_emotion_analysis(comments_df)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.00k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/329M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



Starting emotion analysis...
Flattening emotions into separate columns...


## Aggregate

In [None]:
# Average every sentiment and emotion score per professor
def group_by_professor(comments_df):
    """Return one row per professor holding the mean of each score column.

    The result has a "Professor" column followed by the four VADER columns
    and the seven emotion columns.
    """
    print("Grouping data by professor...")
    sentiment_columns = ["Sentiment_Score", "Positive", "Neutral", "Negative"]
    emotion_columns = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
    per_professor = comments_df.groupby("Professor")
    averages = per_professor[sentiment_columns + emotion_columns].mean()
    # Move "Professor" from the index back into an ordinary column
    return averages.reset_index()

# Collapse the per-comment scores into one averaged row per professor and preview the result.
professor_grouped = group_by_professor(comments_df)
print(professor_grouped.head())


Grouping data by professor...
          Professor  Sentiment_Score  Positive  Neutral  Negative     anger  \
0  Abhishek Chandra          0.60867   0.17815  0.76300   0.05895  0.014981   
1    Anand Tripathi          0.29132   0.15690  0.76240   0.08070  0.074643   
2      Andrew Exley          0.54773   0.15260  0.80620   0.04105  0.075601   
3      Antonia Zhai         -0.03522   0.07570  0.76455   0.15970  0.075481   
4   Carl Sturtivant          0.37910   0.14475  0.80180   0.05340  0.057705   

    disgust      fear       joy   neutral   sadness  surprise  
0  0.130990  0.038063  0.118623  0.566919  0.070123  0.060300  
1  0.259356  0.039078  0.148765  0.435558  0.029413  0.013187  
2  0.092889  0.049670  0.320618  0.356923  0.025684  0.078615  
3  0.263261  0.146732  0.042650  0.374780  0.052483  0.044613  
4  0.149299  0.008558  0.058504  0.554638  0.102434  0.068862  


In [None]:
# Drive folder that receives the grouped CSV and the chart images (created if missing).
output_dir = '/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations'
os.makedirs(output_dir, exist_ok=True)


# Emotion score columns, in the order used for the pie-chart wedges below.
emotion_cols = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]

# Save grouped data and generate visualizations
def save_visualizations_and_csv(professor_grouped):
    """Write the per-professor averages to CSV and render two charts per professor.

    Reads the module-level `output_dir` and `emotion_cols`. For every row of
    `professor_grouped` it saves a positive/negative bar chart and an emotion
    pie chart as PNG files in "<output_dir>/Visualizations".
    """
    # Persist the aggregated table first
    csv_path = os.path.join(output_dir, "Professor_Grouped_Sentiment_Emotion.csv")
    professor_grouped.to_csv(csv_path, index=False)
    print(f"Grouped data saved to {csv_path}")

    # All images go into one sub-folder
    vis_dir = os.path.join(output_dir, "Visualizations")
    os.makedirs(vis_dir, exist_ok=True)
    print(f"Saving visualizations to {vis_dir}...")

    for _, record in professor_grouped.iterrows():
        professor_name = record["Professor"]

        # Bar chart: average positive vs. negative VADER share
        bar_fig, bar_ax = plt.subplots(figsize=(8, 6))
        bar_ax.bar(
            ["Positive", "Negative"],
            [record["Positive"], record["Negative"]],
            color=["green", "red"],
        )
        bar_ax.set_title(f"Sentiment Analysis for {professor_name}", fontsize=16)
        bar_ax.set_ylabel("Scores")
        bar_ax.set_xlabel("Sentiment")
        bar_fig.savefig(os.path.join(vis_dir, f"{professor_name}_Sentiment_BarChart.png"))
        plt.close(bar_fig)  # release the figure before drawing the next one

        # Pie chart: average score of each emotion
        pie_fig, pie_ax = plt.subplots(figsize=(10, 8))
        pie_ax.pie(
            [record[emotion] for emotion in emotion_cols],
            labels=emotion_cols,
            autopct="%1.1f%%",
            startangle=140,
            colors=plt.cm.tab10.colors,
        )
        pie_ax.set_title(f"Emotion Distribution for {professor_name}", fontsize=16)
        pie_fig.savefig(os.path.join(vis_dir, f"{professor_name}_Emotion_PieChart.png"))
        plt.close(pie_fig)

    print(f"All visualizations saved successfully in {vis_dir}.")

# Save data and generate visualizations
save_visualizations_and_csv(professor_grouped)

Grouped data saved to /content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Professor_Grouped_Sentiment_Emotion.csv
Saving visualizations to /content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Visualizations...
All visualizations saved successfully in /content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Visualizations.


## Generate Actionable Insights

In [None]:
from getpass import getpass
import os
from vertexai import init
from langchain_google_vertexai import ChatVertexAI

In [None]:
def initialize_gemini():
    """Configure Vertex AI credentials and build the Gemini chat model.

    Sets GOOGLE_APPLICATION_CREDENTIALS and GOOGLE_CLOUD_PROJECT, initialises
    Vertex AI for the project in us-central1, blanks GOOGLE_API_KEY, and then
    constructs the model.

    Returns:
        The ChatVertexAI instance, or None when constructing it raised.
    """
    service_account_path = "/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/gen-lang-client-0030563685-bafb4c97b3ee.json"
    project_id = "gen-lang-client-0030563685"

    os.environ.update({
        "GOOGLE_APPLICATION_CREDENTIALS": service_account_path,
        "GOOGLE_CLOUD_PROJECT": project_id,
    })
    init(project=project_id, location="us-central1")

    os.environ["GOOGLE_API_KEY"] = ""

    try:
        gemini_model = ChatVertexAI(model="gemini-1.5-pro-002")
    except Exception as e:
        # Report the problem and hand back None rather than stopping the notebook
        print(f"Error initializing model: {e}")
        return None

    print("Gemini model initialized successfully.")
    return gemini_model

gemini_model = initialize_gemini()
print(gemini_model)

Gemini model initialized successfully.
project='gen-lang-client-0030563685' model_name='gemini-1.5-pro-002' full_model_name='projects/gen-lang-client-0030563685/locations/us-central1/publishers/google/models/gemini-1.5-pro-002' client_options=ClientOptions: {'api_endpoint': 'us-central1-aiplatform.googleapis.com', 'client_cert_source': None, 'client_encrypted_cert_source': None, 'quota_project_id': None, 'credentials_file': None, 'scopes': None, 'api_key': None, 'api_audience': None, 'universe_domain': None} default_metadata=() model_family=<GoogleModelFamily.GEMINI_ADVANCED: '2'>


In [None]:
def generate_actionable_insights(professor_name, sentiment_emotion_data, rmp_feedback, num_ratings):
    """
    Ask the module-level `gemini_model` for teaching insights about one professor.

    The prompt embeds the professor's name, the aggregated sentiment/emotion
    data, the concatenated feedback text and the total number of ratings.
    Returns the response's `content`, or the fixed fallback string when the
    call raises or the response has no `content` attribute.
    """
    fallback = "Insights could not be generated."
    prompt = f"""
    You are tasked with analyzing professor feedback to provide actionable insights for improving teaching methods.
    Use the following data for Professor {professor_name}:
    Sentiment and Emotion Data: {sentiment_emotion_data}
    RMP Feedback: {rmp_feedback}
    Also, be mindful of the how accurate these insights would be given the total number of ratings given to this professor.
    You shouldn't overfit on very few ratings.
    Total number of ratings given to this professor: {num_ratings}
    Provide a structured summary of actionable insights.
    """
    messages = [{"role": "user", "content": prompt}]
    try:
        response = gemini_model.invoke(messages)

        if not hasattr(response, "content"):
            print("Unexpected response format.")
            return fallback
        return response.content

    except Exception as e:
        # Any failure is reported and replaced by the fallback text
        print(f"Error generating insights: {e}")
        return fallback


In [None]:
def generate_and_save_insights(comments_df, professor_grouped, insights_folder):
    """Generate insights for every professor and write one text file per professor.

    Args:
        comments_df: Raw ratings table with a "Name" column, a "Comments"
            column holding the string form of a {course: [comment, ...]} dict,
            and optionally a "Total Ratings" column (0 is used when absent).
        professor_grouped: Per-professor aggregate table with a "Professor"
            column. It is modified in place: names are stripped of surrounding
            whitespace and a "Total Ratings" column is added when missing.
        insights_folder: Directory that receives "<Professor_Name>_insight.txt";
            created if it does not exist.

    Raises:
        ValueError / SyntaxError: if a "Comments" cell is not a plain Python literal.
    """
    import ast  # local import: the notebook's import cells do not bring in ast

    os.makedirs(insights_folder, exist_ok=True)  # Ensure the folder exists

    # Flatten the nested comments into per-professor lookups
    comments_by_professor = {}
    ratings_by_professor = {}
    for _, row in comments_df.iterrows():
        raw_comments = row["Comments"]
        if not isinstance(raw_comments, str):
            continue  # empty cell (NaN): this professor has no comments

        professor_name = row["Name"]
        if isinstance(professor_name, str):
            # strip here as well, so the keys match the stripped names in professor_grouped
            professor_name = professor_name.strip()
        total_ratings = row.get("Total Ratings", 0)

        # literal_eval parses the dict literal without executing arbitrary code
        courses = ast.literal_eval(raw_comments)
        for comments in courses.values():
            if not comments:
                continue
            comments_by_professor.setdefault(professor_name, []).extend(comments)
            ratings_by_professor.setdefault(professor_name, total_ratings)

    # Normalise names before they are used as lookup keys
    professor_grouped["Professor"] = professor_grouped["Professor"].str.strip()

    # Ensure professor_grouped has 'Total Ratings'
    if "Total Ratings" not in professor_grouped.columns:
        professor_grouped["Total Ratings"] = professor_grouped["Professor"].map(ratings_by_professor)

    for professor_name in professor_grouped["Professor"]:
        print(f"Generating actionable insights for Professor {professor_name}...")

        rmp_feedback = " ".join(comments_by_professor.get(professor_name, []))

        # Skip if no feedback is found
        if not rmp_feedback:
            print(f"No comments found for Professor {professor_name}. Skipping...")
            continue

        matching_rows = professor_grouped[professor_grouped["Professor"] == professor_name]
        sentiment_emotion_data = matching_rows.to_dict(orient="records")
        num_ratings = matching_rows["Total Ratings"].values[0]

        # Generate insights
        insights = generate_actionable_insights(professor_name, sentiment_emotion_data, rmp_feedback, num_ratings)
        if not isinstance(insights, str):
            insights = str(insights)  # f.write() below needs text

        # Save insights to a file
        file_name = f"{professor_name.replace(' ', '_')}_insight.txt"
        file_path = os.path.join(insights_folder, file_name)
        # explicit UTF-8 so non-ASCII characters in the model output are always writable
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(insights)
        print(f"Saved insights for Professor {professor_name} to {file_path}")

In [None]:
# Inputs for insight generation: the raw ratings export and the grouped averages
# written by save_visualizations_and_csv.
comments_df_path = "/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/UMN_CS_Professors_Ratings.csv"
professor_grouped_path = "/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Professor_Grouped_Sentiment_Emotion.csv"

# NOTE: this rebinds `comments_df` to the raw ratings table (columns such as "Name" and
# "Comments"), replacing the flattened per-comment frame built earlier in the notebook.
comments_df = pd.read_csv(comments_df_path)
# The grouped table is reloaded from its CSV rather than reused from memory.
professor_grouped = pd.read_csv(professor_grouped_path)

# One "<Professor_Name>_insight.txt" file per professor is written to this folder.
insights_folder = "/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Insights"
generate_and_save_insights(comments_df, professor_grouped, insights_folder)

Generating actionable insights for Professor Abhishek Chandra...
Saved insights for Professor Abhishek Chandra to /content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Insights/Abhishek_Chandra_insight.txt
Generating actionable insights for Professor Anand Tripathi...
Saved insights for Professor Anand Tripathi to /content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Insights/Anand_Tripathi_insight.txt
Generating actionable insights for Professor Andrew Exley...
Saved insights for Professor Andrew Exley to /content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Insights/Andrew_Exley_insight.txt
Generating actionable insights for Professor Antonia Zhai...
Saved insights for Professor Antonia Zhai to /content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Insights/Antonia_Zhai_insight.txt
Generating actionable insights for

# Accuracy Testing

## Emotion and Sentiment Analysis -- Accuracy
We evaluate accuracy for only three professors because human annotation is expensive.

In [None]:
# Drive locations: project inputs and the folder that receives accuracy outputs
data_dir = '/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/'
results_dir = data_dir + "Results/"
os.makedirs(results_dir, exist_ok=True)

# Load human-annotated data (one CSV per annotator) and stack the rows
human_eval_akansha = pd.read_csv(data_dir + "Professor Comment Anaylsis Human Evaluation - Akansha.csv")
human_eval_rimika = pd.read_csv(data_dir + "Professor Comment Anaylsis Human Evaluation - Rimika.csv")
human_eval_df = pd.concat([human_eval_akansha, human_eval_rimika], ignore_index=True)

# Cast the per-emotion annotation columns to integers
emotion_columns = ["Anger", "Disgust", "Fear", "Joy", "Neutral", "Sadness", "Surprise"]
human_eval_df = human_eval_df.astype({column: int for column in emotion_columns})

# Group human annotations by professor (mean of each emotion column)
human_means = human_eval_df.groupby("Professor Name")[emotion_columns].mean()
grouped_human_df = human_means.reset_index()

print("\nGrouped Human Annotation DataFrame:")
print(grouped_human_df.head())

# NOTE: `comments_df` is rebound here to the per-professor grouped scores CSV
comments_df_path = data_dir + "Professor_Visualizations/Professor_Grouped_Sentiment_Emotion.csv"
comments_df = pd.read_csv(comments_df_path)
print("\nComments DataFrame:")
print(comments_df.head())


In [None]:
# Average both tables per professor so each side has one row per name
grouped_human_df = grouped_human_df.groupby("Professor Name").mean().reset_index()
comments_df = comments_df.groupby("Professor").mean().reset_index()

# Keep only professors present in both tables. "Neutral" exists on both sides,
# so the merge suffixes turn it into "Neutral_human" / "Neutral_predicted".
merged_df = grouped_human_df.merge(
    comments_df,
    how="inner",
    left_on="Professor Name",
    right_on="Professor",
    suffixes=("_human", "_predicted"),
)

# Rename to the human_<emotion> / predicted_<emotion> scheme
emotion_names = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
human_renames = {
    ("Neutral_human" if name == "neutral" else name.capitalize()): f"human_{name}"
    for name in emotion_names
}
predicted_renames = {name: f"predicted_{name}" for name in emotion_names}
merged_df = merged_df.rename(columns={**human_renames, **predicted_renames})

print("\nUnique Human Annotated Professors:", grouped_human_df["Professor Name"].nunique())
print("Unique Predicted Professors:", comments_df["Professor"].nunique())
print("\nMerged DataFrame:")
print(len(merged_df))
print(merged_df.columns)


In [None]:
def calculate_accuracy(merged_df, threshold=0.2, leniency=0.5):
    """
    Calculate accuracy for each emotion and overall accuracy with leniency.

    A prediction counts as correct when it lies within `threshold` of the
    human value. A row counts as correct overall when at least `leniency` of
    the evaluated emotions are correct for that row.

    Args:
        merged_df: DataFrame containing human annotations and predictions in
            columns named human_<emotion> and predicted_<emotion>.
        threshold: Allowed difference between human and predicted values.
        leniency: Minimum proportion of correct emotions per row to count as correct.

    Returns:
        accuracy_scores: Dictionary with per-emotion and overall accuracies.
            An emotion whose columns are missing is reported on stdout, omitted
            from the dictionary and left out of the overall score. All values
            are NaN when `merged_df` has no rows, and "overall" is NaN when no
            emotion could be evaluated.
    """
    import numpy as np  # local import: no cell in the notebook imports numpy

    accuracy_scores = {}
    emotions = ["anger", "disgust", "fear", "joy", "neutral", "sadness", "surprise"]
    total_samples = len(merged_df)
    undefined = float("nan")  # accuracy is undefined when there is nothing to score

    # Boolean "within threshold" vector for each emotion that can be evaluated
    hits_by_emotion = {}
    for emotion in emotions:
        human_col = f"human_{emotion}"
        predicted_col = f"predicted_{emotion}"

        if human_col not in merged_df.columns or predicted_col not in merged_df.columns:
            print(f"Columns for {emotion} not found in DataFrame.")
            continue

        gap = np.abs(
            merged_df[human_col].to_numpy(dtype=float)
            - merged_df[predicted_col].to_numpy(dtype=float)
        )
        hits = gap <= threshold  # NaN gaps compare False, i.e. count as incorrect
        hits_by_emotion[emotion] = hits
        accuracy_scores[emotion] = float(hits.sum()) / total_samples if total_samples else undefined

    # Overall accuracy: At least `leniency` proportion of emotions correct per row
    if total_samples and hits_by_emotion:
        hits_per_row = np.sum(list(hits_by_emotion.values()), axis=0)
        row_is_correct = hits_per_row / len(hits_by_emotion) >= leniency
        overall_accuracy = float(row_is_correct.sum()) / total_samples
    else:
        overall_accuracy = undefined
    accuracy_scores["overall"] = overall_accuracy

    return accuracy_scores


# Score the merged human/model table (tolerance 0.2; a row counts overall
# when at least half of its emotions are within tolerance)
accuracy_scores = calculate_accuracy(merged_df, threshold=0.2, leniency=0.5)

# Print accuracy scores
print("\nAccuracy Scores:")
for metric_name, metric_value in accuracy_scores.items():
    print(f"{metric_name.capitalize()} Accuracy: {metric_value:.2f}")

# Save results to the results directory
results_path = results_dir + "accuracy_scores.csv"
accuracy_table = pd.DataFrame(list(accuracy_scores.items()), columns=["Metric", "Accuracy"])
accuracy_table.to_csv(results_path, index=False)
print(f"\nAccuracy scores saved to: {results_path}")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def visualize_accuracy(accuracy_scores, results_dir):
    """
    Draw the accuracy scores as a horizontal bar plot and save it as a PNG.

    Args:
        accuracy_scores: Dictionary with accuracy scores for each emotion and overall accuracy.
        results_dir: Directory prefix for the image; the file name is appended
            directly, so it must end with a path separator.
    """
    # Two-column table (Metric, Accuracy) in the dictionary's order
    accuracy_df = pd.DataFrame(list(accuracy_scores.items()), columns=["Metric", "Accuracy"])

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(data=accuracy_df, x="Accuracy", y="Metric", palette="viridis", ax=ax)
    ax.set_title("Emotion Accuracy Scores", fontsize=16)
    ax.set_xlabel("Accuracy", fontsize=14)
    ax.set_ylabel("Emotion", fontsize=14)
    ax.set_xlim(0, 1)  # Accuracy ranges from 0 to 1
    fig.tight_layout()

    # Save the plot, then release the figure
    plot_path = f"{results_dir}emotion_accuracy_plot.png"
    fig.savefig(plot_path)
    plt.close(fig)
    print(f"Accuracy visualization saved to: {plot_path}")

# Ensure the results directory exists
os.makedirs(results_dir, exist_ok=True)

# Visualize accuracy scores
visualize_accuracy(accuracy_scores, results_dir)

# Save results to the results directory as CSV
results_path = results_dir + "accuracy_scores.csv"
accuracy_table = pd.DataFrame(list(accuracy_scores.items()), columns=["Metric", "Accuracy"])
accuracy_table.to_csv(results_path, index=False)
print(f"\nAccuracy scores saved to: {results_path}")


## Archive

In [None]:
def initialize_gemini():
    """Archived copy of the Vertex AI / Gemini setup.

    Sets GOOGLE_APPLICATION_CREDENTIALS and GOOGLE_CLOUD_PROJECT, initialises
    Vertex AI for the project in us-central1 and constructs the chat model.
    Running this cell replaces the `initialize_gemini` defined earlier in the
    notebook.

    Returns:
        The ChatVertexAI instance, or None when constructing it raised.
    """
    service_account_path = "/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/gen-lang-client-0030563685-bafb4c97b3ee.json"
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_path

    project_id = "gen-lang-client-0030563685"
    os.environ["GOOGLE_CLOUD_PROJECT"] = project_id
    init(project=project_id, location="us-central1")

    # SECURITY: a literal API key used to be assigned on this line. Secrets must
    # not be stored in the notebook, so the key is taken from the environment
    # (blank when none is set, matching the earlier definition of this function).
    # The key that was committed here should be revoked.
    os.environ["GOOGLE_API_KEY"] = os.environ.get("GOOGLE_API_KEY", "")

    try:
        gemini_model = ChatVertexAI(model="gemini-1.5-pro-002")
        print("Gemini model initialized successfully.")
        return gemini_model

    except Exception as e:
        # Report the failure and return None explicitly so callers can check for it
        print(f"Error initializing model: {e}")
        return None

gemini_model = initialize_gemini()
print(gemini_model)

def generate_actionable_insights(professor_name, sentiment_emotion_data, rmp_feedback):
    """
    Archived three-argument variant: ask `gemini_model` for teaching insights.

    Unlike the function of the same name defined earlier in the notebook, this
    version takes no rating count and prints the full response. Running this
    cell replaces that earlier definition. Returns the response's `content`,
    or the fixed fallback string when the call raises or the response has no
    `content` attribute.
    """
    fallback = "Insights could not be generated."
    prompt = f"""
    You are tasked with analyzing professor feedback to provide actionable insights for improving teaching methods.
    Use the following data for Professor {professor_name}:
    Sentiment and Emotion Data: {sentiment_emotion_data}
    RMP Feedback: {rmp_feedback}
    Provide a structured summary of actionable insights.
    """
    messages = [{"role": "user", "content": prompt}]
    try:
        response = gemini_model.invoke(messages)

        # Dump the raw response for inspection
        print("Full Response:")
        print(response)

        if not hasattr(response, "content"):
            print("Unexpected response format.")
            return fallback
        return response.content

    except Exception as e:
        print(f"Error generating insights: {e}")
        return fallback

# def generate_insights_for_professor(professor_name):
#     sentiment_emotion_data = professor_grouped[professor_grouped["Professor"] == professor_name].to_dict(orient="records")
#     rmp_feedback = " ".join(comments_df[comments_df["Professor"] == professor_name]["Comment"].tolist())
#     return generate_actionable_insights(professor_name, sentiment_emotion_data, rmp_feedback)

# professor_name = "Maria Gini"
# insights = generate_insights_for_professor(professor_name)
# print(insights)


Gemini model initialized successfully.
project='gen-lang-client-0030563685' model_name='gemini-1.5-pro-002' full_model_name='projects/gen-lang-client-0030563685/locations/us-central1/publishers/google/models/gemini-1.5-pro-002' client_options=ClientOptions: {'api_endpoint': 'us-central1-aiplatform.googleapis.com', 'client_cert_source': None, 'client_encrypted_cert_source': None, 'quota_project_id': None, 'credentials_file': None, 'scopes': None, 'api_key': None, 'api_audience': None, 'universe_domain': None} default_metadata=() model_family=<GoogleModelFamily.GEMINI_ADVANCED: '2'>


In [None]:
def generate_and_save_insights(comments_df, professor_grouped, insights_folder):
    """Archived variant: write one insight file per professor.

    Expects `comments_df` to have "Professor" and "Comment" columns (one row
    per comment) and `professor_grouped` to have a "Professor" column. Each
    professor's comments are joined with spaces, passed to
    generate_actionable_insights, and the result is saved as
    "<Professor_Name>_insight.txt" inside `insights_folder`.
    """
    os.makedirs(insights_folder, exist_ok=True)  # Ensure the folder exists

    for professor_name in professor_grouped["Professor"]:
        print(f"Generating actionable insights for Professor {professor_name}...")

        # Aggregated scores for this professor, as a list of row dicts
        grouped_rows = professor_grouped[professor_grouped["Professor"] == professor_name]
        sentiment_emotion_data = grouped_rows.to_dict(orient="records")

        # All of this professor's comments as one space-separated string
        professor_comments = comments_df.loc[comments_df["Professor"] == professor_name, "Comment"]
        rmp_feedback = " ".join(professor_comments.tolist())

        insights = generate_actionable_insights(professor_name, sentiment_emotion_data, rmp_feedback)

        # Spaces in the name become underscores in the file name
        file_name = f"{professor_name.replace(' ', '_')}_insight.txt"
        file_path = os.path.join(insights_folder, file_name)
        with open(file_path, "w") as f:
            f.write(insights)
        print(f"Saved insights for Professor {professor_name} to {file_path}")

# Example Usage
comments_df_path = "/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/UMN_CS_Professors_Ratings.csv"
professor_grouped_path = "/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Professor_Grouped_Sentiment_Emotion.csv"

# The ratings CSV has one row per professor ("Name", "Comments", ...), while the
# archived generate_and_save_insights above indexes "Professor" and "Comment".
# Flatten the CSV with preprocess_comments first; passing the raw table raises KeyError.
comments_df = preprocess_comments(pd.read_csv(comments_df_path))
professor_grouped = pd.read_csv(professor_grouped_path)

insights_folder = "/content/drive/MyDrive/2024/Fall 2024/CSCI 5541/CSCI 5541 Final Project/Professor_Visualizations/Insights"
generate_and_save_insights(comments_df, professor_grouped, insights_folder)