<a href="https://colab.research.google.com/github/mengjie514/Dynamic-Emotional-Messaging/blob/main/IBM_Tone_Analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import standard libraries
import os
import warnings

# Import third-party libraries
import pandas as pd
import numpy as np

from google.colab import drive

# Show full cell contents when printing (None removes the column-width limit)
pd.options.display.max_colwidth = None

# Silence FutureWarning messages from here on
warnings.simplefilter('ignore', FutureWarning)

# Function to mount Google Drive and change working directory
def setup_environment(mount_point: str, target_directory: str):
    """
    Attach Google Drive to the runtime and make a folder the working directory.

    Parameters:
        mount_point (str): Path at which Google Drive is mounted.
        target_directory (str): Directory to switch into after mounting.
    """
    # Mount first: the directory change below happens only after the mount call returns
    drive.mount(mount_point)

    # Relative paths used afterwards resolve against this directory
    os.chdir(target_directory)

# Function to load and preprocess the dataset
def load_and_preprocess_data(file_name: str) -> pd.DataFrame:
    """
    Read a CSV file and give it a fresh, 0-based 'index' column.

    Parameters:
        file_name (str): Path of the CSV file to read (decoded as UTF-8).

    Returns:
        pd.DataFrame: The file's contents without any 'Unnamed: 0' or 'index'
        columns it contained, and with a new leading 'index' column numbering
        the rows from 0.
    """
    loaded = pd.read_csv(file_name, encoding='utf-8')

    # Remove these columns when present; labels missing from the file are skipped
    stale_columns = ['Unnamed: 0', 'index']
    trimmed = loaded.drop(columns=stale_columns, errors='ignore')

    # The first reset renumbers the rows; the second turns that numbering into
    # a regular column named 'index'
    renumbered = trimmed.reset_index(drop=True)
    return renumbered.reset_index()

# Main execution block
def main():
    """
    Mount Google Drive, switch to the project folder, load the CSV dataset
    and print a preview of it.
    """
    # Google Drive locations (update as necessary)
    drive_root = '/content/gdrive'
    project_dir = '/content/gdrive/My Drive/Colab Notebooks/PSV'  # Update path as needed

    setup_environment(mount_point=drive_root, target_directory=project_dir)

    # The bare file name resolves against project_dir, which
    # setup_environment made the working directory
    df_raw = load_and_preprocess_data('PSIV_SP_Raw_2022Midterm_Clean.csv')

    # Preview the first few rows of the cleaned DataFrame
    print(df_raw.head())

# Run main() only when this code executes as the top-level script
# (i.e. __name__ is "__main__"), not when it is imported as a module
if __name__ == "__main__":
    main()

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


  pd.set_option('display.max_colwidth', -1)


In [None]:
import json
from watson_developer_cloud import ToneAnalyzerV3

# Read CSV file
# NOTE(review): the path is empty in this notebook and must be filled in before running
df_raw = pd.read_csv('')

# Drop leftover columns when present; labels missing from the file are skipped
df_raw = df_raw.drop(columns=['Unnamed: 0', 'index'], errors='ignore')

# Renumber the rows from 0 and expose that numbering as a new 'index' column
df_raw = df_raw.reset_index(drop=True).reset_index()

# Clean text by removing newline characters and appending a period
df_raw['clean_text_deep'] = df_raw['clean_text_deep'].replace('\n', '', regex=True) + "."

# Build the request text with one row per line.
# Series.to_string() is a display formatter: it pads rows to a common width,
# renders missing values as the literal "NaN", and truncates long values
# according to the 'display.max_colwidth' option, so the text sent to the
# service depended on a display setting made elsewhere. Joining the values
# directly sends exactly the cleaned text; rows with missing text are skipped.
text = "\n".join(df_raw['clean_text_deep'].dropna().astype(str))

# Initialize Tone Analyzer
# NOTE(review): version, API key and URL are blank here and must be supplied
tone_analyzer = ToneAnalyzerV3(
    version='',
    iam_apikey='',
    url=''
)

# Perform tone analysis on the whole text in a single request
# NOTE(review): the service documentation describes a cap on how many sentences
# receive sentence-level results per request (believed to be 100) -- confirm,
# and batch the rows if the dataset is larger than that
tone_analysis = tone_analyzer.tone(
    {'text': text},
    'application/json'
).get_result()

# Save tone analysis results as JSON
with open('data.json', 'w') as fp:
    json.dump(tone_analysis, fp, indent=4)

# Load tone analysis JSON file
with open('data.json', 'r') as fp:
    data = json.load(fp)

# Convert 'sentences_tone' to DataFrame
df_tone = pd.DataFrame(data.get("sentences_tone", []))

# A response without sentence-level results yields a frame with no 'tones'
# column; add an empty one so the extraction below produces empty columns
# instead of raising KeyError
if 'tones' not in df_tone.columns:
    df_tone['tones'] = None


def _tone_field(tones, position, field):
    """Return `field` of the tone at `position`, or None when there is no such tone."""
    if isinstance(tones, list) and position < len(tones):
        return tones[position].get(field)
    return None


# Extract score and tone_id of the first and second tone of every sentence.
# The values are read from the parsed tone dicts rather than by splitting their
# string representation on ':' and ',', which depended on key order and left
# scores as strings and tone ids with a leading space. Sentences with fewer
# tones get missing values, as before.
for position in (0, 1):
    df_tone[f'score{position}'] = [
        _tone_field(tones, position, 'score') for tones in df_tone['tones']
    ]
    df_tone[f'tone_id{position}'] = [
        _tone_field(tones, position, 'tone_id') for tones in df_tone['tones']
    ]

# Save the processed tone analysis to CSV
df_tone.to_csv('tone_analysis_results.csv', index=False)

  tone_analyzer = ToneAnalyzerV3(
