In [58]:
#apply KNN and changes the text into numbers using a TF-IDF method.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

dataset = pd.read_csv("sentimentdataset.csv")


# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(dataset['Text'], dataset['Sentiment'], test_size=0.2, random_state=42)

# TF-IDF Vectorization
text_vector = TfidfVectorizer()
X_train = text_vector.fit_transform(X_train)
X_test = text_vector.transform(X_test)

# KNN Classification
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Calculating the accuracy rate
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy rate:", accuracy * 100, "%")
for desired, computed in zip(y_test, y_pred):
    print("Desired class:", desired)
    print("Computed class:", computed)

Accuracy rate: 25.170068027210885 %
Desired class:  Curiosity 
Computed class:  Positive  
Desired class:  Positive  
Computed class:  Empowerment   
Desired class:  Fearful 
Computed class:  Fearful 
Desired class:  Heartbreak 
Computed class:  Apprehensive 
Desired class:  Joy 
Computed class:  Joy 
Desired class:  Miscalculation 
Computed class:  Despair 
Desired class:  Positive  
Computed class:  Positive  
Desired class:  Reflection    
Computed class:  Positive  
Desired class:  Happy 
Computed class:  Pride        
Desired class:  Joy 
Computed class:  Enthusiasm 
Desired class:  Sorrow      
Computed class:  Desolation 
Desired class:  Anticipation  
Computed class:  Adventure 
Desired class:  Grief           
Computed class:  Contentment   
Desired class:  Hate 
Computed class:  Hate 
Desired class:  Acceptance   
Computed class:  Acceptance      
Desired class:  Sadness      
Computed class:  Sad 
Desired class:  Excitement   
Computed class:  Excitement 
Desired class:  Eup

In [44]:
unique_sentiments = dataset['Sentiment'].unique()
print(unique_sentiments)


[' Positive  ' ' Negative  ' ' Neutral   ' ' Anger        '
 ' Fear         ' ' Sadness      ' ' Disgust      ' ' Happiness    '
 ' Joy          ' ' Love         ' ' Amusement    ' ' Enjoyment    '
 ' Admiration   ' ' Affection    ' ' Awe          ' ' Disappointed '
 ' Surprise     ' ' Acceptance   ' ' Adoration    ' ' Anticipation '
 ' Bitter       ' ' Calmness     ' ' Confusion    ' ' Excitement   '
 ' Kind         ' ' Pride        ' ' Shame        ' ' Confusion '
 ' Excitement ' ' Shame ' ' Elation       ' ' Euphoria      '
 ' Contentment   ' ' Serenity      ' ' Gratitude     ' ' Hope          '
 ' Empowerment   ' ' Compassion    ' ' Tenderness    ' ' Arousal       '
 ' Enthusiasm    ' ' Fulfillment  ' ' Reverence     ' ' Compassion'
 ' Fulfillment   ' ' Reverence ' ' Elation   ' ' Despair         '
 ' Grief           ' ' Loneliness      ' ' Jealousy        '
 ' Resentment      ' ' Frustration     ' ' Boredom         '
 ' Anxiety         ' ' Intimidation    ' ' Helplessness    '
 ' 

In [56]:
#predicts future hours based on sentiment.
import pandas as pd
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

data = pd.read_csv('sentimentdataset.csv')

data = data.drop(['Unnamed: 0.1', 'Unnamed: 0'], axis=1)#drop unnecessary part

label_encoder = LabelEncoder()
data['Sentiment'] = label_encoder.fit_transform(data['Sentiment'])
X = data[['Sentiment']]
y = data['Hour']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a KNN Regressor
knn_reg = KNeighborsRegressor(n_neighbors=5)

# Train
knn_reg.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn_reg.predict(X_test)

# checks how good the guesses
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")

# Make predictions for future years
future_sentiments = [0, 1, 2]  # Replace with actual sentiment values for future years
future_df = pd.DataFrame({'Sentiment': future_sentiments})
future_predictions = knn_reg.predict(future_df)

print("\nPredictions for future years:")
for i, prediction in enumerate(future_predictions):
    print(f"Sample {i+1}: Predicted number of hours spent: {prediction:.2f}")


Mean Squared Error: 15.20
R-squared: 0.15

Predictions for future years:
Sample 1: Predicted number of hours spent: 14.80
Sample 2: Predicted number of hours spent: 16.00
Sample 3: Predicted number of hours spent: 12.20
