## Load the data set

In [27]:
import pandas as pd

# Load the combined dataset
file_path = "sorted_aligned_bible_data.tsv"
df = pd.read_csv(file_path, sep="\t")

# Preview the dataset
print(df.head())

   Index Version                                       Book  Chapter  Verse  \
0      1     DRB  The Epistle of St. Paul to the Colossians        1      1   
1      1     DRB  The Epistle of St. Paul to the Colossians        1      2   
2      1     DRB  The Epistle of St. Paul to the Colossians        1      3   
3      1     DRB  The Epistle of St. Paul to the Colossians        1      4   
4      1     DRB  The Epistle of St. Paul to the Colossians        1      5   

                                                Text  
0  Paul, an apostle of Jesus Christ, by the will ...  
1  To the saints and faithful brethren in Christ ...  
2  Grace be to you and peace, from God our Father...  
3  Hearing your faith in Christ Jesus and the lov...  
4  For the hope that is laid up for you in heaven...  


## Install Sentiment Analysis Tools

In [28]:
%pip install textblob
%pip install vaderSentiment


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [32]:
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


## Preprocess Data

In [33]:
import re

# Function to preprocess text
def clean_text(text):
    # Remove special characters and multiple spaces
    return re.sub(r'[^\w\s]', '', str(text)).strip()

# Apply cleaning
df["Cleaned_Text"] = df["Text"].apply(clean_text)


## Sentiment Analysis

### Per Version

In [34]:
# Overall sentiment per version
overall_sentiment = df.groupby("Version")["Sentiment_VADER"].mean().reset_index()
overall_sentiment.columns = ["Version", "Average Sentiment"]

# Display the overall sentiment
print(overall_sentiment)

# Plot overall sentiment
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.bar(overall_sentiment["Version"], overall_sentiment["Average Sentiment"], color="skyblue")
plt.title("Overall Sentiment by Version")
plt.ylabel("Average Sentiment (VADER)")
plt.xlabel("Version")
plt.show()


KeyError: 'Column not found: Sentiment_VADER'

In [None]:
# Overall sentiment per version
overall_sentiment_textblob = df.groupby("Version")["Sentiment_TextBlob"].mean().reset_index()
overall_sentiment_textblob.columns = ["Version", "Average Sentiment (TextBlob)"]

# Display the overall sentiment
print(overall_sentiment_textblob)

# Plot overall sentiment
plt.figure(figsize=(10, 6))
plt.bar(
    overall_sentiment_textblob["Version"],
    overall_sentiment_textblob["Average Sentiment (TextBlob)"],
    color="lightgreen",
)
plt.title("Overall Sentiment by Version (TextBlob)")
plt.ylabel("Average Sentiment (TextBlob)")
plt.xlabel("Version")
plt.show()


### Per book

In [None]:
# Sentiment per version per book
sentiment_by_version_book = (
    df.groupby(["Version", "Book"])["Sentiment_VADER"]
    .mean()
    .reset_index()
    .rename(columns={"Sentiment_VADER": "Average Sentiment"})
)

# Display the sentiment per version per book
print(sentiment_by_version_book)

# Plot sentiment per book for each version
import seaborn as sns

plt.figure(figsize=(14, 8))
sns.barplot(
    data=sentiment_by_version_book,
    x="Average Sentiment",
    y="Book",
    hue="Version",
    dodge=True,
)
plt.title("Sentiment by Version and Book")
plt.xlabel("Average Sentiment (VADER)")
plt.ylabel("Book")
plt.legend(title="Version", loc="lower right")
plt.tight_layout()
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Set figure size and style
plt.figure(figsize=(10, 14))
sns.set_theme(style="whitegrid")

# Sort the data by sentiment for better organization
sentiment_by_version_book_textblob = sentiment_by_version_book_textblob.sort_values(by="Average Sentiment (TextBlob)", ascending=False)

# Create the barplot
sns.barplot(
    data=sentiment_by_version_book_textblob,
    x="Average Sentiment (TextBlob)",
    y="Book",
    hue="Version",
    dodge=True,
    palette="muted",  # Muted color palette for clarity
)

# Adjust plot aesthetics
plt.title("Sentiment by Version and Book (TextBlob)", fontsize=16)
plt.xlabel("Average Sentiment (TextBlob)", fontsize=12)
plt.ylabel("Book", fontsize=12)
plt.legend(title="Version", loc="upper right")
plt.tight_layout()

# Rotate y-axis labels for better readability
plt.yticks(fontsize=10)
plt.xticks(fontsize=10)
plt.show()



## Over chapters and verses