In [None]:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.feature_extraction.text import TfidfVectorizer

# Toy corpus: five short sentences, each treated as one document.
documents = [
    "Machine learning is fascinating.",
    "PCA helps in reducing dimensionality.",
    "Text analysis using Python is fun and informative.",
    "Data science applications bring insights to complex topics.",
    "Statistics is essential for sound data analysis.",
]

# Build TF-IDF features, dropping English stop words.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(documents)

# Densify the sparse TF-IDF matrix before handing it to PCA.
tfidf_dense = tfidf_matrix.toarray()

# Project every document onto two principal components so it can be drawn in 2-D.
pca = PCA(n_components=2)
pca_result = pca.fit_transform(tfidf_dense)

# Report each document (numbered from 1) next to its 2-D coordinates.
for idx, doc in enumerate(documents, start=1):
    coords = pca_result[idx - 1]
    print(f"Document {idx}: {doc}")
    print(f"PCA Components: {coords}\n")

# Scatter plot of the projection, drawn through an explicit Axes handle.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(pca_result[:, 0], pca_result[:, 1], marker='o')

# Label every point with its 1-based document number, nudged off the marker.
for idx, (x, y) in enumerate(pca_result, start=1):
    ax.annotate(
        str(idx),
        xy=(x, y),
        xytext=(5, 5),
        textcoords="offset points",
    )

ax.set_xlabel("PCA Component 1")
ax.set_ylabel("PCA Component 2")
ax.set_title("PCA of Text Documents")
ax.grid(True)
plt.show()


In [None]:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.feature_extraction.text import TfidfVectorizer

# Toy corpus: five one-sentence movie reviews.
movie_reviews = [
    "The movie was an absolute masterpiece with stunning visuals and a captivating story.",
    "I found the film to be boring and predictable, lacking any real heart or originality.",
    "An excellent blend of humor and drama, this movie keeps you engaged from start to finish.",
    "The storyline was confusing and the characters were underdeveloped, making it hard to connect.",
    "A brilliant performance by the lead actor, paired with brilliant direction and cinematography.",
]

# Build TF-IDF features for the reviews, dropping English stop words.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(movie_reviews)

# Densify the sparse TF-IDF matrix before handing it to PCA.
tfidf_dense = tfidf_matrix.toarray()

# Project every review onto two principal components so it can be drawn in 2-D.
pca = PCA(n_components=2)
pca_result = pca.fit_transform(tfidf_dense)

# Report each review (numbered from 1) next to its 2-D coordinates.
for idx, review in enumerate(movie_reviews, start=1):
    coords = pca_result[idx - 1]
    print(f"Review {idx}: {review}")
    print(f"PCA Components: {coords}\n")

# Scatter plot of the projection, drawn through an explicit Axes handle.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(pca_result[:, 0], pca_result[:, 1], marker='o')

# Label every point with its 1-based review number, nudged off the marker.
for idx, (x, y) in enumerate(pca_result, start=1):
    ax.annotate(
        str(idx),
        xy=(x, y),
        xytext=(5, 5),
        textcoords="offset points",
    )

ax.set_xlabel("PCA Component 1")
ax.set_ylabel("PCA Component 2")
ax.set_title("PCA of Movie Reviews")
ax.grid(True)
plt.show()
