# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.decomposition import LatentDirichletAllocation

# Load the review dataset from the CSV in the working directory.
reviews = pd.read_csv('reviews.csv')

# EDA: bar chart of the number of reviews in each 'Category' value.
dist_fig, dist_ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='Category', data=reviews, ax=dist_ax)
dist_ax.set_title('Category Distribution')
dist_ax.set_xlabel('Category')
dist_ax.set_ylabel('Count')

# Text Mining: turn the 'Review Text' column into a TF-IDF document-term
# matrix X (English stop words removed); y holds the 'Rating' column.
vectorizer = TfidfVectorizer(stop_words='english')
# Rows with no review text are read from the CSV as NaN, which
# TfidfVectorizer rejects with a ValueError. Replace them with empty strings
# (yielding an all-zero feature row) rather than dropping the rows, so that
# X stays row-aligned with y.
review_text = reviews['Review Text'].fillna('')
X = vectorizer.fit_transform(review_text)
y = reviews['Rating']

# Sentiment Analysis: average 'Rating' within each 'Category', drawn as a
# bar chart (one bar per category).
mean_rating = reviews.groupby('Category')['Rating'].mean()
sentiment = mean_rating.reset_index()  # back to columns: Category, Rating
sent_fig, sent_ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='Category', y='Rating', data=sentiment, ax=sent_ax)
sent_ax.set(
    title='Sentiment by Category',
    xlabel='Category',
    ylabel='Rating',
)

# Predictive Analytics: hold out 20% of the rows, fit a logistic-regression
# classifier on the remainder, and print its scores on the held-out rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
)
logreg = LogisticRegression().fit(X_train, y_train)  # fit() returns the model
y_pred = logreg.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
# sep='\n' puts each heading on its own line, followed by the report body.
print('Classification Report:', classification_report(y_test, y_pred), sep='\n')
print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')

# Topic Mining: fit a 5-topic LDA model on the document-term matrix X.
# The topic count is passed as `n_components`; the older `n_topics` keyword
# is no longer accepted by scikit-learn and raises a TypeError.
# random_state uses the same seed as the train/test split so that repeated
# runs produce the same topics.
# NOTE(review): X is TF-IDF weighted; LDA is usually fit on raw term counts
# (CountVectorizer) -- confirm TF-IDF input is intended.
lda = LatentDirichletAllocation(n_components=5, random_state=42)
topics = lda.fit_transform(X)  # per-document topic weights
print('Topics:')
print(lda.components_)  # one row per topic, one column per vocabulary term