# Sentiment Analysis Multimodel Project

This notebook demonstrates a sentiment analysis pipeline designed to work with multiple models, including SVM, Logistic Regression, Random Forest, and XGBoost. The SVM is used as the worked example in the training, saving, and prediction sections below. Each step in the process, from preprocessing to evaluation, is detailed for clarity.

## 1. Install Required Libraries

In [None]:
!pip install nltk scikit-learn pandas
!pip install spacy
!python -m spacy download en_core_web_sm

## 2. Import Necessary Libraries

In [None]:
import spacy
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import joblib

## 3. Load and Preprocess the Dataset

In [None]:
# Load the small English spaCy pipeline used by the text preprocessing step
nlp = spacy.load('en_core_web_sm')

# Dataset (sample data): five short reviews with string sentiment labels
data = {
    'text': [
        'I love this product!', 'This is the worst thing I have ever bought.',
        'I am so happy with my purchase!', 'Not worth the money.', 'Great value for the price.'
    ],
    'sentiment': ['positive', 'negative', 'positive', 'negative', 'positive']
}
df = pd.DataFrame(data)

# Convert the 'sentiment' column to numeric labels.
# The encoded values are written to the DataFrame only, so the raw string
# labels in `data` stay intact and the cell can be re-read or re-run without
# the source dict silently changing type.
label_encoder = LabelEncoder()
df['sentiment'] = label_encoder.fit_transform(data['sentiment'])

# Split the dataset (80% train / 20% test, fixed seed for reproducibility)
X = df['text']
y = df['sentiment']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# A classifier cannot be fitted on a single class; fail early with a clear
# message if the split leaves only one sentiment in the training data.
assert y_train.nunique() > 1, "Training split must contain at least two sentiment classes"

# Text preprocessing
def preprocess_text_spacy(text):
    """Normalize one piece of text for vectorization.

    The text is lowercased, run through the spaCy pipeline `nlp`, and every
    token whose `is_alpha` flag is set is replaced by its lemma. Tokens
    without that flag are dropped.

    Parameters
    ----------
    text : str
        Raw input text.

    Returns
    -------
    str
        The kept lemmas joined by single spaces (empty string if none remain).
    """
    parsed = nlp(text.lower())
    lemmas = []
    for tok in parsed:
        if tok.is_alpha:
            lemmas.append(tok.lemma_)
    return ' '.join(lemmas)

# Run the spaCy preprocessing element-wise over each split; the original
# index of every Series is preserved.
X_train_cleaned = X_train.map(preprocess_text_spacy)
X_test_cleaned = X_test.map(preprocess_text_spacy)

## 4. Feature Extraction Using TF-IDF

In [None]:
# Learn the vocabulary and IDF weights from the training text only (at most
# 5000 features), so nothing from the test split leaks into the vectorizer.
tfidf = TfidfVectorizer(max_features=5000)
tfidf.fit(X_train_cleaned)

# Vectorize both splits with the fitted vocabulary and convert the sparse
# results to dense arrays.
X_train_tfidf = tfidf.transform(X_train_cleaned).toarray()
X_test_tfidf = tfidf.transform(X_test_cleaned).toarray()

## 5. Model Training and Evaluation

In [None]:
# Example: Support Vector Machine (SVM)
# Fit an SVC with default hyperparameters on the dense TF-IDF training matrix.
svm = SVC().fit(X_train_tfidf, y_train)

# Score the held-out split and report plain accuracy.
y_pred_svm = svm.predict(X_test_tfidf)
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print("SVM Accuracy:", svm_accuracy)

## 6. Save Models and TF-IDF Vectorizer

In [None]:
# Persist everything needed to reproduce a prediction outside this notebook:
# the trained classifier, the label encoder (to turn numeric predictions back
# into the original sentiment strings), and the fitted TF-IDF vectorizer.
joblib.dump(svm, 'svm_model.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')
joblib.dump(tfidf, 'tfidf_vectorizer.pkl')

## 7. Prediction Function

In [None]:
def predict_sentiment(text, model=None):
    """Predict the sentiment label for a single piece of raw text.

    The text goes through the same steps as the training data:
    `preprocess_text_spacy`, then the fitted `tfidf` vectorizer, then the
    classifier. The numeric prediction is decoded with `label_encoder`.

    Parameters
    ----------
    text : str
        Raw input text to classify.
    model : object, optional
        A fitted classifier exposing `predict`, trained on features produced
        by `tfidf` and on labels encoded by `label_encoder`. Defaults to the
        notebook's `svm`, which keeps existing one-argument calls unchanged.

    Returns
    -------
    The decoded sentiment label for `text` (one of the values the label
    encoder was fitted on).
    """
    classifier = svm if model is None else model
    cleaned_text = preprocess_text_spacy(text)
    # Densify to match the dense matrices the classifier was trained on.
    features = tfidf.transform([cleaned_text]).toarray()
    prediction = classifier.predict(features)
    # `prediction` holds one encoded label; map it back to the original label.
    return label_encoder.inverse_transform(prediction)[0]

# Example usage: classify one unseen sentence and show the decoded label
text_input = "I absolutely love this!"
predicted_label = predict_sentiment(text_input)
print("Predicted Sentiment:", predicted_label)