**Logistic Regression**

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import re
# Load the tweet dataset (change 'Tweets.csv' if your copy lives elsewhere)
data = pd.read_csv('Tweets.csv')
print(data.head())
print(data['text'].dtype)
# read_csv represents a missing tweet as NaN. Replace it with an empty string
# before the cast; a bare astype(str) would turn it into the literal word
# "nan", which would then be counted as a real token by the vectorizer.
data['text'] = data['text'].fillna('').astype(str)
# Separate text and sentiment labels
text = data['text']
sentiment = data['sentiment']  # String class labels (e.g. 'neutral', 'negative'), not 0/1


       textID                                               text  \
0  cb774db0d1                I`d have responded, if I were going   
1  549e992a42      Sooo SAD I will miss you here in San Diego!!!   
2  088c60f138                          my boss is bullying me...   
3  9642c003ef                     what interview! leave me alone   
4  358bd9e861   Sons of ****, why couldn`t they put them on t...   

                         selected_text sentiment  
0  I`d have responded, if I were going   neutral  
1                             Sooo SAD  negative  
2                          bullying me  negative  
3                       leave me alone  negative  
4                        Sons of ****,  negative  
object


In [None]:
# Text preprocessing (can be extended for more advanced cleaning)
def preprocess_text(text):
  """Return `text` lowercased with punctuation and special characters removed.

  Word characters (letters, digits, underscore) and whitespace are kept;
  every other character is deleted. Stop words are not filtered out.

  Args:
    text: The raw string to clean.

  Returns:
    The cleaned string.
  """
  return re.sub(r'[^\w\s]', '', text.lower())

# Clean every tweet before splitting and vectorizing
text = [preprocess_text(t) for t in text]

# Train-test split on the cleaned documents. Splitting BEFORE fitting TF-IDF
# keeps the held-out tweets out of the vocabulary and IDF weights, so the
# reported accuracy is not inflated by test-set leakage.
text_train, text_test, y_train, y_test = train_test_split(
    text, sentiment, test_size=0.2, random_state=42)

# Feature extraction using TF-IDF, fitted on the training tweets only
vectorizer = TfidfVectorizer(max_features=2000)  # Adjust max_features as needed
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Train Logistic Regression model. max_iter is raised because lbfgs otherwise
# stops at its iteration limit before converging (ConvergenceWarning).
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train, y_train)

# Make predictions on test data
y_pred = model.predict(X_test)

# Evaluate model accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Predict sentiment for new text (optional)
new_text = "This movie was absolutely fantastic!"  # Replace with your text
new_features = vectorizer.transform([preprocess_text(new_text)])
prediction = model.predict(new_features)
# The model predicts the dataset's string labels (e.g. 'negative', 'neutral'),
# so report the predicted label itself. Comparing it to 0 is never true and
# made every input print "Positive".
print(f"Sentiment: {str(prediction[0]).capitalize()}")

Accuracy: 0.6931
Sentiment: Positive


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


**SVM**

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the tweet dataset (change 'Tweets.csv' if your copy lives elsewhere)
data = pd.read_csv('Tweets.csv')
print(data.head())
print(data['text'].dtype)
# read_csv represents a missing tweet as NaN. Replace it with an empty string
# before the cast; a bare astype(str) would turn it into the literal word
# "nan", which would then be counted as a real token by the vectorizer.
data['text'] = data['text'].fillna('').astype(str)
# Separate text and sentiment labels
text = data['text']
sentiment = data['sentiment']  # String class labels (e.g. 'neutral', 'negative'), not 0/1



       textID                                               text  \
0  cb774db0d1                I`d have responded, if I were going   
1  549e992a42      Sooo SAD I will miss you here in San Diego!!!   
2  088c60f138                          my boss is bullying me...   
3  9642c003ef                     what interview! leave me alone   
4  358bd9e861   Sons of ****, why couldn`t they put them on t...   

                         selected_text sentiment  
0  I`d have responded, if I were going   neutral  
1                             Sooo SAD  negative  
2                          bullying me  negative  
3                       leave me alone  negative  
4                        Sons of ****,  negative  
object


In [None]:
# Text preprocessing (can be extended for more advanced cleaning)
def preprocess_text(text):
  """Normalize a piece of raw text before TF-IDF vectorization.

  Lowercases the input and deletes every character that is neither a word
  character (letter, digit, underscore) nor whitespace.

  Args:
    text: The raw string to clean.

  Returns:
    The cleaned, lowercased string.
  """
  # Imported locally because this section's import cell does not import `re`;
  # without this the function only works if the Logistic Regression cell
  # (which does import it) has already been executed.
  import re

  text = text.lower()
  # Remove punctuation and special characters
  text = re.sub(r'[^\w\s]', '', text)
  # Stop words are left in; filter them here if needed
  # (e.g. with nltk's English stop-word list).
  return text

# Clean every tweet before splitting and vectorizing
text = [preprocess_text(t) for t in text]

# Train-test split on the cleaned documents. Splitting BEFORE fitting TF-IDF
# keeps the held-out tweets out of the vocabulary and IDF weights, so the
# reported accuracy is not inflated by test-set leakage.
text_train, text_test, y_train, y_test = train_test_split(
    text, sentiment, test_size=0.2, random_state=42)

# Feature extraction using TF-IDF, fitted on the training tweets only
vectorizer = TfidfVectorizer(max_features=2000)  # Adjust max_features as needed
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Train SVM model
model = SVC(kernel='linear')  # Choose kernel (linear, rbf, etc.)
model.fit(X_train, y_train)

# Make predictions on test data
y_pred = model.predict(X_test)

# Evaluate model accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Predict sentiment for new text (optional)
new_text = "This product is a total disappointment!"  # Replace with your text
new_features = vectorizer.transform([preprocess_text(new_text)])
prediction = model.predict(new_features)
# The model predicts the dataset's string labels (e.g. 'negative', 'neutral'),
# so report the predicted label itself. Comparing it to 0 is never true and
# made every input print "Positive".
print(f"Sentiment: {str(prediction[0]).capitalize()}")

Accuracy: 0.7018
Sentiment: Positive


**Naive Bayes**

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the tweet dataset (change 'Tweets.csv' if your copy lives elsewhere)
data = pd.read_csv('Tweets.csv')
print(data.head())
print(data['text'].dtype)
# read_csv represents a missing tweet as NaN. Replace it with an empty string
# before the cast; a bare astype(str) would turn it into the literal word
# "nan", which would then be counted as a real token by the vectorizer.
data['text'] = data['text'].fillna('').astype(str)
# Separate text and sentiment labels
text = data['text']
sentiment = data['sentiment']  # String class labels (e.g. 'neutral', 'negative'), not 0/1



       textID                                               text  \
0  cb774db0d1                I`d have responded, if I were going   
1  549e992a42      Sooo SAD I will miss you here in San Diego!!!   
2  088c60f138                          my boss is bullying me...   
3  9642c003ef                     what interview! leave me alone   
4  358bd9e861   Sons of ****, why couldn`t they put them on t...   

                         selected_text sentiment  
0  I`d have responded, if I were going   neutral  
1                             Sooo SAD  negative  
2                          bullying me  negative  
3                       leave me alone  negative  
4                        Sons of ****,  negative  
object


In [None]:
# Text preprocessing (can be extended for more advanced cleaning)
def preprocess_text(text):
  """Normalize a piece of raw text before TF-IDF vectorization.

  Lowercases the input and deletes every character that is neither a word
  character (letter, digit, underscore) nor whitespace.

  Args:
    text: The raw string to clean.

  Returns:
    The cleaned, lowercased string.
  """
  # Imported locally because this section's import cell does not import `re`;
  # without this the function only works if the Logistic Regression cell
  # (which does import it) has already been executed.
  import re

  text = text.lower()
  # Remove punctuation and special characters
  text = re.sub(r'[^\w\s]', '', text)
  # Stop words are left in; filter them here if needed
  # (e.g. with nltk's English stop-word list).
  return text

# Clean every tweet before splitting and vectorizing
text = [preprocess_text(t) for t in text]

# Train-test split on the cleaned documents. Splitting BEFORE fitting TF-IDF
# keeps the held-out tweets out of the vocabulary and IDF weights, so the
# reported accuracy is not inflated by test-set leakage.
text_train, text_test, y_train, y_test = train_test_split(
    text, sentiment, test_size=0.2, random_state=42)

# Feature extraction using TF-IDF, fitted on the training tweets only
vectorizer = TfidfVectorizer(max_features=2000)  # Adjust max_features as needed
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Train Naive Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

# Make predictions on test data
y_pred = model.predict(X_test)

# Evaluate model accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Predict sentiment for new text (optional)
new_text = "I'm very happy with this service!"  # Replace with your text
new_features = vectorizer.transform([preprocess_text(new_text)])
prediction = model.predict(new_features)
# The model predicts the dataset's string labels (e.g. 'negative', 'neutral'),
# so report the predicted label itself. Comparing it to 0 is never true and
# made every input print "Positive".
print(f"Sentiment: {str(prediction[0]).capitalize()}")


Accuracy: 0.6442
Sentiment: Positive
