**1 IMPORT LIBRARIES**

In [None]:
import pandas as pd
import re
import string

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

**2 LOAD DATASET**

In [None]:
# Load the tweets CSV into a DataFrame (path is hard-coded to the Colab workspace).
df = pd.read_csv(filepath_or_buffer="/content/Tweets.csv")

**Keep only the required columns**

In [None]:
# Narrow the frame to the tweet text and its sentiment label.
df = df.loc[:, ['text', 'airline_sentiment']]

In [None]:
# Show a header line followed by the first rows of the frame.
print("Sample data:", df.head(), sep="\n")

Sample data:
                                                text airline_sentiment
0                @VirginAmerica What @dhepburn said.           neutral
1  @VirginAmerica plus you've added commercials t...          positive
2  @VirginAmerica I didn't today... Must mean I n...           neutral
3  @VirginAmerica it's really aggressive to blast...          negative
4  @VirginAmerica and it's a really big bad thing...          negative


**3. Text Cleaning Function**

In [None]:
def clean_text(text):
    """Normalize a raw tweet into lower-case, whitespace-separated words.

    Steps, in order: lower-case, drop URLs, drop @mentions and the '#'
    symbol (the hashtag word itself is kept), drop ASCII punctuation,
    drop digits, then collapse every run of whitespace to a single space.

    Args:
        text: The raw tweet. Non-string values (e.g. None, or the float NaN
            pandas uses for missing cells) are treated as empty text.

    Returns:
        The cleaned string; "" when there is nothing left or the input was
        not a string.
    """
    # Missing or non-string cells have no text to clean; avoid crashing on .lower().
    if not isinstance(text, str):
        return ""

    # lower case
    text = text.lower()
    # remove links
    text = re.sub(r"http\S+|www\S+", '', text)
    # remove @mentions and the '#' symbol (hashtag words are kept)
    text = re.sub(r'@\w+|#', '', text)
    # remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    # remove numbers
    text = re.sub(r'\d+', '', text)
    # The removals above leave gaps behind; split/join trims both ends and
    # collapses internal whitespace runs, which strip() alone did not do.
    return " ".join(text.split())

In [None]:

# Run the cleaner over every tweet and keep the result beside the raw text.
df["clean_text"] = df["text"].map(clean_text)

**4. Train–Test Split**

In [None]:
# Features are the cleaned tweets; targets are the sentiment labels.
X, y = df["clean_text"], df["airline_sentiment"]

# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions the same in both splits; the fixed seed makes it repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)

**5. Text → Numbers (Bag of Words)**

In [None]:
# Bag-of-words counts with English stop words removed and the vocabulary
# capped at 5000 terms.
vectorizer = CountVectorizer(
    max_features=5000,
    stop_words="english",
)

# Learn the vocabulary from the training split only, then reuse it unchanged
# for the test split so no test information leaks into the features.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

**6. ONLY MODEL: Logistic Regression**

In [None]:
# fit() returns the estimator itself, so construction and training can be chained.
log_reg = LogisticRegression(max_iter=500).fit(X_train_vec, y_train)
# Keep the fitted estimator as the cell's last expression so it is still displayed.
log_reg

**7. Evaluation**

In [None]:
# Predict labels for the held-out tweets.
y_pred = log_reg.predict(X_test_vec)

# Report overall accuracy, then per-class precision / recall / F1.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"\nClassification Report:\n {classification_report(y_test, y_pred)}")

Accuracy: 0.7773224043715847

Classification Report:
               precision    recall  f1-score   support

    negative       0.84      0.88      0.86      1835
     neutral       0.60      0.59      0.59       620
    positive       0.76      0.61      0.68       473

    accuracy                           0.78      2928
   macro avg       0.73      0.70      0.71      2928
weighted avg       0.77      0.78      0.77      2928



**8. Predict on New Tweet**

In [None]:
def predict_sentiment(text):
    """Return the predicted sentiment label for one raw tweet.

    The tweet goes through the same clean_text step used on the training
    data, is encoded with the already-fitted ``vectorizer``, and is then
    classified by the already-fitted ``log_reg`` model.
    """
    # transform() and predict() work on batches, so wrap the tweet in a
    # one-element list and unwrap the single prediction afterwards.
    features = vectorizer.transform([clean_text(text)])
    predictions = log_reg.predict(features)
    return predictions[0]

# Try the model on a clearly negative example.
sample = "I am very disappointed with this airline, such a bad experience."
print(f"\nSample Tweet: {sample}")
print(f"Predicted Sentiment: {predict_sentiment(sample)}")


Sample Tweet: I am very disappointed with this airline, such a bad experience.
Predicted Sentiment: negative


In [None]:
# predict_sentiment is already defined in the previous cell; redefining it
# here only shadowed an identical function, so this cell just calls it.
# Try the model on a clearly positive example.
sample = "I am very happy with this airline, such a good  experience."
print("\nSample Tweet:", sample)
print("Predicted Sentiment:", predict_sentiment(sample))


Sample Tweet: I am very happy with this airline, such a good  experience.
Predicted Sentiment: positive
