In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import re

In [None]:
# Function to preprocess data from text file
def preprocess_data_from_file(filepath):
    """Parse a labelled text file into a DataFrame.

    Every non-blank line is expected to look like ``__label__<int> <text>``:
    a label tag, a single space, then the free-form text of the example.

    Args:
        filepath: Path of the text file to read (decoded as UTF-8).

    Returns:
        pandas.DataFrame with two columns: ``label`` (the integer that
        follows ``__label__``) and ``text`` (everything after the first
        space on the line). An empty or all-blank file yields an empty
        frame with the same two columns.

    Raises:
        ValueError: If a non-blank line has no space-separated text, lacks
            the ``__label__`` marker, or carries a non-integer label.
    """
    # Explicit encoding so parsing does not depend on the platform's locale default.
    with open(filepath, 'r', encoding='utf-8') as file:
        data = file.read()

    labels = []
    texts = []
    for line in data.strip().split('\n'):
        # Blank lines in the middle of the file carry no example; skip them
        # instead of failing on the unpack below.
        if not line.strip():
            continue
        tag, separator, text = line.partition(' ')
        tag_parts = tag.split('__label__')
        if not separator or len(tag_parts) < 2:
            raise ValueError(
                f"Malformed line (expected '__label__<int> <text>'): {line!r}"
            )
        try:
            label = int(tag_parts[1])
        except ValueError:
            raise ValueError(f"Non-integer label in line: {line!r}") from None
        labels.append(label)
        texts.append(text)
    return pd.DataFrame({'label': labels, 'text': texts})

In [None]:
# Load data from file
# Relative path: resolved against the kernel's current working directory.
filepath = 'train.3270.txt'
# Parse the file into a DataFrame with 'label' and 'text' columns.
df = preprocess_data_from_file(filepath)

In [None]:
# Preview the first 10 rows of the parsed frame.
df.head(10)

In [None]:
# Preprocess text
def preprocess_text(text):
    """Normalise a document: lower-case it and squeeze non-word runs to one space.

    Args:
        text: The raw string to clean.

    Returns:
        The lower-cased string in which every non-word character has been
        turned into a space and consecutive whitespace collapsed to a single
        space. The result is not stripped, so one leading and/or trailing
        space can remain.
    """
    lowered = text.lower()
    # Punctuation, symbols and whitespace all become plain spaces first...
    spaced = re.sub(r'\W', ' ', lowered)
    # ...so this pass only has runs of spaces left to collapse.
    return re.sub(r'\s+', ' ', spaced)

In [None]:
# Overwrite the 'text' column with its cleaned form (preprocess_text applied row by row).
df['text'] = df['text'].apply(preprocess_text)

In [None]:
# Preview the first 10 rows again to inspect the current contents of 'text'.
df.head(10)

In [None]:
# Split data into training and test sets
# 20% of the rows are held out; random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# Feature engineering
# TF-IDF vectorizer configured with the 'english' stop-word option.
vectorizer = TfidfVectorizer(stop_words='english')
# Fit on the training split only, then reuse the fitted vectorizer for the
# test split so the test documents do not influence the learned features.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [None]:
# Bare expression: the notebook displays the repr of the training feature matrix.
X_train_tfidf

In [None]:
# Model selection and training
# Logistic regression constructed with no arguments, i.e. library defaults throughout.
model = LogisticRegression()
# Train on the TF-IDF features of the training split.
model.fit(X_train_tfidf, y_train)

In [None]:
# Function to predict sentiment for a custom input text
def predict_custom_text(text):
    """Return the model's predicted label for a single raw string.

    The input goes through the same steps as the training data: it is cleaned
    with ``preprocess_text``, vectorised by the module-level ``vectorizer``
    and classified by the module-level ``model``.

    Args:
        text: Raw, uncleaned input string.

    Returns:
        The first (and only) element of the model's prediction array.
    """
    cleaned = preprocess_text(text)
    # transform() expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([cleaned])
    return model.predict(features)[0]

# Example custom input text
custom_text = "I absolutely love this product! It's fantastic and works great."
# Classify the example with predict_custom_text and print the predicted label.
predicted_label = predict_custom_text(custom_text)
print(f"Custom Text Prediction: {predicted_label}")

In [None]:
# Model evaluation
# Predict on the held-out split's TF-IDF features and compare against y_test.
y_pred = model.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(y_test, y_pred))
# Second positional argument to print is separated by a space, so the report
# starts right after the newline in the heading string.
print("Classification Report:\n", classification_report(y_test, y_pred))

In [None]:
# Load test data from file
# Relative path: resolved against the kernel's current working directory.
test_filepath = 'test.135.txt'
df_test = preprocess_data_from_file(test_filepath)

# Apply text preprocessing
df_test['text'] = df_test['text'].apply(preprocess_text)

# Feature engineering
# Use a dedicated name instead of reassigning X_test_tfidf: that variable holds
# the features of the train_test_split hold-out and is paired with y_test, so
# overwriting it would break the earlier evaluation cell if it is re-run.
X_test_file_tfidf = vectorizer.transform(df_test['text'])

# Model evaluation on test set
y_test_pred = model.predict(X_test_file_tfidf)
y_test_actual = df_test['label'].to_numpy()
print("Test Accuracy:", accuracy_score(y_test_actual, y_test_pred))
# Both metrics are computed from the same y_test_actual array for consistency.
print("Test Classification Report:\n", classification_report(y_test_actual, y_test_pred))

In [None]:
from flask import Flask, request, jsonify

# Deployment example (Flask app)
# Application object; the /predict route defined below is registered on it.
app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    """Classify the text sent in a POST request.

    Expects a JSON object body with a string ``text`` field. The text is
    cleaned with ``preprocess_text``, vectorised with the module-level
    ``vectorizer`` and classified by the module-level ``model``.

    Returns:
        On success, a JSON response ``{"prediction": <int>}``.
        On a missing/invalid JSON body or a missing/non-string ``text``
        field, a JSON ``{"error": ...}`` response with HTTP status 400.
    """
    # silent=True yields None for a missing or unparsable JSON body, so the
    # problem can be reported as a client error instead of an unhandled exception.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or not isinstance(payload.get('text'), str):
        return jsonify({'error': "Request body must be a JSON object with a string 'text' field."}), 400

    text = preprocess_text(payload['text'])
    # transform() expects an iterable of documents, hence the one-item list.
    X = vectorizer.transform([text])
    prediction = model.predict(X)
    # Cast to a built-in int so the value is JSON-serialisable.
    return jsonify({'prediction': int(prediction[0])})

if __name__ == '__main__':
    # Debug mode is kept off: it enables the interactive debugger, which lets
    # anyone who can reach the server execute arbitrary Python code, and the
    # auto-reloader. Neither belongs in a deployment example.
    app.run(debug=False)
