Name: Saleh Abdallah

In [18]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from flask import Flask, request, jsonify
import joblib
import warnings
warnings.filterwarnings('ignore')

In [12]:
# Data Collection and Preprocessing

# Download the Sentiment140 dataset from the bottom of this page and save it as
# 'training.csv'; the relative path below is resolved against the current
# working directory.

# The CSV is read without a header row (header=None), so the six column names
# are assigned explicitly after loading.
raw_columns = ['sentiment', 'id', 'date', 'query', 'user', 'text']

df = pd.read_csv('training.csv', header=None, encoding='latin-1')
df.columns = raw_columns

# Clean and preprocess the tweets:
# Remove URLs, mentions, and special characters.
# Convert text to lowercase (tokenization is done later by TfidfVectorizer).

def clean_tweet(tweet):
    """Strip noise from a single tweet and return it lowercased.

    Removes, in one regex pass:
      * URLs, i.e. any run of non-whitespace starting with "http" in any
        letter case ("http://", "HTTPS://", "Http://", ...),
      * @mentions ("@" followed by non-whitespace),
      * every remaining character that is not an ASCII letter, digit or space.

    Matched text is deleted, not replaced by a space, so surrounding
    whitespace is left as-is (e.g. two spaces remain where a URL stood).

    Args:
        tweet: The raw tweet text (str).

    Returns:
        The cleaned, lowercased tweet text (str).
    """
    # The scoped flag (?i:...) makes only the URL scheme case-insensitive.
    # Without it an upper/mixed-case "HTTP://..." link is not recognised as a
    # URL; only its punctuation is stripped and its letters stay in the text
    # as a junk token.
    tweet = re.sub(r"(?i:http)\S+|@\S+|[^A-Za-z0-9 ]+", '', tweet)
    return tweet.lower()

# Clean every tweet with clean_tweet.
df['text'] = df['text'].apply(clean_tweet)

# Recode the raw labels 0/4 to binary 0/1.
# Series.replace leaves values that are not keys untouched, so re-running this
# cell keeps already-recoded labels intact. The previous .map({0: 0, 4: 1})
# turned every already-recoded 1 into NaN on a second run.
df['sentiment'] = df['sentiment'].replace({4: 1})

# Fail fast if any label other than 0/1 (including NaN) is present, instead of
# letting it surface later during model fitting.
unexpected_labels = set(df['sentiment'].unique()) - {0, 1}
if unexpected_labels:
    raise ValueError(f"Unexpected sentiment labels after recoding: {unexpected_labels}")

In [14]:
# Model Training
# Utilize a logistic regression model for sentiment analysis.
# Validate the model’s performance through metrics like accuracy and F1-score, using a separate validation dataset.

# Hold out 20% of the tweets for validation.
# - random_state fixes the shuffle so the split, and therefore the reported
#   metrics, are reproducible after a kernel restart.
# - stratify keeps the label proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['sentiment'],
    test_size=0.2,
    random_state=42,
    stratify=df['sentiment'],
)

# Fit the TF-IDF vocabulary on the training split only; the held-out tweets are
# transformed with that fitted vocabulary.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# The default max_iter=100 can stop the solver before it converges. All
# warnings are silenced in the import cell, so that would go unnoticed; a
# larger iteration budget avoids it.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)

# Evaluate on the held-out split.
preds = model.predict(X_test_vec)
print("Accuracy:", round(accuracy_score(y_test, preds), 4))
print("F1 Score:", round(f1_score(y_test, preds, average='weighted'), 4))

Accuracy: 0.7983
F1 Score: 0.7983


In [21]:
# Save the Model
# Persist the fitted classifier and the fitted TF-IDF vectorizer as two separate
# files in the current working directory. The vectorizer is written last so
# that its dump result remains the cell's displayed output.
joblib.dump(value=model, filename='model.pkl')
joblib.dump(value=vectorizer, filename='vectorizer.pkl')

['vectorizer.pkl']

In [3]:
# Create a Dockerfile to containerize the Flask application, ensuring all dependencies are correctly handled.
# Push the Docker image to Docker Hub.

###
# Created a Dockerfile that builds a container including the Flask API and the trained sentiment model.
###

In [None]:
# MLOps Integration
# Set up GitHub Actions for CI/CD to automate the testing and deployment of the Flask application whenever updates are made to the code.
# Integrate tests that check the API’s ability to receive requests and return the correct sentiment.
# Document the model versioning and how to roll back to previous versions if needed.

###
# Created a GitHub repo with CI/CD to automate the testing and deployment of the Flask application.
###

In [None]:
# Deployment
# Deploy the containerized application to a cloud provider like AWS or a local server using Kubernetes for orchestration.
# Configure Kubernetes to manage the deployment and scaling of the application automatically.