In [None]:
import pandas as pd
import joblib
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

# Train a TF-IDF + logistic-regression sentiment classifier on headline text
# and persist the fitted pipeline to disk with joblib.

# Resolve the directory that the relative data/artifact paths are anchored to.
# `__file__` is not defined when this code runs as a notebook cell or in an
# interactive session, so fall back to the current working directory there
# instead of failing with NameError.
try:
    base_dir = os.path.dirname(__file__)
except NameError:
    base_dir = os.getcwd()

# 1. Load Data
# Adjust path to find the csv in ../../data/
file_path = os.path.join(base_dir, '../../data/raw_data.csv')
try:
    # Column names are supplied explicitly via `names`.
    df = pd.read_csv(file_path, names=['Sentiment', 'Headline'], encoding='latin-1')
except FileNotFoundError:
    # Only a genuinely missing file triggers the dummy-data fallback; any
    # other failure (parse error, permissions, ...) propagates so that a
    # model is never silently trained on placeholder rows by mistake.
    print("‚ö†Ô∏è Data file not found. Using dummy data for setup.")
    data = {'Headline': ["Stocks soar", "Revenue dropped", "Profits rise"],
            'Sentiment': ["positive", "negative", "positive"]}
    df = pd.DataFrame(data)
else:
    print(f"‚úÖ Loaded Data: {len(df)} rows")

# 2. Build Pipeline
# Raw headline text -> TF-IDF features (English stop words removed, vocabulary
# capped at 5000 terms) -> logistic regression with balanced class weights.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english', max_features=5000)),
    ('clf', LogisticRegression(class_weight='balanced'))
])

# 3. Train
print("‚è≥ Training...")
pipeline.fit(df['Headline'], df['Sentiment'])

# 4. Save
# The artifact directory is created on demand so a fresh checkout works.
output_dir = os.path.join(base_dir, '../model_artifacts')
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'sentiment_model.pkl')

joblib.dump(pipeline, output_path)
print(f"üéâ Model saved to {output_path}")