In [15]:
import nltk
nltk.download('vader_lexicon')
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import yfinance as yf

# Function to get historical stock data
def get_stock_data(symbol, start_date, end_date):
    stock = yf.download(symbol, start=start_date, end=end_date)
    return stock['Adj Close']

# Sample data - replace this with actual S&P500 company symbols and names
s_and_p_500_data = {
    'Symbol': ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'FB'],
    'Company_Name': ['Apple Inc.', 'Alphabet Inc.', 'Microsoft Corp.', 'Amazon.com Inc.', 'Meta Platforms Inc.']
}

# Define the date range for historical stock data
start_date = '2023-01-01'
end_date = '2024-01-01'

# Initialize a DataFrame for the analysis
analysis_data = pd.DataFrame(index=pd.date_range(start_date, end_date))

# Iterate through S&P500 symbols
for i in range(len(s_and_p_500_data['Symbol'])):
    symbol = s_and_p_500_data['Symbol'][i]
    company_name = s_and_p_500_data['Company_Name'][i]

    # Get historical stock data
    stock_data = get_stock_data(symbol, start_date, end_date)

    # Calculate daily percentage change for momentum
    analysis_data[f'{symbol}_Momentum'] = stock_data.pct_change()

    # Sentiment Analysis using NLTK's VaderSentiment
    sid = SentimentIntensityAnalyzer()
    sentiment_scores = [sid.polarity_scores('Sample news')['compound'] for _ in range(len(analysis_data))]

    # Add sentiment scores to the DataFrame
    analysis_data[f'{symbol}_Sentiment'] = sentiment_scores

# Define the target variable based on historical stock trends
# For simplicity, let's say if the closing price increased (1) or decreased (0)
analysis_data['Target'] = (analysis_data['AAPL_Momentum'].shift(-1) > 0).astype(int)

# Machine Learning model training
X = analysis_data[['AAPL_Momentum', 'AAPL_Sentiment', 'GOOGL_Momentum', 'GOOGL_Sentiment']]
y = analysis_data['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf = RandomForestClassifier()
clf.fit(X_train, y_train)

# Use the trained model to predict buy/hold/sell decisions
predictions = clf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
classification_rep = classification_report(y_test, predictions)
print(f"Accuracy: {accuracy}")
print("Classification Report:\n", classification_rep)

# You can add predictions to s_and_p_500_data if needed


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/skd/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['FB']: Exception('%ticker%: No timezone found, symbol may be delisted')


Accuracy: 0.5675675675675675
Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.80      0.67        40
           1       0.56      0.29      0.38        34

    accuracy                           0.57        74
   macro avg       0.56      0.55      0.53        74
weighted avg       0.56      0.57      0.54        74



In [27]:
import nltk
nltk.download('vader_lexicon')
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import yfinance as yf

# Function to get historical stock data
def get_stock_data(symbol, start_date, end_date):
    stock = yf.download(symbol, start=start_date, end=end_date)
    return stock['Adj Close']

# Sample data - replace this with actual S&P500 company symbols and names
s_and_p_500_data = {
    'Symbol': ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'FB'],
    'Company_Name': ['Apple Inc.', 'Alphabet Inc.', 'Microsoft Corp.', 'Amazon.com Inc.', 'Meta Platforms Inc.']
}

# Define the date range for historical stock data
start_date = '2000-01-01'
end_date = '2024-01-01'

# Initialize a DataFrame for the analysis
analysis_data = pd.DataFrame(index=pd.date_range(start_date, end_date))

# Iterate through S&P500 symbols
for i in range(len(s_and_p_500_data['Symbol'])):
    symbol = s_and_p_500_data['Symbol'][i]
    company_name = s_and_p_500_data['Company_Name'][i]

    # Get historical stock data
    stock_data = get_stock_data(symbol, start_date, end_date)

    # Calculate daily percentage change for momentum
    analysis_data[f'{symbol}_Momentum'] = stock_data.pct_change()

    # Sentiment Analysis using NLTK's VaderSentiment
    sid = SentimentIntensityAnalyzer()
    sentiment_scores = [sid.polarity_scores('Sample news')['compound'] for _ in range(len(analysis_data))]

    # Add sentiment scores to the DataFrame
    analysis_data[f'{symbol}_Sentiment'] = sentiment_scores

# Define the target variable based on historical stock trends
# For simplicity, let's say if the closing price increased (1) or decreased (0)
analysis_data['Target'] = (analysis_data['AAPL_Momentum'].shift(-1) > 0).astype(int)

# Machine Learning model training
X = analysis_data[['AAPL_Momentum', 'AAPL_Sentiment', 'GOOGL_Momentum', 'GOOGL_Sentiment']]
y = analysis_data['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf = RandomForestClassifier()
clf.fit(X_train, y_train)




# Use the trained model to predict buy/hold/sell decisions
predictions = clf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
classification_rep = classification_report(y_test, predictions)
print(f"Accuracy: {accuracy}")
print("Classification Report:\n", classification_rep)

# Save the DataFrame to an Excel file
excel_file_path = 'stock_analysis_results.xlsx'
analysis_data.to_excel(excel_file_path, index=True)

# Display or export the analysis_data DataFrame with predictions and momentum
print(f"Excel file created: {excel_file_path}")


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/skd/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['FB']: Exception('%ticker%: No timezone found, symbol may be delisted')


Accuracy: 0.5969213226909921
Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.79      0.71      1121
           1       0.41      0.26      0.32       633

    accuracy                           0.60      1754
   macro avg       0.53      0.52      0.52      1754
weighted avg       0.56      0.60      0.57      1754

Excel file created: stock_analysis_results.xlsx
