# Starter Script (Change REPO_PATH at Start)

In [1]:
# REPO_PATH: absolute path to the local checkout of this repository.
# The analysis cells below build their "sentiment_data" paths from it.
from os import path
REPO_PATH = "/Users/tks/Documents/GitHub/cs591_sns_ipo/" # change me to your own checkout location

# all imports
import os.path as path
from os import makedirs, listdir
import pandas as pd

# ------------------------------------
# Helper functions 
# ------------------------------------

def compute_avg_score(df, ticker):
    """Average ``weighted_compound_i * score_i`` over all rows, for methods 0-3.

    Args:
        df: DataFrame with the columns ``weighted_compound_0`` .. ``weighted_compound_3``
            and ``score_0`` .. ``score_3``; each row is counted as one tweet.
        ticker: Not used by the computation; kept so existing callers keep working.

    Returns:
        A list of four averages, where index ``i`` belongs to method ``i``.

    Raises:
        ZeroDivisionError: If ``df`` has no rows (there is nothing to average).
        KeyError: If a required column is missing from a non-empty ``df``.
    """
    num_tweets = len(df)
    if num_tweets == 0:
        raise ZeroDivisionError("cannot average the scores of an empty DataFrame")

    # Column-wise product and sum replaces the row-by-row iterrows() loop.
    # skipna=False makes a NaN in any row propagate into the average, exactly
    # as a plain running total would, instead of being silently dropped.
    return [
        (df[f"weighted_compound_{method}"] * df[f"score_{method}"]).sum(skipna=False) / num_tweets
        for method in range(4)
    ]

def compute_avg_score_all(sentiment_data):
    """Average ``weighted_compound_i * score_i`` over the rows of every frame, for methods 0-3.

    All frames are pooled: the per-method totals are summed across the frames
    and divided by the combined row count, so larger frames weigh more.

    Args:
        sentiment_data: Iterable of DataFrames, each with the columns
            ``weighted_compound_0`` .. ``weighted_compound_3`` and
            ``score_0`` .. ``score_3``.

    Returns:
        A list of four pooled averages, where index ``i`` belongs to method ``i``.

    Raises:
        ZeroDivisionError: If the frames contain no rows at all.
        KeyError: If a required column is missing from a non-empty frame.
    """
    totals = [0, 0, 0, 0]
    num_tweets_all = 0

    for df in sentiment_data:
        if len(df) == 0:
            # A frame without rows contributes nothing; skip it without
            # touching its columns.
            continue
        num_tweets_all += len(df)

        # Column-wise product and sum replaces the row-by-row iterrows() loop.
        # skipna=False makes a NaN in any row propagate into the total, as a
        # plain running sum would, instead of being silently dropped.
        for method in range(4):
            products = df[f"weighted_compound_{method}"] * df[f"score_{method}"]
            totals[method] += products.sum(skipna=False)

    if num_tweets_all == 0:
        raise ZeroDivisionError("cannot average the scores when there are no rows")

    return [total / num_tweets_all for total in totals]

# ------------------------------------
# Helper functions for utility
# ------------------------------------

def save_df(df, title=None, save_path=None):
    """Write ``df`` to a CSV file, creating the target directory when needed.

    Args:
        df: DataFrame to save (written with ``DataFrame.to_csv`` defaults).
        title: File name inside ``save_path``. When omitted, the name is
            ``"Untitled_<n>"`` where ``<n>`` is the number of entries already
            in the directory.
        save_path: Directory to write into. When omitted, the
            ``sentiment_data`` folder under the notebook-level ``REPO_PATH``
            is used.
    """
    if save_path is None:
        # Use the single notebook-level REPO_PATH instead of a second
        # hard-coded copy, so only one path needs changing per machine.
        save_path = path.join(REPO_PATH, "sentiment_data")

    # exist_ok avoids the check-then-create race of a separate isdir() test.
    makedirs(save_path, exist_ok=True)

    if title is None:
        title = "Untitled_" + str(len(listdir(save_path)))

    save_file = path.join(save_path, title)

    df.to_csv(save_file)

# Data Analysis Phase
## Scoring Calculation Methods:
Original: Score as is ((fav_count + rt_count) / engagement_total)
- Method 0. Add 1 to all the original scores
- Method 1. Add average Score to all the original scores
- Method 2. Add minimum non-zero score to all the original scores
- Method 3. If tweets have no engagement, set the engagement to be 1 to avoid 0

## Notes from Taka's Experiments
- Joining in the "after" data (the files with "after" in their name, which the tests below exclude) did not improve the results in any major way
- Tried various score weighting methods, but the current 4 methods seem to be the best performing ones so far

# Test 1
- Baseline = 0
    - Predicts to go "Up" if the weighted average compound is above 0
    - Predicts to go "Down" if the weighted average compound is below 0

In [7]:
# Test 1: predict "Up" when a ticker's weighted compound average is above 0,
# otherwise "Down", and compare each prediction against the known price move.

# Load every "<TICKER>_Tweets.csv" file, leaving out the "after" datasets.
SENTIMENT_FILE_PATH = path.join(REPO_PATH, "sentiment_data")
SENTIMENT_FILENAMES = listdir(SENTIMENT_FILE_PATH)

sentiment_data_path = [x for x in SENTIMENT_FILENAMES if x.endswith("_Tweets.csv") and ("after" not in x)]
sentiment_data = [pd.read_csv(path.join(SENTIMENT_FILE_PATH, x)) for x in sentiment_data_path]

# Expected price direction for each ticker.
price_signal_answer = {
    "PLTR": "Down",
    "ASAN": "Up",
    "SNOW": "Up",
    "ZI": "Down",
    "KC": "Up",
    "U": "Down"
}

# The success rate is taken over the tickers actually loaded, not a fixed 6.
num_tickers = len(sentiment_data_path)

method_correct = [0, 0, 0, 0]  # correct predictions per scoring method
total_down_prediction = total_up_prediction = 0

# iterate on the dataframes
for filename, ticker_df in zip(sentiment_data_path, sentiment_data):
    ticker = filename.split("_")[0]
    price_signal = price_signal_answer[ticker]

    avg_score = compute_avg_score(ticker_df, ticker)

    print(f"Ticker: {ticker}")

    # Announce Weighted Compound Average and the predictions from it
    predictions = ["Up" if score > 0 else "Down" for score in avg_score]
    for method, (score, prediction) in enumerate(zip(avg_score, predictions)):
        print(f"\tMethod {method} Weighted Compound Average: {score}; Prediction: {prediction}")

    # Announce Method Correctness
    for method, prediction in enumerate(predictions):
        if price_signal == prediction:
            method_correct[method] += 1
            print(f"\t  method {method} correct")
    print()

    # General Analysis Stats
    #   How many down and up predictions?
    total_down_prediction += predictions.count("Down")
    total_up_prediction += predictions.count("Up")

print()
for method, correct in enumerate(method_correct):
    success_rate = (correct / num_tickers) * 100 if num_tickers else 0.0
    print(f"Method {method} got: {correct} correct answers; Successful Prediction Rate: {success_rate}%")
print()
print(f"Total Down Predictions: {total_down_prediction}")
print(f"Total Up Predictions: {total_up_prediction}")

# Keep the per-method counter names this cell has always defined.
method_0_correct, method_1_correct, method_2_correct, method_3_correct = method_correct

Ticker: SNOW
	Method 0 Weighted Compound Average: 0.15395951781768497; Prediction: Up
	Method 1 Weighted Compound Average: 5.625653798611805e-06; Prediction: Up
	Method 2 Weighted Compound Average: 0.15395951781768497; Prediction: Up
	Method 3 Weighted Compound Average: 4.543713571076787e-06; Prediction: Up
	  method 0 correct
	  method 1 correct
	  method 2 correct
	  method 3 correct

Ticker: PLTR
	Method 0 Weighted Compound Average: 0.10347956756367165; Prediction: Up
	Method 1 Weighted Compound Average: 8.841104541002168e-06; Prediction: Up
	Method 2 Weighted Compound Average: 0.10347956756367165; Prediction: Up
	Method 3 Weighted Compound Average: 7.027554964111171e-06; Prediction: Up

Ticker: ZI
	Method 0 Weighted Compound Average: 0.2719792597811435; Prediction: Up
	Method 1 Weighted Compound Average: 0.0009537705201543545; Prediction: Up
	Method 2 Weighted Compound Average: 0.2719792597811435; Prediction: Up
	Method 3 Weighted Compound Average: 0.0008308186376617621; Prediction

# Test 2
- Start by computing the weighted average compound across all tweets data (across all 6 dataframes)
- Baseline = Weighted Average Compound (All)
    - Predicts to go "Up" if the weighted average compound is above Baseline
    - Predicts to go "Down" if the weighted average compound is below Baseline

In [8]:
# Test 2: predict "Up" when a ticker's weighted compound average is above the
# average over all datasets, otherwise "Down".

# Load every "<TICKER>_Tweets.csv" file, leaving out the "after" datasets.
SENTIMENT_FILE_PATH = path.join(REPO_PATH, "sentiment_data")
SENTIMENT_FILENAMES = listdir(SENTIMENT_FILE_PATH)

sentiment_data_path = [x for x in SENTIMENT_FILENAMES if x.endswith("_Tweets.csv") and ("after" not in x)]
sentiment_data = [pd.read_csv(path.join(SENTIMENT_FILE_PATH, x)) for x in sentiment_data_path]

# Baseline: per-method average over all datasets pooled together.
avg_score_all = compute_avg_score_all(sentiment_data)
print("Average Score across all the datasets")
for method, score in enumerate(avg_score_all):
    print(f"\t Method {method} Average Score: {score}")
print()

# Expected price direction for each ticker.
price_signal_answer = {
    "PLTR": "Down",
    "ASAN": "Up",
    "SNOW": "Up",
    "ZI": "Down",
    "KC": "Up",
    "U": "Down"
}

# The success rate is taken over the tickers actually loaded, not a fixed 6.
num_tickers = len(sentiment_data_path)

method_correct = [0, 0, 0, 0]  # correct predictions per scoring method
total_down_prediction = total_up_prediction = 0

# iterate on the dataframes
for filename, ticker_df in zip(sentiment_data_path, sentiment_data):
    ticker = filename.split("_")[0]
    price_signal = price_signal_answer[ticker]

    avg_score = compute_avg_score(ticker_df, ticker)

    print(f"Ticker: {ticker}")

    # Announce Weighted Compound Average and the predictions from it
    predictions = [
        "Up" if score > baseline else "Down"
        for score, baseline in zip(avg_score, avg_score_all)
    ]
    for method, (score, prediction) in enumerate(zip(avg_score, predictions)):
        print(f"\tMethod {method} Weighted Compound Average: {score}; Prediction: {prediction}")

    # Announce Method Correctness
    for method, prediction in enumerate(predictions):
        if price_signal == prediction:
            method_correct[method] += 1
            print(f"\t  method {method} correct")
    print()

    # General Analysis Stats
    #   How many down and up predictions?
    total_down_prediction += predictions.count("Down")
    total_up_prediction += predictions.count("Up")

print()
for method, correct in enumerate(method_correct):
    success_rate = (correct / num_tickers) * 100 if num_tickers else 0.0
    print(f"Method {method} got: {correct} correct answers; Successful Prediction Rate: {success_rate}%")
print()
print(f"Total Down Predictions: {total_down_prediction}")
print(f"Total Up Predictions: {total_up_prediction}")

# Keep the per-method counter names this cell has always defined.
method_0_correct, method_1_correct, method_2_correct, method_3_correct = method_correct

Average Score across all the datasets
	 Method 0 Average Score: 0.156345807677761
	 Method 1 Average Score: 5.1923461850999696e-05
	 Method 2 Average Score: 0.156345807677761
	 Method 3 Average Score: 4.3083621875943096e-05

Ticker: SNOW
	Method 0 Weighted Compound Average: 0.15395951781768497; Prediction: Down
	Method 1 Weighted Compound Average: 5.625653798611805e-06; Prediction: Down
	Method 2 Weighted Compound Average: 0.15395951781768497; Prediction: Down
	Method 3 Weighted Compound Average: 4.543713571076787e-06; Prediction: Down

Ticker: PLTR
	Method 0 Weighted Compound Average: 0.10347956756367165; Prediction: Down
	Method 1 Weighted Compound Average: 8.841104541002168e-06; Prediction: Down
	Method 2 Weighted Compound Average: 0.10347956756367165; Prediction: Down
	Method 3 Weighted Compound Average: 7.027554964111171e-06; Prediction: Down
	  method 0 correct
	  method 1 correct
	  method 2 correct
	  method 3 correct

Ticker: ZI
	Method 0 Weighted Compound Average: 0.271979259

# Test 3
- Start by computing the weighted average compound across all tweets data (across all 6 dataframes)
- Baseline = Weighted Average Compound (All)
    - New Twist, we completely doubt twitter sentiment and act opposite to the sentiment
    - Predicts to go "Up" if the weighted average compound is below Baseline
    - Predicts to go "Down" if the weighted average compound is above Baseline

In [9]:
# Test 3: contrarian rule - predict "Up" when a ticker's weighted compound
# average is below the average over all datasets, otherwise "Down".

# Load every "<TICKER>_Tweets.csv" file, leaving out the "after" datasets.
SENTIMENT_FILE_PATH = path.join(REPO_PATH, "sentiment_data")
SENTIMENT_FILENAMES = listdir(SENTIMENT_FILE_PATH)

sentiment_data_path = [x for x in SENTIMENT_FILENAMES if x.endswith("_Tweets.csv") and ("after" not in x)]
sentiment_data = [pd.read_csv(path.join(SENTIMENT_FILE_PATH, x)) for x in sentiment_data_path]

# Baseline: per-method average over all datasets pooled together.
avg_score_all = compute_avg_score_all(sentiment_data)
print("Average Score across all the datasets")
for method, score in enumerate(avg_score_all):
    print(f"\t Method {method} Average Score: {score}")
print()

# Expected price direction for each ticker.
price_signal_answer = {
    "PLTR": "Down",
    "ASAN": "Up",
    "SNOW": "Up",
    "ZI": "Down",
    "KC": "Up",
    "U": "Down"
}

# The success rate is taken over the tickers actually loaded, not a fixed 6.
num_tickers = len(sentiment_data_path)

method_correct = [0, 0, 0, 0]  # correct predictions per scoring method
total_down_prediction = total_up_prediction = 0

# iterate on the dataframes
for filename, ticker_df in zip(sentiment_data_path, sentiment_data):
    ticker = filename.split("_")[0]
    price_signal = price_signal_answer[ticker]

    avg_score = compute_avg_score(ticker_df, ticker)

    print(f"Ticker: {ticker}")

    # Announce Weighted Compound Average and the predictions from it
    # (inverted comparison: sentiment below the baseline predicts "Up")
    predictions = [
        "Up" if score < baseline else "Down"
        for score, baseline in zip(avg_score, avg_score_all)
    ]
    for method, (score, prediction) in enumerate(zip(avg_score, predictions)):
        print(f"\tMethod {method} Weighted Compound Average: {score}; Prediction: {prediction}")

    # Announce Method Correctness
    for method, prediction in enumerate(predictions):
        if price_signal == prediction:
            method_correct[method] += 1
            print(f"\t  method {method} correct")
    print()

    # General Analysis Stats
    #   How many down and up predictions?
    total_down_prediction += predictions.count("Down")
    total_up_prediction += predictions.count("Up")

print()
for method, correct in enumerate(method_correct):
    success_rate = (correct / num_tickers) * 100 if num_tickers else 0.0
    print(f"Method {method} got: {correct} correct answers; Successful Prediction Rate: {success_rate}%")
print()
print(f"Total Down Predictions: {total_down_prediction}")
print(f"Total Up Predictions: {total_up_prediction}")

# Keep the per-method counter names this cell has always defined.
method_0_correct, method_1_correct, method_2_correct, method_3_correct = method_correct

Average Score across all the datasets
	 Method 0 Average Score: 0.156345807677761
	 Method 1 Average Score: 5.1923461850999696e-05
	 Method 2 Average Score: 0.156345807677761
	 Method 3 Average Score: 4.3083621875943096e-05

Ticker: SNOW
	Method 0 Weighted Compound Average: 0.15395951781768497; Prediction: Up
	Method 1 Weighted Compound Average: 5.625653798611805e-06; Prediction: Up
	Method 2 Weighted Compound Average: 0.15395951781768497; Prediction: Up
	Method 3 Weighted Compound Average: 4.543713571076787e-06; Prediction: Up
	  method 0 correct
	  method 1 correct
	  method 2 correct
	  method 3 correct

Ticker: PLTR
	Method 0 Weighted Compound Average: 0.10347956756367165; Prediction: Up
	Method 1 Weighted Compound Average: 8.841104541002168e-06; Prediction: Up
	Method 2 Weighted Compound Average: 0.10347956756367165; Prediction: Up
	Method 3 Weighted Compound Average: 7.027554964111171e-06; Prediction: Up

Ticker: ZI
	Method 0 Weighted Compound Average: 0.2719792597811435; Predict

# Test 4
- Start by computing the weighted average compound across all tweets data (across all 6 dataframes)
- Baseline = Weighted Average Compound (All) * 0.7
    - Essentially taking a fractional value of Weighted Average Compound (All) to adjust for the "hype" factor prevalent in social media
    - Predicts to go "Up" if the weighted average compound is above Baseline
    - Predicts to go "Down" if the weighted average compound is below Baseline

In [10]:
# Test 4: predict "Up" when a ticker's weighted compound average is above a
# scaled-down version of the average over all datasets, otherwise "Down".

# Load every "<TICKER>_Tweets.csv" file, leaving out the "after" datasets.
SENTIMENT_FILE_PATH = path.join(REPO_PATH, "sentiment_data")
SENTIMENT_FILENAMES = listdir(SENTIMENT_FILE_PATH)

sentiment_data_path = [x for x in SENTIMENT_FILENAMES if x.endswith("_Tweets.csv") and ("after" not in x)]
sentiment_data = [pd.read_csv(path.join(SENTIMENT_FILE_PATH, x)) for x in sentiment_data_path]

# Per-method average over all datasets pooled together (printed unscaled).
avg_score_all = compute_avg_score_all(sentiment_data)
print("Average Score across all the datasets")
for method, score in enumerate(avg_score_all):
    print(f"\t Method {method} Average Score: {score}")
print()

# Fraction of the overall average that is used as the decision baseline.
BASELINE_FACTOR = 0.7
avg_score_all = [x * BASELINE_FACTOR for x in avg_score_all]

# Expected price direction for each ticker.
price_signal_answer = {
    "PLTR": "Down",
    "ASAN": "Up",
    "SNOW": "Up",
    "ZI": "Down",
    "KC": "Up",
    "U": "Down"
}

# The success rate is taken over the tickers actually loaded, not a fixed 6.
num_tickers = len(sentiment_data_path)

method_correct = [0, 0, 0, 0]  # correct predictions per scoring method
total_down_prediction = total_up_prediction = 0

# iterate on the dataframes
for filename, ticker_df in zip(sentiment_data_path, sentiment_data):
    ticker = filename.split("_")[0]
    price_signal = price_signal_answer[ticker]

    avg_score = compute_avg_score(ticker_df, ticker)

    print(f"Ticker: {ticker}")

    # Announce Weighted Compound Average and the predictions from it
    predictions = [
        "Up" if score > baseline else "Down"
        for score, baseline in zip(avg_score, avg_score_all)
    ]
    for method, (score, prediction) in enumerate(zip(avg_score, predictions)):
        print(f"\tMethod {method} Weighted Compound Average: {score}; Prediction: {prediction}")

    # Announce Method Correctness
    for method, prediction in enumerate(predictions):
        if price_signal == prediction:
            method_correct[method] += 1
            print(f"\t  method {method} correct")
    print()

    # General Analysis Stats
    #   How many down and up predictions?
    total_down_prediction += predictions.count("Down")
    total_up_prediction += predictions.count("Up")

print()
for method, correct in enumerate(method_correct):
    success_rate = (correct / num_tickers) * 100 if num_tickers else 0.0
    print(f"Method {method} got: {correct} correct answers; Successful Prediction Rate: {success_rate}%")
print()
print(f"Total Down Predictions: {total_down_prediction}")
print(f"Total Up Predictions: {total_up_prediction}")

# Keep the per-method counter names this cell has always defined.
method_0_correct, method_1_correct, method_2_correct, method_3_correct = method_correct

Average Score across all the datasets
	 Method 0 Average Score: 0.156345807677761
	 Method 1 Average Score: 5.1923461850999696e-05
	 Method 2 Average Score: 0.156345807677761
	 Method 3 Average Score: 4.3083621875943096e-05

Ticker: SNOW
	Method 0 Weighted Compound Average: 0.15395951781768497; Prediction: Up
	Method 1 Weighted Compound Average: 5.625653798611805e-06; Prediction: Down
	Method 2 Weighted Compound Average: 0.15395951781768497; Prediction: Up
	Method 3 Weighted Compound Average: 4.543713571076787e-06; Prediction: Down
	  method 0 correct
	  method 2 correct

Ticker: PLTR
	Method 0 Weighted Compound Average: 0.10347956756367165; Prediction: Down
	Method 1 Weighted Compound Average: 8.841104541002168e-06; Prediction: Down
	Method 2 Weighted Compound Average: 0.10347956756367165; Prediction: Down
	Method 3 Weighted Compound Average: 7.027554964111171e-06; Prediction: Down
	  method 0 correct
	  method 1 correct
	  method 2 correct
	  method 3 correct

Ticker: ZI
	Method 0 We

# Test 5
- Start by computing the weighted average compound across all tweets data (across all 6 dataframes)
- Baseline = Weighted Average Compound (All) * 0.7
    - Essentially taking a fractional value of Weighted Average Compound (All) to adjust for the "hype" factor prevalent in social media
    - New Twist, we completely doubt twitter sentiment and act opposite to the sentiment
    - Predicts to go "Up" if the weighted average compound is below Baseline
    - Predicts to go "Down" if the weighted average compound is above Baseline

In [11]:
# Test 5: contrarian rule against a scaled-down baseline - predict "Up" when a
# ticker's weighted compound average is below the scaled average over all
# datasets, otherwise "Down".

# Load every "<TICKER>_Tweets.csv" file, leaving out the "after" datasets.
SENTIMENT_FILE_PATH = path.join(REPO_PATH, "sentiment_data")
SENTIMENT_FILENAMES = listdir(SENTIMENT_FILE_PATH)

sentiment_data_path = [x for x in SENTIMENT_FILENAMES if x.endswith("_Tweets.csv") and ("after" not in x)]
sentiment_data = [pd.read_csv(path.join(SENTIMENT_FILE_PATH, x)) for x in sentiment_data_path]

# Per-method average over all datasets pooled together (printed unscaled).
avg_score_all = compute_avg_score_all(sentiment_data)
print("Average Score across all the datasets")
for method, score in enumerate(avg_score_all):
    print(f"\t Method {method} Average Score: {score}")
print()

# Fraction of the overall average that is used as the decision baseline.
BASELINE_FACTOR = 0.7
avg_score_all = [x * BASELINE_FACTOR for x in avg_score_all]

# Expected price direction for each ticker.
price_signal_answer = {
    "PLTR": "Down",
    "ASAN": "Up",
    "SNOW": "Up",
    "ZI": "Down",
    "KC": "Up",
    "U": "Down"
}

# The success rate is taken over the tickers actually loaded, not a fixed 6.
num_tickers = len(sentiment_data_path)

method_correct = [0, 0, 0, 0]  # correct predictions per scoring method
total_down_prediction = total_up_prediction = 0

# iterate on the dataframes
for filename, ticker_df in zip(sentiment_data_path, sentiment_data):
    ticker = filename.split("_")[0]
    price_signal = price_signal_answer[ticker]

    avg_score = compute_avg_score(ticker_df, ticker)

    print(f"Ticker: {ticker}")

    # Announce Weighted Compound Average and the predictions from it
    # (inverted comparison: sentiment below the baseline predicts "Up")
    predictions = [
        "Up" if score < baseline else "Down"
        for score, baseline in zip(avg_score, avg_score_all)
    ]
    for method, (score, prediction) in enumerate(zip(avg_score, predictions)):
        print(f"\tMethod {method} Weighted Compound Average: {score}; Prediction: {prediction}")

    # Announce Method Correctness
    for method, prediction in enumerate(predictions):
        if price_signal == prediction:
            method_correct[method] += 1
            print(f"\t  method {method} correct")
    print()

    # General Analysis Stats
    #   How many down and up predictions?
    total_down_prediction += predictions.count("Down")
    total_up_prediction += predictions.count("Up")

print()
for method, correct in enumerate(method_correct):
    success_rate = (correct / num_tickers) * 100 if num_tickers else 0.0
    print(f"Method {method} got: {correct} correct answers; Successful Prediction Rate: {success_rate}%")
print()
print(f"Total Down Predictions: {total_down_prediction}")
print(f"Total Up Predictions: {total_up_prediction}")

# Keep the per-method counter names this cell has always defined.
method_0_correct, method_1_correct, method_2_correct, method_3_correct = method_correct

Average Score across all the datasets
	 Method 0 Average Score: 0.156345807677761
	 Method 1 Average Score: 5.1923461850999696e-05
	 Method 2 Average Score: 0.156345807677761
	 Method 3 Average Score: 4.3083621875943096e-05

Ticker: SNOW
	Method 0 Weighted Compound Average: 0.15395951781768497; Prediction: Down
	Method 1 Weighted Compound Average: 5.625653798611805e-06; Prediction: Up
	Method 2 Weighted Compound Average: 0.15395951781768497; Prediction: Down
	Method 3 Weighted Compound Average: 4.543713571076787e-06; Prediction: Up
	  method 1 correct
	  method 3 correct

Ticker: PLTR
	Method 0 Weighted Compound Average: 0.10347956756367165; Prediction: Up
	Method 1 Weighted Compound Average: 8.841104541002168e-06; Prediction: Up
	Method 2 Weighted Compound Average: 0.10347956756367165; Prediction: Up
	Method 3 Weighted Compound Average: 7.027554964111171e-06; Prediction: Up

Ticker: ZI
	Method 0 Weighted Compound Average: 0.2719792597811435; Prediction: Down
	Method 1 Weighted Compoun