# Starter Script (Change REPO_PATH at Start)

In [43]:
# REPO_PATH, for easier standardization
from os import path
REPO_PATH = "/Users/tks/Documents/GitHub/cs591_sns_ipo/" # change me

# all imports
import os.path as path
from os import makedirs, listdir
import pandas as pd

# ------------------------------------
# Helper functions 
# ------------------------------------

def compute_avg_score(df, ticker):
    """Return the per-method average of ``weighted_compound_k * score_k``.

    For each method ``k`` in 0..3 the product of the ``weighted_compound_k``
    and ``score_k`` columns is summed over every row of ``df`` and divided by
    the number of rows.

    Args:
        df: DataFrame with columns ``weighted_compound_0..3`` and
            ``score_0..3``, one row per tweet.
        ticker: Unused; kept so existing callers keep working.

    Returns:
        A list of four floats, one average per method. When ``df`` has no
        rows every entry is NaN (the average is undefined), mirroring what
        pandas' own ``mean()`` yields for empty data.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    num_tweets = len(df)
    if num_tweets == 0:
        # Avoid a bare ZeroDivisionError on an empty dataset.
        return [float("nan")] * 4

    averages = []
    for method in range(4):
        products = df[f"weighted_compound_{method}"] * df[f"score_{method}"]
        # skipna=False keeps the row-by-row behaviour: a NaN in any row
        # makes that method's average NaN instead of being silently dropped.
        averages.append(float(products.sum(skipna=False)) / num_tweets)
    return averages

def compute_avg_score_all(sentiment_data):
    """Return the per-method average of ``weighted_compound_k * score_k``
    pooled over every row of every DataFrame in ``sentiment_data``.

    Args:
        sentiment_data: Iterable of DataFrames, each with columns
            ``weighted_compound_0..3`` and ``score_0..3``.

    Returns:
        A list of four floats, one pooled average per method. When there
        are no rows at all (no frames, or only empty frames) every entry is
        NaN, since the average is undefined.

    Raises:
        KeyError: if a non-empty frame lacks one of the required columns.
    """
    totals = [0.0] * 4
    num_tweets_all = 0
    for df in sentiment_data:
        if len(df) == 0:
            # Empty frames contribute neither rows nor score.
            continue
        num_tweets_all += len(df)
        for method in range(4):
            products = df[f"weighted_compound_{method}"] * df[f"score_{method}"]
            # skipna=False keeps NaN propagation identical to summing
            # row by row.
            totals[method] += float(products.sum(skipna=False))

    if num_tweets_all == 0:
        # Avoid a bare ZeroDivisionError when nothing was loaded.
        return [float("nan")] * 4
    return [total / num_tweets_all for total in totals]

# ------------------------------------
# Helper functions for utility
# ------------------------------------

def save_df(df, title=None, save_path=None):
    """Write ``df`` to a CSV file.

    Args:
        df: DataFrame to save (written with ``DataFrame.to_csv`` defaults).
        title: File name inside ``save_path``. When omitted, the name is
            ``"Untitled_<n>"`` where ``<n>`` is the number of entries
            currently in ``save_path``.
        save_path: Target directory. When omitted, the ``sentiment_data``
            directory under the module-level ``REPO_PATH`` is used, so the
            repository location only has to be configured in one place.
            The directory is created if it does not exist.
    """
    if save_path is None:
        save_path = path.join(REPO_PATH, "sentiment_data")
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    makedirs(save_path, exist_ok=True)
    if title is None:
        title = "Untitled_" + str(len(listdir(save_path)))

    df.to_csv(path.join(save_path, title))

In [47]:
# Locate the sentiment CSVs: keep "<TICKER>_..._Tweets.csv" files and skip
# any file whose name contains "after".
SENTIMENT_FILE_PATH = path.join(REPO_PATH, "sentiment_data")
SENTIMENT_FILENAMES = listdir(SENTIMENT_FILE_PATH)

sentiment_data_path = []
for filename in SENTIMENT_FILENAMES:
    if filename.endswith("_Tweets.csv") and "after" not in filename:
        sentiment_data_path.append(filename)
sentiment_data = [
    pd.read_csv(path.join(SENTIMENT_FILE_PATH, filename))
    for filename in sentiment_data_path
]

# Pooled averages act as the per-method threshold for the Up/Down call.
avg_score_all = compute_avg_score_all(sentiment_data)
print("Average Score across all the datasets")
for method, baseline in enumerate(avg_score_all):
    print(f"\t Method {method} Average Score: {baseline}")
print()

# Expected price direction per ticker, used to grade each prediction.
price_signal_answer = {
    "PLTR": "Down",
    "ASAN": "Up",
    "SNOW": "Up",
    "ZI": "Down",
    "KC": "Up",
    "U": "Down"
}

# Number of tickers each method predicted correctly, indexed by method.
correct_counts = [0, 0, 0, 0]

for filename, ticker_df in zip(sentiment_data_path, sentiment_data):
    # The ticker symbol is the file name up to the first underscore.
    ticker = filename.split("_")[0]
    price_signal = price_signal_answer[ticker]

    avg_score = compute_avg_score(ticker_df, ticker)

    print(f"Ticker: {ticker}")

    # Predict "Up" when the ticker's average beats the pooled average.
    predictions = []
    for method in range(4):
        print(f"\tMethod {method} Weighted Compound Average: {avg_score[method]}")
        if avg_score[method] > avg_score_all[method]:
            predictions.append("Up")
        else:
            predictions.append("Down")

    for method, prediction in enumerate(predictions):
        if prediction == price_signal:
            correct_counts[method] += 1
            print(f"\t  method {method} correct")
    print()

# Keep the original per-method counter names available at module level.
method_0_correct, method_1_correct, method_2_correct, method_3_correct = correct_counts

print()
for method, n_correct in enumerate(correct_counts):
    print(f"Method {method} got: {n_correct} correct answers")

Average Score across all the datasets
	 Method 0 Average Score: 4.3822875830995654e-05
	 Method 1 Average Score: 0.156345807677761
	 Method 2 Average Score: 5.1923461850999696e-05
	 Method 3 Average Score: 4.3083621875943096e-05

Ticker: SNOW
	Method 0 Weighted Compound Average: 4.443025441168489e-06
	Method 1 Weighted Compound Average: 0.15395951781768497
	Method 2 Weighted Compound Average: 5.625653798611805e-06
	Method 3 Weighted Compound Average: 4.543713571076787e-06

Ticker: PLTR
	Method 0 Weighted Compound Average: 6.922366268141744e-06
	Method 1 Weighted Compound Average: 0.10347956756367165
	Method 2 Weighted Compound Average: 8.841104541002168e-06
	Method 3 Weighted Compound Average: 7.027554964111171e-06
	  method 0 correct
	  method 1 correct
	  method 2 correct
	  method 3 correct

Ticker: ZI
	Method 0 Weighted Compound Average: 0.0008218509948443405
	Method 1 Weighted Compound Average: 0.2719792597811435
	Method 2 Weighted Compound Average: 0.0009537705201543545
	Method 3